From aa579530fb4d3f43f2919839484f5107da8d14d5 Mon Sep 17 00:00:00 2001
From: TangQunzhang
Date: Fri, 14 May 2021 09:31:05 +0800
Subject: [PATCH] Support session scope memory

---
 ge/CMakeLists.txt | 4 +
 ge/executor/CMakeLists.txt | 2 +
 ge/executor/ge_executor.cc | 2 +-
 ge/graph/build/memory/block_mem_assigner.cc | 101 +++++---
 ge/graph/build/memory/block_mem_assigner.h | 21 +-
 ge/graph/build/memory/graph_mem_assigner.cc | 26 +-
 ge/graph/build/memory/graph_mem_assigner.h | 4 +-
 ge/graph/build/memory/hybrid_mem_assigner.cc | 10 +-
 ge/graph/build/memory/hybrid_mem_assigner.h | 6 +-
 ge/graph/build/memory/memory_assigner.cc | 2 +-
 ge/graph/build/model_builder.cc | 17 +-
 ge/graph/build/model_builder.h | 2 +-
 ge/graph/load/model_manager/davinci_model.cc | 170 +++++++------
 ge/graph/load/model_manager/davinci_model.h | 8 +-
 ge/graph/load/model_manager/model_utils.cc | 19 +-
 .../load/model_manager/task_info/task_info.h | 20 +-
 ge/graph/manager/graph_caching_allocator.cc | 4 +-
 ge/graph/manager/graph_caching_allocator.h | 2 +-
 ge/graph/manager/graph_mem_allocator.cc | 116 +--------
 ge/graph/manager/graph_mem_allocator.h | 108 +-------
 ge/graph/manager/graph_mem_manager.cc | 114 +++++++++
 ge/graph/manager/graph_mem_manager.h | 141 +++++++++++
 ge/graph/manager/graph_var_manager.cc | 11 +-
 ge/graph/manager/memory_api.cc | 2 +-
 ge/graph/manager/rdma_pool_allocator.cc | 3 +-
 .../manager/session_scope_mem_allocator.cc | 85 +++++++
 .../manager/session_scope_mem_allocator.h | 123 +++++++++
 ge/hybrid/common/npu_memory_allocator.cc | 5 +-
 ge/hybrid/model/hybrid_model_builder.cc | 3 +-
 .../host_cpu/host_cpu_node_executor.cc | 3 +-
 ge/init/gelib.cc | 2 +-
 ge/session/inner_session.cc | 6 +
 ge/single_op/single_op_manager.cc | 3 +-
 inc/framework/memory/memory_assigner.h | 2 +-
 tests/ut/ge/CMakeLists.txt | 6 +
 .../ge/graph/build/mem_assigner_unittest.cc | 235 ++++++++++++++++--
 .../ge/graph/build/model_builder_unittest.cc | 54 ++++
 tests/ut/ge/graph/ge_executor_unittest.cc | 7 +-
 .../graph_caching_allocator_unittest.cc | 3 +-
 .../session_scope_mem_allocator_unittest.cc | 75 ++++++
 .../graph/passes/variable_op_pass_unittest.cc | 2 +-
 41 files changed, 1102 insertions(+), 427 deletions(-)
 create mode 100644 ge/graph/manager/graph_mem_manager.cc
 create mode 100644 ge/graph/manager/graph_mem_manager.h
 create mode 100644 ge/graph/manager/session_scope_mem_allocator.cc
 create mode 100644 ge/graph/manager/session_scope_mem_allocator.h
 create mode 100644 tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 6ff9f5d9..cc777f31 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -173,10 +173,12 @@ set(TRAIN_SRC_LIST
     "graph/manager/graph_manager_utils.cc"
     "graph/manager/graph_mem_allocator.cc"
     "graph/manager/graph_caching_allocator.cc"
+    "graph/manager/session_scope_mem_allocator.cc"
     "graph/manager/graph_var_manager.cc"
     "graph/manager/host_mem_manager.cc"
     "graph/manager/rdma_pool_allocator.cc"
     "graph/manager/host_mem_allocator.cc"
+    "graph/manager/graph_mem_manager.cc"
     "graph/manager/memory_api.cc"
     "graph/manager/model_manager/event_manager.cc"
     "graph/manager/trans_var_data_utils.cc"
@@ -478,6 +480,8 @@ set(INFER_SRC_LIST
     "graph/manager/host_mem_allocator.cc"
     "graph/manager/graph_mem_allocator.cc"
     "graph/manager/graph_caching_allocator.cc"
+    "graph/manager/session_scope_mem_allocator.cc"
+    "graph/manager/graph_mem_manager.cc"
     "model/ge_model.cc"
     "model/ge_root_model.cc"
     "graph/common/transop_util.cc"
diff --git a/ge/executor/CMakeLists.txt
b/ge/executor/CMakeLists.txt index 856e7cf1..820518ad 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,6 +28,8 @@ set(SRC_LIST "../graph/manager/graph_var_manager.cc" "../graph/manager/graph_mem_allocator.cc" "../graph/manager/graph_caching_allocator.cc" + "../graph/manager/session_scope_mem_allocator.cc" + "../graph/manager/graph_mem_manager.cc" "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 6d3114f4..e66dcb58 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -26,7 +26,7 @@ #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "single_op/single_op_manager.h" #include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 2756c6fe..9b81eae3 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -500,6 +500,7 @@ string MemoryBlock::String() { ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " "; ss << "real_size_list: " << ToString(real_size_list_) << " "; ss << "ref_count: " << ref_count_ << " "; + ss << "reuse_mem_: " << reuse_mem_ << " "; ss << "members: "; for (auto x : NodeTypeIndexList()) { ss << "__node: " << ToString(x) << " "; @@ -513,8 +514,8 @@ string MemoryBlock::String() { BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), - symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} + : compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), + anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); @@ -1123,7 +1124,7 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, - const bool continuous, int64_t memory_type) { + const bool continuous, uint64_t memory_type) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( n == nullptr, REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); @@ -1824,8 +1825,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } - int64_t memory_type = RT_MEMORY_HBM; - if (!GetWorkSpaceMemoryType(n, i, memory_type)) { + uint64_t memory_type = RT_MEMORY_HBM; + if (!GetWorkSpaceMemoryType(n, i, memory_type, workspace_reuse_flag)) { GELOGW("Get workspace memory type failed."); return; } @@ -1860,7 +1861,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { } void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type) { + 
MemoryBlock *mem_block, uint64_t memory_type) { bool reuse_mem_flag = ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; if (reuse_mem_flag) { @@ -1992,24 +1993,29 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } } -void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { - if (block.memory_type_ == RT_MEMORY_HBM) { - if (block.first_continuous_block_) { - mem_offset += MEM_ALIGN_SIZE; - } - block.Resize(); - block.SetHeadOffset(mem_offset); - mem_offset += block.Size(); - block.SetTailOffset(mem_offset - 1); - } else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { - if (block.first_continuous_block_) { - p2p_mem_offset += MEM_ALIGN_SIZE; +void AddBlockMemOffset(std::map &mem_offsets, MemoryBlock &block) { + auto it = mem_offsets.find(block.memory_type_); + if (it == mem_offsets.end()) { + auto result = mem_offsets.insert(std::pair(block.memory_type_, 0)); + // Insert failure is unlikely + if (!result.second) { + return; } - block.Resize(); - block.SetHeadOffset(p2p_mem_offset); - p2p_mem_offset += block.Size(); - block.SetTailOffset(p2p_mem_offset - 1); + it = result.first; + } + + if (it == mem_offsets.end()) { + return; + } + + auto &mem_offset = it->second; + if (block.first_continuous_block_) { + mem_offset += MEM_ALIGN_SIZE; } + block.Resize(); + block.SetHeadOffset(mem_offset); + mem_offset += block.Size(); + block.SetTailOffset(mem_offset - 1); } bool DynamicBatchBlockReuse(MemoryBlock &block) { @@ -2036,27 +2042,27 @@ void BlockMemAssigner::ResizeDynamicBatchBlocks() { } } - size_t max_mem_offset = mem_offset_; - size_t max_p2p_mem_offset = p2p_mem_offset_; + std::map max_mem_offsets = mem_offsets_; for (auto &batch_blocks : dynamic_batch_blocks) { - size_t mem_offset = mem_offset_; - size_t p2p_mem_offset = p2p_mem_offset_; + std::map mem_offsets = mem_offsets_; for (auto block : batch_blocks.second) { if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { continue; } - AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); + AddBlockMemOffset(mem_offsets, *block); } - if (mem_offset > max_mem_offset) { - max_mem_offset = mem_offset; - } - if (p2p_mem_offset > max_p2p_mem_offset) { - max_p2p_mem_offset = p2p_mem_offset; + + for (auto &it : mem_offsets) { + auto itmax = max_mem_offsets.find(it.first); + if (itmax == max_mem_offsets.end()) { + max_mem_offsets[it.first] = it.second; + } else if (it.second > itmax->second) { + itmax->second = it.second; + } + GELOGI("Batch:%s memory type:%ld offset:%zu", batch_blocks.first.c_str(), it.first, it.second); } - GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); } - mem_offset_ = max_mem_offset; - p2p_mem_offset_ = max_p2p_mem_offset; + mem_offsets_ = max_mem_offsets; } /// @@ -2074,11 +2080,13 @@ void BlockMemAssigner::ResizeMemoryBlocks() { continue; } - AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block); + AddBlockMemOffset(mem_offsets_, *memory_block); } ResizeDynamicBatchBlocks(); - GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," - "theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); + for (auto it : mem_offsets_) { + GELOGI("Memory type:%ld mem_offset exclude zero_copy_memory:%zu, theory_min_memory_size:%zu", it.first, it.second, + theory_min_memory_size_); + } } /// @@ -2217,7 +2225,8 @@ bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { 
(node_type == CONSTANTOP) || (node_type == HVDWAIT); } -bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { +bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, + vector &workspace_reuse_flag) { memory_type = RT_MEMORY_HBM; vector workspace_memory_type; auto op_desc = node->GetOpDesc(); @@ -2233,6 +2242,20 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, return false; } memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + if (has_workspace_no_reuse_scope && (index < workspace_no_reuse_scope.size()) + && (workspace_no_reuse_scope[index] == kSessionNoReuse)) { + memory_type |= kSessionScopeMemory; + if (workspace_reuse_flag.empty()) { + workspace_reuse_flag.assign(workspace_no_reuse_scope.size(), true); + } + // set to no reuse + workspace_reuse_flag[index] = false; + GELOGI("%s's workspace is session scope no reuse, memory type:%lu.", node->GetName().c_str(), memory_type); + } return true; } } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 3b1e0d31..231cce09 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -34,6 +34,10 @@ namespace ge { const size_t kMaxLifeTime = 0xffffffff; const int32_t kInvalidThreadScopeId = -1; +const uint64_t kSessionScopeMemory = 0x100000000; +const uint64_t kMemoryTypeMask = 0xffffffff; + +enum MemoryNoReuseScope { kReuse, kSessionNoReuse, kGraphNoReuse }; using DependStreamLife = std::map>; @@ -224,9 +228,7 @@ class BlockMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } @@ -329,14 +331,10 @@ class BlockMemAssigner : public MemAssigner { /// void UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type); - size_t mem_offset_; - size_t p2p_mem_offset_; - + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; - std::vector memory_blocks_; std::vector blocks_store_; - std::vector zero_memory_list_; // ref mapping @@ -380,7 +378,7 @@ class BlockMemAssigner : public MemAssigner { /// MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const ge::NodePtr &n, uint32_t out_index, const std::vector &workspace_reuse_flag, - const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); + const bool is_op_reuse_mem, const bool continuous, uint64_t memory_type); /// /// @ingroup GE @@ -394,7 +392,7 @@ class BlockMemAssigner : public MemAssigner { /// @author /// void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type); + MemoryBlock *mem_block, uint64_t memory_type); /// /// @ingroup GE @@ -457,7 +455,8 @@ class BlockMemAssigner : public MemAssigner { bool IsContinuousOutput(const NodePtr &n); - bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); + bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t 
&memory_type, + vector &workspace_reuse_flag); void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index a45fb239..8becd90e 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -107,11 +107,22 @@ Status GraphMemoryAssigner::AssignMemory() { compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); return ge::FAILED; } - MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); - memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - if (mem_assigner->GetP2PMemOffset() >= 0) { - MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); + for (auto pair : mem_assigner->GetMemOffsets()) { + MemoryOffset offset(pair.first, pair.second); + memory_offset_.emplace(pair.first, offset); + } + + // base memtype offset must be exist + auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + } + + it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0); memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); } @@ -224,7 +235,7 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out return SUCCESS; } -Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { +Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); @@ -264,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_mem_copy_size) { +Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size) { BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); if (priority_assigner == nullptr) { REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s", @@ -1398,6 +1409,9 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); } for (auto pair : memory_offset_) { + if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) { + continue; + } GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 0d9b03e5..a6a2a686 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -103,9 +103,9 @@ class GraphMemoryAssigner { ge::Status AssignMemory2HasRefAttrNode(); - ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); - ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); + ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); diff --git 
a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc index eff821bf..ccf673b3 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ -23,7 +23,7 @@ namespace ge { HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} + : compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size) { vector ranges; @@ -36,7 +36,10 @@ Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_ block_assigner->AssignMemoryWithReuse(ranges); - mem_size = block_assigner->GetMemOffset(); + // total size + for (auto it : block_assigner->GetMemOffsets()) { + mem_size += it.second; + } return SUCCESS; } @@ -73,8 +76,7 @@ Status HybridMemAssigner::Assign() { } priority_assigner->SetOpMemOffset(false); - mem_offset_ = priority_assigner->GetMemOffset(); - p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); + mem_offsets_ = priority_assigner->GetMemOffsets(); priority_assigner_ = std::move(priority_assigner); return SUCCESS; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h index 7baece44..2bdfd5c5 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -42,16 +42,14 @@ class HybridMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } private: Status AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size); - size_t mem_offset_; - size_t p2p_mem_offset_; + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 34b97c60..570aae07 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -20,7 +20,7 @@ #include "graph/build/memory/graph_mem_assigner.h" namespace ge { -Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { +Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { GraphMemoryAssigner graph_mem_assigner(compute_graph_); if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 0e625990..ce2f57f9 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -47,6 +47,7 @@ #include "omg/version.h" #include "register/op_registry.h" #include "graph/passes/set_input_output_offset_pass.h" +#include "graph/build/memory/block_mem_assigner.h" using std::map; using std::set; @@ -398,9 +399,21 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); return FAILED); + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_mem_offset = 0; + auto it = mem_type_to_mem_offset_.find(mem_type_session_scope); + if (it != mem_type_to_mem_offset_.end()) { + 
session_scope_mem_offset = it->second; + } if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset), + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", + ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.c_str()); + GELOGE(FAILED, "SetInt of ATTR_NAME_SESSION_SCOPE_MEMORY_SIZE failed."); + return FAILED); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); @@ -434,8 +447,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str()); GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str()); return FAILED); - GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, - p2p_mem_offset_, zero_copy_mem_size_); + GELOGI("For model, max_mem_offset: %zu, p2p_mem_size: %zu, zero_copy_mem_size: %zu, session_scope_mem_size: %zu", + max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_, session_scope_mem_offset); string fp_ceiling_mode; if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 67def859..6f097329 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -93,7 +93,7 @@ class ModelBuilder { uint64_t session_id_; - map mem_type_to_mem_offset_; + map mem_type_to_mem_offset_; size_t weight_offset_; diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 5d90d5a1..b52796c8 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -40,7 +40,7 @@ #include "graph/load/model_manager/cpu_queue_schedule.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/tbe_handle_store.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/manager/util/debug.h" @@ -60,6 +60,8 @@ #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" #include "graph/common/omg_util.h" +#include "graph/build/memory/block_mem_assigner.h" +#include "graph/manager/session_scope_mem_allocator.h" // create std::thread, catch exceptions using try/catch #define CREATE_STD_THREAD(thread_id, func, args) \ @@ -168,7 +170,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr(dev_ptr); - p2p_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { @@ -422,24 +421,13 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { is_inner_mem_base_ = true; } - if (p2p_data_size != 0) { - p2p_mem_base_ = MallocP2PMem(p2p_data_size); - if (p2p_mem_base_ == nullptr) { - REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, p2p_data_size:%zu, model_id:%u, check invalid", - p2p_data_size, model_id_); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, 
"[Alloc][Memory] for p2p failed, size:%zu, model_id:%u", - p2p_data_size, model_id_); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - p2p_mem_base_, p2p_data_size); - is_inner_p2p_mem_base_ = true; + if (!runtime_param_.memory_infos.empty()) { + GE_CHK_STATUS_RET(MallocExMem(), "MallocExMem failed."); } GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; - runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; return SUCCESS; } @@ -465,7 +453,6 @@ Status DavinciModel::InitVariableMem() { void DavinciModel::InitRuntimeParams() { int64_t value = 0; bool ret; - MemInfo p2p_mem_info; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); runtime_param_.mem_size = ret ? (uint64_t)value : 0; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); @@ -490,16 +477,18 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.var_size = ret ? (uint64_t)value : 0; session_id_ = runtime_param_.session_id; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); - p2p_mem_info.memory_size = ret ? (uint64_t)value : 0; + MemInfo p2p_mem_info; + p2p_mem_info.memory_size = static_cast(ret ? value : 0); + p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR; + p2p_mem_info.memory_key = "_p"; runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); - GELOGI( - "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " - "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " - "memory_size:%lu, weight_size:%lu, var_size:%lu", - runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, - runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, - runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); + ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, value); + MemInfo session_scope_mem_info; + session_scope_mem_info.memory_size = static_cast(ret ? 
value : 0);
+  runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info);
+
+  GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str());
 }

 void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) {
@@ -4089,14 +4078,15 @@ Status DavinciModel::InitEntryTask() {
 uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
   uint8_t *mem_base = nullptr;
   const string purpose("feature map,used for op input and output.");
-  char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
+  char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
   INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
   if (res == EN_OK) {
     data_size = static_cast(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize());
     string memory_key = std::to_string(0) + "_f";
-    mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId());
+    mem_base =
+        MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, data_size, GetDeviceId());
   } else {
-    mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, data_size, GetDeviceId());
+    mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, data_size, GetDeviceId());
   }

   if (mem_base != nullptr) {
@@ -4105,83 +4095,119 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
   return mem_base;
 }

-uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) {
-  uint8_t *p2p_mem_base = nullptr;
-  const string purpose("p2p memory, used for some op related to hcom");
-  if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
-    string p2p_memory_key = std::to_string(0) + "_p";
-    p2p_mem_base =
-        MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId());
-  } else {
-    p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId());
+Status DavinciModel::MallocExMem() {
+  char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
+  INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
+  for (auto it : runtime_param_.memory_infos) {
+    auto mem_size = it.second.memory_size;
+    if (mem_size == 0) {
+      continue;
+    }
+    bool session_scope = ((kSessionScopeMemory & it.first) == kSessionScopeMemory);
+    auto mem_type = it.first & kMemoryTypeMask;
+    uint8_t *mem_base = nullptr;
+    const string purpose("p2p memory, used for some op related to hcom or session scope memory");
+    if (session_scope) {
+      mem_base = MemManager::Instance().SessionScopeMemInstance(mem_type).Malloc(mem_size, runtime_param_.session_id);
+    } else if (res_static_memory == EN_OK) {
+      string memory_key = std::to_string(0) + it.second.memory_key;
+      mem_base =
+          MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, memory_key, mem_size, GetDeviceId());
+    } else {
+      mem_base = MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, mem_size, GetDeviceId());
+    }
+
+    if (mem_base == nullptr) {
+      REPORT_CALL_ERROR("E19999", "MallocExMem fail, type:%ld size:%zu, model_id:%u, check invalid",
+                        mem_type, mem_size, model_id_);
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc ex memory failed, type:%ld size: %zu", mem_type, mem_size);
+      return ACL_ERROR_GE_MEMORY_ALLOCATION;
+    }
+    it.second.memory_base = mem_base;
+    GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] mem_type[%ld] mem_addr[%p] mem_size[%zu]",
+           runtime_param_.graph_id, mem_type, mem_base, mem_size);
   }
-  return p2p_mem_base;
+  return SUCCESS;
 }
uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { uint8_t *weights_mem_base = nullptr; const string purpose("weights memory in inference network."); - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string weight_memory_key = std::to_string(0) + "_w"; - weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); + weights_mem_base = MemManager::Instance() + .MemInstance(RT_MEMORY_HBM) + .MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); } else { - weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); + weights_mem_base = + MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, weights_size, GetDeviceId()); } return weights_mem_base; } void DavinciModel::FreeFeatureMapMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK && is_inner_mem_base_) { string weight_memory_key = std::to_string(0) + "_f"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); + if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(weight_memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weight_memory_key, GetDeviceId()), + "failed to free weight memory"); } mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + mem_base_ != nullptr && is_inner_mem_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(mem_base_, GetDeviceId()), + "failed to free feature_map memory"); + mem_base_ = nullptr); } } -void DavinciModel::FreeP2PMem() { - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - std::string p2p_memory_key = std::to_string(0) + "_p"; - if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); +void DavinciModel::FreeExMem() { + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; + INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); + for (auto it : runtime_param_.memory_infos) { + // free when session destory + if ((kSessionScopeMemory & it.first) == kSessionScopeMemory) { + continue; + } + auto mem_type = it.first & kMemoryTypeMask; + if (res_static_memory == EN_OK) { + std::string memory_key = std::to_string(0) + it.second.memory_key; + if (MemManager::Instance().MemInstance(mem_type).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(memory_key, GetDeviceId()), + "failed to free memory"); + } + it.second.memory_base = nullptr; + } else { + GE_IF_BOOL_EXEC( + it.second.memory_base != nullptr, + 
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(it.second.memory_base, GetDeviceId()), + "failed to free memory"); + it.second.memory_base = nullptr); } - p2p_mem_base_ = nullptr; - } else { - GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - p2p_mem_base_ = nullptr); } } void DavinciModel::FreeWeightsMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string memory_key = std::to_string(0) + "_w"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); + if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key, GetDeviceId()), + "failed to free feature_map memory"); } weights_mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weights_mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - weights_mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weights_mem_base_, GetDeviceId()), + "failed to free weight memory"); + weights_mem_base_ = nullptr); } } diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index a4abcae6..e4898dec 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -248,8 +248,6 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const map &P2PMemInfos() const { return runtime_param_.memory_infos; } - // model name string Name() const { return name_; } @@ -586,10 +584,8 @@ class DavinciModel { // memory address of model uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever. 
uint8_t *mem_base_; - uint8_t *p2p_mem_base_; bool is_inner_mem_base_; bool is_inner_weight_base_; - bool is_inner_p2p_mem_base_; // input data manager DataInputer *data_inputer_; int64_t load_begin_time_; @@ -668,13 +664,13 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t *MallocP2PMem(size_t p2p_data_size); + Status MallocExMem(); void FreeFeatureMapMem(); void FreeWeightsMem(); - void FreeP2PMem(); + void FreeExMem(); void ReleaseTask(); diff --git a/ge/graph/load/model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc index 058a538f..f6ff591a 100755 --- a/ge/graph/load/model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -21,6 +21,7 @@ #include "graph/utils/tensor_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/types.h" +#include "graph/build/memory/block_mem_assigner.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -514,10 +515,16 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); bool has_mem_type_workspace = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { // Temporary solution, the aicpu workspace of multiple images cannot be shared. - if (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && - !model_param.is_single_op) { + bool aicpu_work_space = (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && + !model_param.is_single_op); + if (aicpu_work_space) { void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); v_workspace_data_addr.push_back(mem_addr); GELOGI( @@ -548,7 +555,13 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); - uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; + uint8_t *mem_addr = nullptr; + bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()); + if (session_scope_memory) { + mem_addr = model_param.memory_infos.at(kSessionScopeMemory | RT_MEMORY_HBM).memory_base + v_workspace_offset[i]; + } else { + mem_addr = model_param.mem_base + v_workspace_offset[i]; + } v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], diff --git a/ge/graph/load/model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h index 99ec3c4e..5657f003 100644 --- a/ge/graph/load/model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -18,6 +18,7 @@ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_ #include +#include #include "cce/customize.h" #include "framework/common/taskdown_common.h" @@ -28,9 +29,11 @@ namespace ge { struct MemInfo { - uint64_t memory_size = 0; + size_t memory_size = 0; uint64_t logic_memory_base = 0; uint8_t 
*memory_base = nullptr; + uint32_t memory_type = RT_MEMORY_HBM; + std::string memory_key = ""; }; struct RuntimeParam { @@ -40,6 +43,19 @@ struct RuntimeParam { } ~RuntimeParam() = default; + std::string ToString() { + std::stringstream ss; + ss << "session_id:" << session_id << ", stream_num:" << stream_num << ", event_num:" << event_num + << ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base + << ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base + << ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size + << ", ex_memory_info:"; + for (auto it : memory_infos) { + ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]"; + } + return ss.str(); + } + uint64_t mem_size = 0; uint64_t logic_mem_base = 0; uint8_t *mem_base = nullptr; @@ -49,7 +65,7 @@ struct RuntimeParam { uint64_t var_size = 0; uint64_t logic_var_base = 0; uint8_t *var_base = nullptr; - std::map memory_infos; + std::map memory_infos; uint32_t batch_num = 0; uint32_t stream_num = 0; uint32_t event_num = 0; diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 8c8df326..75aa5c01 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -21,7 +21,7 @@ #include #include "framework/common/debug/ge_log.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, @@ -117,7 +117,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) { } free_block_bins_[i] = bin_ptr; } - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index a9c3202a..2db00ff2 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -88,8 +88,8 @@ class CachingAllocator { /// /// @ingroup ge_graph /// @brief free memory + /// @param [in] memory_ptr memory address ptr /// @param [in] device_id device id - /// @param [out] memory_ptr memory address ptr /// @return Status result of function /// Status Free(uint8_t *memory_addr, uint32_t device_id = 0); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 24e75356..0cccaf99 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -17,11 +17,9 @@ #include "graph/manager/graph_mem_allocator.h" #include -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" + namespace ge { -void MemoryAllocator::Initialize(uint32_t device_id) { +Status MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); // when redo Initialize free memory @@ -31,6 +29,7 @@ void MemoryAllocator::Initialize(uint32_t device_id) { } } memory_base_map_.clear(); + return SUCCESS; } void MemoryAllocator::Finalize(uint32_t device_id) { @@ -152,113 +151,4 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic return it->second.memory_addr_; } - -MemManager::MemManager() {} - -MemManager::~MemManager() { Finalize(); } - -MemManager &MemManager::Instance() { - static 
MemManager mem_manager; - return mem_manager; -} - -MemoryAllocator *MemManager::Instance(rtMemType_t memory_type) { return Instance().GetMemoryAllocator(memory_type); } - -Status MemManager::Initialize(const std::vector &memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = memory_allocator_map_.find(index); - if (it == memory_allocator_map_.end()) { - memory_allocator = new (std::nothrow) MemoryAllocator(index); - - if (memory_allocator != nullptr) { - memory_allocator_map_[index] = memory_allocator; - GELOGI("Create MemoryAllocator memory type[%u] success.", index); - } else { - REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); - } - } else { - memory_allocator = it->second; - } - - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - memory_allocator->Initialize(0); - } - } - - auto ret = InitAllocator(memory_type, caching_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create CachingAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, rdma_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create RdmaAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, host_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create HostMemAllocator failed."); - return ret; - } - return SUCCESS; -} - -template -void FinalizeAllocatorMap(std::map &allocate_map) { - for (auto &allocator : allocate_map) { - if (allocator.second != nullptr) { - allocator.second->Finalize(); - delete allocator.second; - allocator.second = nullptr; - } - } - allocate_map.clear(); -} - -void MemManager::Finalize() noexcept { - GELOGI("Finalize."); - std::lock_guard lock(allocator_mutex_); - // caching and rdma allocator use memory allocator, so finalize them first - FinalizeAllocatorMap(caching_allocator_map_); - FinalizeAllocatorMap(rdma_allocator_map_); - FinalizeAllocatorMap(host_allocator_map_); - FinalizeAllocatorMap(memory_allocator_map_); -} - -MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - auto it = memory_allocator_map_.find(memory_type); - if (it != memory_allocator_map_.end()) { - memory_allocator = it->second; - } - - // Usually impossible - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); - static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); - return &default_memory_allocator; - } - - return memory_allocator; -} - -CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, caching_allocator_map_); -} - -RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, rdma_allocator_map_); -} -HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, host_allocator_map_); -} } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 9f8b86b2..b6d73f0a 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -26,7 +26,6 @@ #include 
"framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/manager/host_mem_allocator.h" #include "graph/node.h" #include "runtime/mem.h" @@ -71,9 +70,9 @@ class MemoryAllocator { /// @ingroup ge_graph /// @brief memory allocator init /// @param [in] options user config params - /// @return void + /// @return Status of init /// - void Initialize(uint32_t device_id = 0); + Status Initialize(uint32_t device_id = 0); /// /// @ingroup ge_graph @@ -136,109 +135,6 @@ class MemoryAllocator { bool mem_malloced_; map memory_base_map_; }; - -using MemoryAllocatorPtr = std::shared_ptr; -class CachingAllocator; -class RdmaPoolAllocator; -class MemManager { - public: - MemManager(); - virtual ~MemManager(); - static MemManager &Instance(); - static MemoryAllocator *Instance(rtMemType_t memory_type); - CachingAllocator &CachingInstance(rtMemType_t memory_type); - RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); - HostMemAllocator &HostMemInstance(rtMemType_t memory_type); - MemManager(const MemManager &) = delete; - MemManager &operator=(const MemManager &) = delete; - /// - /// @ingroup ge_graph - /// @brief memory allocator manager init - /// @param [in] options user config params - /// @return Status result of function - /// - Status Initialize(const std::vector &memory_type); - - /// - /// @ingroup ge_graph - /// @brief memory allocator finalize - /// @return void - /// - void Finalize() noexcept; - - private: - /// - /// @ingroup ge_graph - /// @brief ge memory allocator - /// @param [in] memory_type memory type - /// @return MemoryAllocator ptr - /// - MemoryAllocator *GetMemoryAllocator(rtMemType_t memory_type); - - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Status result of function - /// - template - Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { - T *allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = allocate_map.find(index); - if (it == allocate_map.end()) { - allocator = new (std::nothrow) T(index); - if (allocator != nullptr) { - allocate_map[index] = allocator; - GELOGI("Create Allocator memory type[%u] success.", index); - } else { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); - } - } else { - allocator = it->second; - } - - if (allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - if (allocator->Initialize() != SUCCESS) { - return ACL_ERROR_GE_INTERNAL_ERROR; - } - } - } - return SUCCESS; - } - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Allocator ptr - /// - template - T &GetAllocator(rtMemType_t memory_type, std::map allocate_map) { - std::lock_guard lock(allocator_mutex_); - T *allocator = nullptr; - auto it = allocate_map.find(memory_type); - if (it != allocate_map.end()) { - allocator = it->second; - } - - // Usually impossible - if (allocator == nullptr) { - GELOGW("Get allocator failed, memory type is %u.", memory_type); - static T default_allocator(RT_MEMORY_RESERVED); - return default_allocator; - } - return *allocator; - } - - std::map memory_allocator_map_; - std::map caching_allocator_map_; - std::map rdma_allocator_map_; - std::map host_allocator_map_; - std::recursive_mutex allocator_mutex_; -}; } // namespace ge #endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ diff 
--git a/ge/graph/manager/graph_mem_manager.cc b/ge/graph/manager/graph_mem_manager.cc
new file mode 100644
index 00000000..8d300dc2
--- /dev/null
+++ b/ge/graph/manager/graph_mem_manager.cc
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph/manager/graph_mem_manager.h"
+
+#include
+
+namespace ge {
+MemManager::MemManager() {}
+
+MemManager::~MemManager() { Finalize(); }
+
+MemManager &MemManager::Instance() {
+  static MemManager mem_manager;
+  return mem_manager;
+}
+
+Status MemManager::Initialize(const std::vector &memory_type) {
+  std::lock_guard lock(allocator_mutex_);
+  if (init_) {
+    GELOGW("MemManager has been inited.");
+    return SUCCESS;
+  }
+
+  auto ret = InitAllocator(memory_type, memory_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create MemoryAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, caching_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create CachingAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, rdma_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create RdmaAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, host_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create HostMemAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, session_scope_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create SessionScopeMemAllocator failed.");
+    return ret;
+  }
+  init_ = true;
+  return SUCCESS;
+}
+
+template
+void FinalizeAllocatorMap(std::map &allocate_map) {
+  for (auto &allocator : allocate_map) {
+    if (allocator.second != nullptr) {
+      allocator.second->Finalize();
+      delete allocator.second;
+      allocator.second = nullptr;
+    }
+  }
+  allocate_map.clear();
+}
+
+void MemManager::Finalize() noexcept {
+  GELOGI("Finalize.");
+  std::lock_guard lock(allocator_mutex_);
+  // caching and rdma allocator use memory allocator, so finalize them first
+  FinalizeAllocatorMap(session_scope_allocator_map_);
+  FinalizeAllocatorMap(caching_allocator_map_);
+  FinalizeAllocatorMap(rdma_allocator_map_);
+  FinalizeAllocatorMap(host_allocator_map_);
+  FinalizeAllocatorMap(memory_allocator_map_);
+  init_ = false;
+}
+
+MemoryAllocator &MemManager::MemInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, memory_allocator_map_);
+}
+
+CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, caching_allocator_map_);
+}
+
+RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, rdma_allocator_map_);
+}
+
+HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, host_allocator_map_);
+}
+
+SessionScopeMemAllocator &MemManager::SessionScopeMemInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, session_scope_allocator_map_);
+}
+} // namespace ge
diff --git
a/ge/graph/manager/graph_mem_manager.h b/ge/graph/manager/graph_mem_manager.h new file mode 100644 index 00000000..d7993ed4 --- /dev/null +++ b/ge/graph/manager/graph_mem_manager.h @@ -0,0 +1,141 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ +#define GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ + +#include +#include +#include +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/session_scope_mem_allocator.h" +#include "graph/node.h" +#include "runtime/mem.h" + +namespace ge { +using MemoryAllocatorPtr = std::shared_ptr; + +class MemManager { + public: + MemManager(); + virtual ~MemManager(); + static MemManager &Instance(); + MemoryAllocator &MemInstance(rtMemType_t memory_type); + CachingAllocator &CachingInstance(rtMemType_t memory_type); + RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); + HostMemAllocator &HostMemInstance(rtMemType_t memory_type); + SessionScopeMemAllocator &SessionScopeMemInstance(rtMemType_t memory_type); + MemManager(const MemManager &) = delete; + MemManager &operator=(const MemManager &) = delete; + /// + /// @ingroup ge_graph + /// @brief memory allocator manager init + /// @param [in] options user config params + /// @return Status result of function + /// + Status Initialize(const std::vector &memory_type); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize + /// @return void + /// + void Finalize() noexcept; + + const std::vector &GetAllMemoryType() const { return memory_type_; } + + private: + /// + /// @ingroup ge_graph + /// @param [in] memory_type memory type + /// @param [in] allocate_map memory allocator map + /// @return Status result of function + /// + template + Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { + T *allocator = nullptr; + for (unsigned int index : memory_type) { + auto it = allocate_map.find(index); + if (it == allocate_map.end()) { + allocator = new (std::nothrow) T(index); + if (allocator != nullptr) { + allocate_map[index] = allocator; + GELOGI("Create Allocator memory type[%u] success.", index); + } else { + REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); + } + } else { + allocator = it->second; + } + + if (allocator == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } else { + if (allocator->Initialize() != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + } + } + return SUCCESS; + } + /// + /// @ingroup ge_graph + /// @param [in] memory_type 
memory type + /// @param [in] allocate_map memory allocator map + /// @return Allocator ptr + /// + template <typename T> + T &GetAllocator(rtMemType_t memory_type, std::map<rtMemType_t, T *> &allocate_map) { + std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); + T *allocator = nullptr; + auto it = allocate_map.find(memory_type); + if (it != allocate_map.end()) { + allocator = it->second; + } + + // Usually impossible; fall back to a default allocator so callers always get a valid reference + if (allocator == nullptr) { + GELOGW("Get allocator failed, memory type is %u.", memory_type); + static T default_allocator(RT_MEMORY_RESERVED); + return default_allocator; + } + return *allocator; + } + + std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; + std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; + std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; + std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; + std::map<rtMemType_t, SessionScopeMemAllocator *> session_scope_allocator_map_; + std::recursive_mutex allocator_mutex_; + std::vector<rtMemType_t> memory_type_; + bool init_ = false; +}; +} // namespace ge + +#endif  // GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 79103b88..5f7586da 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -17,8 +17,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/debug/ge_attr_define.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/type_utils.h" @@ -728,7 +727,7 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; const string purpose("variables and constant op memory in training network."); - var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size); + var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size); if (var_mem_base == nullptr) { GELOGE(ge::INTERNAL_ERROR, "VarManager::MallocVarMemory failed " @@ -745,7 +744,7 @@ uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); } string memory_key = std::to_string(session_id_); - return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); + return MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { @@ -754,7 +753,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty return logic_addr; } string mem_key = std::to_string(session_id_); - uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); + uint8_t *mem_base = MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(mem_key); if (mem_base == nullptr) { return nullptr; } @@ -766,7 +765,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_ty ge::Status VarManager::FreeVarMemory() { std::lock_guard lock(mutex_); string memory_key = std::to_string(SessionId()); - return MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key); + return MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key); } ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 415f8088..8e737021 100644 --- a/ge/graph/manager/memory_api.cc +++
b/ge/graph/manager/memory_api.cc @@ -19,7 +19,7 @@ #include #include "common/ge/plugin_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/rdma_pool_allocator.h" #include "graph/utils/type_utils.h" diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index c19a2159..58829adb 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -20,6 +20,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" #include "runtime/dev.h" +#include "graph/manager/graph_mem_manager.h" namespace { const size_t kAlignedSize = 512; @@ -49,7 +50,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) })) {} Status RdmaPoolAllocator::Initialize() { - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/ge/graph/manager/session_scope_mem_allocator.cc b/ge/graph/manager/session_scope_mem_allocator.cc new file mode 100644 index 00000000..8eb01445 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.cc @@ -0,0 +1,85 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/manager/session_scope_mem_allocator.h" + +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "graph/manager/graph_mem_manager.h" + +namespace ge { + +SessionScopeMemAllocator::SessionScopeMemAllocator(rtMemType_t memory_type) + : memory_type_(memory_type), memory_allocator_(nullptr) {} + +Status SessionScopeMemAllocator::Initialize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + // free any memory left over from a previous Initialize call + FreeAllMemory(); + std::lock_guard<std::recursive_mutex> lock(mutex_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); + if (memory_allocator_ == nullptr) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::Finalize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + FreeAllMemory(); +} + +uint8_t *SessionScopeMemAllocator::Malloc(size_t size, uint64_t session_id, uint32_t device_id) { + GELOGI("Start malloc memory, size:%zu, session id:%lu device id:%u", size, session_id, device_id); + const std::string purpose = "Memory for session scope."; + auto ptr = memory_allocator_->MallocMemory(purpose, size, device_id); + if (ptr == nullptr) { + GELOGE(ge::FAILED, "Malloc failed, not enough memory for size:%zu, session_id:%lu device_id:%u", size, + session_id, device_id); + return nullptr; + } + std::lock_guard<std::recursive_mutex> lock(mutex_); + std::shared_ptr<uint8_t> mem_ptr(ptr, [&](uint8_t *p) { (void)memory_allocator_->FreeMemory(p); }); + allocated_memory_[session_id].emplace_back(size, mem_ptr); + return ptr; +} + +Status SessionScopeMemAllocator::Free(uint64_t session_id, uint32_t device_id) { + GELOGI("Free session:%lu memory, device id:%u.", session_id, device_id); + std::lock_guard<std::recursive_mutex> lock(mutex_); + auto it = allocated_memory_.find(session_id); + if (it == allocated_memory_.end()) { + REPORT_INNER_ERROR("E19999", "Memory not allocated for session_id:%lu device_id:%u, check invalid", + session_id, device_id); + GELOGE(PARAM_INVALID, "Invalid session_id:%lu", session_id); + return ge::PARAM_INVALID; + } + allocated_memory_.erase(it); + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::FreeAllMemory() { + GELOGI("Free all memory"); + std::lock_guard<std::recursive_mutex> lock(mutex_); + for (auto &session_mem : allocated_memory_) { + session_mem.second.clear(); + } + allocated_memory_.clear(); +} +} // namespace ge diff --git a/ge/graph/manager/session_scope_mem_allocator.h b/ge/graph/manager/session_scope_mem_allocator.h new file mode 100644 index 00000000..5aea9554 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.h @@ -0,0 +1,124 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ +#define GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "graph/node.h" +#include "graph/manager/block_memory.h" +#include "runtime/mem.h" +#include "graph/manager/graph_mem_allocator.h" + +namespace ge { +class SessionScopeMemoryInfo { + public: + SessionScopeMemoryInfo(size_t size, const std::shared_ptr<uint8_t> &ptr) : size(size), ptr(ptr) {} + SessionScopeMemoryInfo() = delete; + virtual ~SessionScopeMemoryInfo() = default; + + SessionScopeMemoryInfo(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return; + } + size = other.size; + ptr = other.ptr; + } + + SessionScopeMemoryInfo &operator=(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return *this; + } + size = other.size; + ptr = other.ptr; + return *this; + } + + private: + size_t size = 0; + std::shared_ptr<uint8_t> ptr = nullptr; +}; + +class SessionScopeMemAllocator { + public: + explicit SessionScopeMemAllocator(rtMemType_t memory_type); + + SessionScopeMemAllocator(const SessionScopeMemAllocator &) = delete; + + SessionScopeMemAllocator &operator=(const SessionScopeMemAllocator &) = delete; + + virtual ~SessionScopeMemAllocator() = default; + + /// + /// @ingroup ge_graph + /// @brief session scope memory allocator init + /// @param [in] device_id device id + /// @return Status of init + /// + Status Initialize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize, release all memory + /// @return void + /// + void Finalize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief malloc memory + /// @param [in] size memory size + /// @param [in] session_id session id + /// @param [in] device_id device id + /// @return memory address + /// + uint8_t *Malloc(size_t size, uint64_t session_id, uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief free memory + /// @param [in] session_id session id + /// @param [in] device_id device id + /// @return Status result of function + /// + Status Free(uint64_t session_id, uint32_t device_id = 0); + + private: + void FreeAllMemory(); + + private: + rtMemType_t memory_type_; + + // device memory allocator + MemoryAllocator *memory_allocator_; + + // lock around all operations + mutable std::recursive_mutex mutex_; + + // allocated memory blocks, keyed by session id + std::unordered_map<uint64_t, std::vector<SessionScopeMemoryInfo>> allocated_memory_; +}; +} // namespace ge +#endif  // GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index 5a04c461..b66038d9 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -17,10 +17,7 @@ #include "npu_memory_allocator.h" #include #include "framework/common/debug/log.h" -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index e108dddf..0629bd97 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -26,8 +26,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#include
"graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 6e8a1eb9..d35989a1 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,8 +18,7 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 96ed1b9c..2374e75f 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -39,7 +39,7 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "runtime/kernel.h" diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 9308e267..39c87107 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -32,6 +32,7 @@ #include "graph/common/local_context.h" #include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" @@ -155,6 +156,11 @@ Status InnerSession::Finalize() { // release var memory GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); + + for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { + (void)MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id_); + } + // release analyzer saved info(Session Level) Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index 180b50c1..d09dd802 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -19,8 +19,7 @@ #include #include -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index f5837b3a..173cc64e 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 7cdec968..e7a8ec73 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -337,8 +337,10 @@ set(COMMON_SRC_FILES 
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -396,8 +398,10 @@ set(GRAPH_LOAD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) @@ -792,6 +796,7 @@ set(MULTI_PARTS_TEST_FILES "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" "graph/manager/graph_caching_allocator_unittest.cc" + "graph/manager/session_scope_mem_allocator_unittest.cc" "graph/manager/run_graph_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc" "graph/manager/graph_manager_unittest.cc" @@ -824,6 +829,7 @@ set(PROFILING_MNG_TEST_FILES set(HYBRID_TEST_FILES "hybrid/ge_hybrid_unittest.cc" "hybrid/known_node_executor_unittest.cc" + "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/executor/subgraph_executor_unittest.cc" "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/model/hybrid_model_builder_unittest.cc" diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index 2a0f2405..c9b0b579 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -44,7 +44,8 @@ using domi::GetContext; class UtestMemoryAssignerTest : public testing::Test { public: - ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", int64_t size = 1024) { + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", + int64_t size = 1024) { ge::OpDescPtr op_def = make_shared(name, type); auto desc_temp_ptr = make_shared(); auto desc_temp = *desc_temp_ptr; @@ -214,7 +215,8 @@ class UtestMemoryAssignerTest : public testing::Test { return builder.GetGraph(); } - void make_ffts_reuse_graph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, + + void MakeFftsReuseGraph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, int32_t thread_scope_id_2 = kInvalidThreadScopeId) { ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); @@ -253,28 +255,119 @@ class UtestMemoryAssignerTest : public testing::Test { graph->TopologicalSorting(); } + void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = 
CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeContinuousReuseGraph(ge::ComputeGraphPtr graph, bool nopading = false) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + if (nopading) { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_OUTPUT_REUSE_INPUT, true); + (void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, 0); + } else { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_OUTPUT, true); + } + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeMultiBatchReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = 
CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + (void)ge::AttrUtils::SetStr(op_def_b, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_c, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_e, ATTR_NAME_BATCH_LABEL, "Batch_1"); + (void)ge::AttrUtils::SetStr(op_def_f, ATTR_NAME_BATCH_LABEL, "Batch_1"); + vector workspace_no_reuse_scope = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + protected: void SetUp() {} void TearDown() { GetContext().out_nodes_map.clear(); } }; -/* -TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { - ge::ComputeGraphPtr graph = make_shared(""); - ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); - ge::NodePtr node_a = graph->AddNode(op_def_a); - MemoryBlock* memory_block = new MemoryBlock(0); - memory_block->Init(1, kOutput, node_a, 0, 1); - memory_block->real_size_list_.clear(); - memory_block->Resize(); - - EXPECT_EQ(memory_block->Size(), 0); - - delete memory_block; -} -*/ - namespace ge { class MockBlockMemAssigner : public BlockMemAssigner { @@ -313,12 +406,44 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); } +TEST_F(UtestMemoryAssignerTest, block_memory_assign_nopading_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph, true); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 8192); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, block_memory_assign_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph); + map mem_offset; + size_t zero_copy_mem_size = 0; + MemoryAssigner memoryAssigner(graph); + ge::Status ret = memoryAssigner.AssignMemory(false, mem_offset, zero_copy_mem_size); + size_t offset = 0; + auto it = mem_offset.find(RT_MEMORY_HBM); + if (it != mem_offset.end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 11264); + EXPECT_EQ(ret, SUCCESS); 
+} + TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { ge::ComputeGraphPtr graph = make_shared(""); MakeGraph(graph); auto node_f = graph->FindNode("F"); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -335,7 +460,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { std::string value = "A"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -356,7 +481,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { std::string value = "M"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -460,30 +585,86 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); + MakeFftsReuseGraph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 5120); EXPECT_EQ(ret, SUCCESS); } TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, 0, 1); + MakeFftsReuseGraph(graph, 0, 1); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 6656); EXPECT_EQ(ret, SUCCESS); } TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, 0, kInvalidThreadScopeId); + MakeFftsReuseGraph(graph, 0, kInvalidThreadScopeId); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 5632); EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, one_session_scope_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } 
+ + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 5120); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, multi_batch_reuse) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeMultiBatchReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 6656); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); } \ No newline at end of file diff --git a/tests/ut/ge/graph/build/model_builder_unittest.cc b/tests/ut/ge/graph/build/model_builder_unittest.cc index d5efc9bb..628d0fda 100644 --- a/tests/ut/ge/graph/build/model_builder_unittest.cc +++ b/tests/ut/ge/graph/build/model_builder_unittest.cc @@ -30,6 +30,7 @@ #define protected public #define private public #include "graph/build/model_builder.h" +#include "memory/memory_assigner.h" #undef protected #undef private @@ -127,6 +128,41 @@ class UtestModelBuilderTest : public testing::Test { graph->TopologicalSorting(); } +void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } protected: void SetUp() 
{} @@ -162,6 +198,24 @@ TEST_F(UtestModelBuilderTest, test_save_atomic_bin) { EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS); } +TEST_F(UtestModelBuilderTest, build_model_for_get_task) { + Graph2SubGraphInfoList subgraphs; + std::map stream_max_parallel_num; + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + std::map option; + ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); + + MemoryAssigner mem_assigner(graph); + EXPECT_EQ(mem_assigner.AssignMemory(false, builder.mem_type_to_mem_offset_, builder.zero_copy_mem_size_), SUCCESS); + + ge::Model model; + EXPECT_EQ(builder.BuildModelDef(model), SUCCESS); + int64_t session_scope_mem_offset = 0; + ge::AttrUtils::GetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset); + EXPECT_EQ(session_scope_mem_offset, 1536); +} + TEST_F(UtestModelBuilderTest, test_model_save) { Graph2SubGraphInfoList subgraphs; std::map stream_max_parallel_num; diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3969ad9c..19b8aeab 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -43,6 +43,7 @@ #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" #include "proto/ge_ir.pb.h" +#include "graph/manager/graph_var_manager.h" #undef private #undef protected @@ -194,6 +195,11 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { } TEST_F(UtestGeExecutor, execute_graph_with_stream) { + VarManager::Instance(0)->Init(0, 0, 0, 0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); + DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); @@ -278,7 +284,6 @@ TEST_F(UtestGeExecutor, execute_graph_with_stream) { OutputData output_data; vector outputs; EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); - GraphExecutor graph_executer; graph_executer.init_flag_ = true; diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc index 7863a70f..5833a13a 100644 --- a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -28,8 +28,7 @@ #define protected public #define private public -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc new file mode 100644 index 00000000..4a336af9 --- /dev/null +++ b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_mem_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestSessionScopeMemAllocator : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestSessionScopeMemAllocator, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1000, 0); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, free_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + + EXPECT_EQ(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + EXPECT_NE(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + MemManager::Instance().Finalize(); +} diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index d6af6de9..f1ea7a27 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -35,7 +35,7 @@ #include "graph/manager/graph_context.h" #include "graph/optimize/graph_optimize.h" #include "graph/manager/util/variable_accelerate_ctrl.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph_builder_utils.h" #include "cce/dnn.h"
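A minimal usage sketch (illustration only, not part of the patch): the session scope path added above is driven entirely through MemManager, in the same way the new unit tests and the loop added to InnerSession::Finalize use it. Memory is requested against a session id and everything recorded for that session is released with a single Free call when the session ends. The function name RunWithSessionScopeMemory and the 1024-byte size are made up for illustration; the MemManager and SessionScopeMemAllocator calls are the ones introduced by this patch.

// Illustrative sketch of the session scope memory API added by this patch.
#include <vector>
#include "graph/manager/graph_mem_manager.h"

ge::Status RunWithSessionScopeMemory(uint64_t session_id) {
  // Initialize the manager once with the memory types to be managed.
  std::vector<rtMemType_t> memory_types = {RT_MEMORY_HBM};
  if (ge::MemManager::Instance().Initialize(memory_types) != ge::SUCCESS) {
    return ge::FAILED;
  }
  // Allocations made here stay alive for the whole session, so workspaces marked
  // as no-reuse across executions of the same session can share this block.
  uint8_t *buffer =
      ge::MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1024, session_id);
  if (buffer == nullptr) {
    return ge::FAILED;
  }
  // ... execute graphs of this session using buffer ...
  // At session teardown, release everything recorded for this session id,
  // mirroring the loop added to InnerSession::Finalize in this patch.
  for (auto memory_type : ge::MemManager::Instance().GetAllMemoryType()) {
    (void)ge::MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id);
  }
  // MemManager::Instance().Finalize() is left to process-level teardown, not per session.
  return ge::SUCCESS;
}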