From aa579530fb4d3f43f2919839484f5107da8d14d5 Mon Sep 17 00:00:00 2001
From: TangQunzhang
Date: Fri, 14 May 2021 09:31:05 +0800
Subject: [PATCH] Support session scope memory

---
 ge/CMakeLists.txt | 4 +
 ge/executor/CMakeLists.txt | 2 +
 ge/executor/ge_executor.cc | 2 +-
 ge/graph/build/memory/block_mem_assigner.cc | 101 +++++---
 ge/graph/build/memory/block_mem_assigner.h | 21 +-
 ge/graph/build/memory/graph_mem_assigner.cc | 26 +-
 ge/graph/build/memory/graph_mem_assigner.h | 4 +-
 ge/graph/build/memory/hybrid_mem_assigner.cc | 10 +-
 ge/graph/build/memory/hybrid_mem_assigner.h | 6 +-
 ge/graph/build/memory/memory_assigner.cc | 2 +-
 ge/graph/build/model_builder.cc | 17 +-
 ge/graph/build/model_builder.h | 2 +-
 ge/graph/load/model_manager/davinci_model.cc | 170 +++++++------
 ge/graph/load/model_manager/davinci_model.h | 8 +-
 ge/graph/load/model_manager/model_utils.cc | 19 +-
 .../load/model_manager/task_info/task_info.h | 20 +-
 ge/graph/manager/graph_caching_allocator.cc | 4 +-
 ge/graph/manager/graph_caching_allocator.h | 2 +-
 ge/graph/manager/graph_mem_allocator.cc | 116 +--------
 ge/graph/manager/graph_mem_allocator.h | 108 +-------
 ge/graph/manager/graph_mem_manager.cc | 114 +++++++++
 ge/graph/manager/graph_mem_manager.h | 141 +++++++++++
 ge/graph/manager/graph_var_manager.cc | 11 +-
 ge/graph/manager/memory_api.cc | 2 +-
 ge/graph/manager/rdma_pool_allocator.cc | 3 +-
 .../manager/session_scope_mem_allocator.cc | 85 +++++++
 .../manager/session_scope_mem_allocator.h | 123 +++++++++
 ge/hybrid/common/npu_memory_allocator.cc | 5 +-
 ge/hybrid/model/hybrid_model_builder.cc | 3 +-
 .../host_cpu/host_cpu_node_executor.cc | 3 +-
 ge/init/gelib.cc | 2 +-
 ge/session/inner_session.cc | 6 +
 ge/single_op/single_op_manager.cc | 3 +-
 inc/framework/memory/memory_assigner.h | 2 +-
 tests/ut/ge/CMakeLists.txt | 6 +
 .../ge/graph/build/mem_assigner_unittest.cc | 235 ++++++++++++++++--
 .../ge/graph/build/model_builder_unittest.cc | 54 ++++
 tests/ut/ge/graph/ge_executor_unittest.cc | 7 +-
 .../graph_caching_allocator_unittest.cc | 3 +-
 .../session_scope_mem_allocator_unittest.cc | 75 ++++++
 .../graph/passes/variable_op_pass_unittest.cc | 2 +-
 41 files changed, 1102 insertions(+), 427 deletions(-)
 create mode 100644 ge/graph/manager/graph_mem_manager.cc
 create mode 100644 ge/graph/manager/graph_mem_manager.h
 create mode 100644 ge/graph/manager/session_scope_mem_allocator.cc
 create mode 100644 ge/graph/manager/session_scope_mem_allocator.h
 create mode 100644 tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 6ff9f5d9..cc777f31 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -173,10 +173,12 @@ set(TRAIN_SRC_LIST
     "graph/manager/graph_manager_utils.cc"
     "graph/manager/graph_mem_allocator.cc"
     "graph/manager/graph_caching_allocator.cc"
+    "graph/manager/session_scope_mem_allocator.cc"
     "graph/manager/graph_var_manager.cc"
     "graph/manager/host_mem_manager.cc"
     "graph/manager/rdma_pool_allocator.cc"
     "graph/manager/host_mem_allocator.cc"
+    "graph/manager/graph_mem_manager.cc"
     "graph/manager/memory_api.cc"
     "graph/manager/model_manager/event_manager.cc"
     "graph/manager/trans_var_data_utils.cc"
@@ -478,6 +480,8 @@ set(INFER_SRC_LIST
     "graph/manager/host_mem_allocator.cc"
     "graph/manager/graph_mem_allocator.cc"
     "graph/manager/graph_caching_allocator.cc"
+    "graph/manager/session_scope_mem_allocator.cc"
+    "graph/manager/graph_mem_manager.cc"
     "model/ge_model.cc"
     "model/ge_root_model.cc"
     "graph/common/transop_util.cc"
diff --git a/ge/executor/CMakeLists.txt
b/ge/executor/CMakeLists.txt index 856e7cf1..820518ad 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,6 +28,8 @@ set(SRC_LIST "../graph/manager/graph_var_manager.cc" "../graph/manager/graph_mem_allocator.cc" "../graph/manager/graph_caching_allocator.cc" + "../graph/manager/session_scope_mem_allocator.cc" + "../graph/manager/graph_mem_manager.cc" "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 6d3114f4..e66dcb58 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -26,7 +26,7 @@ #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "single_op/single_op_manager.h" #include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 2756c6fe..9b81eae3 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -500,6 +500,7 @@ string MemoryBlock::String() { ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " "; ss << "real_size_list: " << ToString(real_size_list_) << " "; ss << "ref_count: " << ref_count_ << " "; + ss << "reuse_mem_: " << reuse_mem_ << " "; ss << "members: "; for (auto x : NodeTypeIndexList()) { ss << "__node: " << ToString(x) << " "; @@ -513,8 +514,8 @@ string MemoryBlock::String() { BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), - symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} + : compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), + anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); @@ -1123,7 +1124,7 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, - const bool continuous, int64_t memory_type) { + const bool continuous, uint64_t memory_type) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( n == nullptr, REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); @@ -1824,8 +1825,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } - int64_t memory_type = RT_MEMORY_HBM; - if (!GetWorkSpaceMemoryType(n, i, memory_type)) { + uint64_t memory_type = RT_MEMORY_HBM; + if (!GetWorkSpaceMemoryType(n, i, memory_type, workspace_reuse_flag)) { GELOGW("Get workspace memory type failed."); return; } @@ -1860,7 +1861,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { } void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type) { + 
MemoryBlock *mem_block, uint64_t memory_type) { bool reuse_mem_flag = ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; if (reuse_mem_flag) { @@ -1992,24 +1993,29 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } } -void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { - if (block.memory_type_ == RT_MEMORY_HBM) { - if (block.first_continuous_block_) { - mem_offset += MEM_ALIGN_SIZE; - } - block.Resize(); - block.SetHeadOffset(mem_offset); - mem_offset += block.Size(); - block.SetTailOffset(mem_offset - 1); - } else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { - if (block.first_continuous_block_) { - p2p_mem_offset += MEM_ALIGN_SIZE; +void AddBlockMemOffset(std::map &mem_offsets, MemoryBlock &block) { + auto it = mem_offsets.find(block.memory_type_); + if (it == mem_offsets.end()) { + auto result = mem_offsets.insert(std::pair(block.memory_type_, 0)); + // Insert failure is unlikely + if (!result.second) { + return; } - block.Resize(); - block.SetHeadOffset(p2p_mem_offset); - p2p_mem_offset += block.Size(); - block.SetTailOffset(p2p_mem_offset - 1); + it = result.first; + } + + if (it == mem_offsets.end()) { + return; + } + + auto &mem_offset = it->second; + if (block.first_continuous_block_) { + mem_offset += MEM_ALIGN_SIZE; } + block.Resize(); + block.SetHeadOffset(mem_offset); + mem_offset += block.Size(); + block.SetTailOffset(mem_offset - 1); } bool DynamicBatchBlockReuse(MemoryBlock &block) { @@ -2036,27 +2042,27 @@ void BlockMemAssigner::ResizeDynamicBatchBlocks() { } } - size_t max_mem_offset = mem_offset_; - size_t max_p2p_mem_offset = p2p_mem_offset_; + std::map max_mem_offsets = mem_offsets_; for (auto &batch_blocks : dynamic_batch_blocks) { - size_t mem_offset = mem_offset_; - size_t p2p_mem_offset = p2p_mem_offset_; + std::map mem_offsets = mem_offsets_; for (auto block : batch_blocks.second) { if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { continue; } - AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); + AddBlockMemOffset(mem_offsets, *block); } - if (mem_offset > max_mem_offset) { - max_mem_offset = mem_offset; - } - if (p2p_mem_offset > max_p2p_mem_offset) { - max_p2p_mem_offset = p2p_mem_offset; + + for (auto &it : mem_offsets) { + auto itmax = max_mem_offsets.find(it.first); + if (itmax == max_mem_offsets.end()) { + max_mem_offsets[it.first] = it.second; + } else if (it.second > itmax->second) { + itmax->second = it.second; + } + GELOGI("Batch:%s memory type:%ld offset:%zu", batch_blocks.first.c_str(), it.first, it.second); } - GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); } - mem_offset_ = max_mem_offset; - p2p_mem_offset_ = max_p2p_mem_offset; + mem_offsets_ = max_mem_offsets; } /// @@ -2074,11 +2080,13 @@ void BlockMemAssigner::ResizeMemoryBlocks() { continue; } - AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block); + AddBlockMemOffset(mem_offsets_, *memory_block); } ResizeDynamicBatchBlocks(); - GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," - "theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); + for (auto it : mem_offsets_) { + GELOGI("Memory type:%ld mem_offset exclude zero_copy_memory:%zu, theory_min_memory_size:%zu", it.first, it.second, + theory_min_memory_size_); + } } /// @@ -2217,7 +2225,8 @@ bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { 
(node_type == CONSTANTOP) || (node_type == HVDWAIT); } -bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { +bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, + vector &workspace_reuse_flag) { memory_type = RT_MEMORY_HBM; vector workspace_memory_type; auto op_desc = node->GetOpDesc(); @@ -2233,6 +2242,20 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, return false; } memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + if (has_workspace_no_reuse_scope && (index < workspace_no_reuse_scope.size()) + && (workspace_no_reuse_scope[index] == kSessionNoReuse)) { + memory_type |= kSessionScopeMemory; + if (workspace_reuse_flag.empty()) { + workspace_reuse_flag.assign(workspace_no_reuse_scope.size(), true); + } + // set to no reuse + workspace_reuse_flag[index] = false; + GELOGI("%s's workspace is session scope no reuse, memory type:%lu.", node->GetName().c_str(), memory_type); + } return true; } } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 3b1e0d31..231cce09 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -34,6 +34,10 @@ namespace ge { const size_t kMaxLifeTime = 0xffffffff; const int32_t kInvalidThreadScopeId = -1; +const uint64_t kSessionScopeMemory = 0x100000000; +const uint64_t kMemoryTypeMask = 0xffffffff; + +enum MemoryNoReuseScope { kReuse, kSessionNoReuse, kGraphNoReuse }; using DependStreamLife = std::map>; @@ -224,9 +228,7 @@ class BlockMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } @@ -329,14 +331,10 @@ class BlockMemAssigner : public MemAssigner { /// void UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type); - size_t mem_offset_; - size_t p2p_mem_offset_; - + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; - std::vector memory_blocks_; std::vector blocks_store_; - std::vector zero_memory_list_; // ref mapping @@ -380,7 +378,7 @@ class BlockMemAssigner : public MemAssigner { /// MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const ge::NodePtr &n, uint32_t out_index, const std::vector &workspace_reuse_flag, - const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); + const bool is_op_reuse_mem, const bool continuous, uint64_t memory_type); /// /// @ingroup GE @@ -394,7 +392,7 @@ class BlockMemAssigner : public MemAssigner { /// @author /// void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type); + MemoryBlock *mem_block, uint64_t memory_type); /// /// @ingroup GE @@ -457,7 +455,8 @@ class BlockMemAssigner : public MemAssigner { bool IsContinuousOutput(const NodePtr &n); - bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); + bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t 
&memory_type, + vector &workspace_reuse_flag); void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index a45fb239..8becd90e 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -107,11 +107,22 @@ Status GraphMemoryAssigner::AssignMemory() { compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); return ge::FAILED; } - MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); - memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - if (mem_assigner->GetP2PMemOffset() >= 0) { - MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); + for (auto pair : mem_assigner->GetMemOffsets()) { + MemoryOffset offset(pair.first, pair.second); + memory_offset_.emplace(pair.first, offset); + } + + // base memtype offset must be exist + auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + } + + it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0); memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); } @@ -224,7 +235,7 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out return SUCCESS; } -Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { +Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); @@ -264,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_mem_copy_size) { +Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size) { BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); if (priority_assigner == nullptr) { REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s", @@ -1398,6 +1409,9 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); } for (auto pair : memory_offset_) { + if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) { + continue; + } GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 0d9b03e5..a6a2a686 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -103,9 +103,9 @@ class GraphMemoryAssigner { ge::Status AssignMemory2HasRefAttrNode(); - ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); - ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); + ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); diff --git 
a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc index eff821bf..ccf673b3 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ -23,7 +23,7 @@ namespace ge { HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} + : compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size) { vector ranges; @@ -36,7 +36,10 @@ Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_ block_assigner->AssignMemoryWithReuse(ranges); - mem_size = block_assigner->GetMemOffset(); + // total size + for (auto it : block_assigner->GetMemOffsets()) { + mem_size += it.second; + } return SUCCESS; } @@ -73,8 +76,7 @@ Status HybridMemAssigner::Assign() { } priority_assigner->SetOpMemOffset(false); - mem_offset_ = priority_assigner->GetMemOffset(); - p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); + mem_offsets_ = priority_assigner->GetMemOffsets(); priority_assigner_ = std::move(priority_assigner); return SUCCESS; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h index 7baece44..2bdfd5c5 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -42,16 +42,14 @@ class HybridMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } private: Status AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size); - size_t mem_offset_; - size_t p2p_mem_offset_; + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 34b97c60..570aae07 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -20,7 +20,7 @@ #include "graph/build/memory/graph_mem_assigner.h" namespace ge { -Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { +Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { GraphMemoryAssigner graph_mem_assigner(compute_graph_); if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 0e625990..ce2f57f9 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -47,6 +47,7 @@ #include "omg/version.h" #include "register/op_registry.h" #include "graph/passes/set_input_output_offset_pass.h" +#include "graph/build/memory/block_mem_assigner.h" using std::map; using std::set; @@ -398,9 +399,21 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); return FAILED); + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_mem_offset = 0; + auto it = mem_type_to_mem_offset_.find(mem_type_session_scope); + if (it != mem_type_to_mem_offset_.end()) { + 
session_scope_mem_offset = it->second; + } if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset), + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", + ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.c_str()); + GELOGE(FAILED, "SetInt of ATTR_NAME_SESSION_SCOPE_MEMORY_SIZE failed."); + return FAILED); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); @@ -434,8 +447,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str()); GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str()); return FAILED); - GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, - p2p_mem_offset_, zero_copy_mem_size_); + GELOGI("For model, max_mem_offset: %zu, p2p_mem_size: %zu, zero_copy_mem_size: %zu, session_scope_mem_size: %zu", + max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_, session_scope_mem_offset); string fp_ceiling_mode; if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 67def859..6f097329 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -93,7 +93,7 @@ class ModelBuilder { uint64_t session_id_; - map mem_type_to_mem_offset_; + map mem_type_to_mem_offset_; size_t weight_offset_; diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 5d90d5a1..b52796c8 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -40,7 +40,7 @@ #include "graph/load/model_manager/cpu_queue_schedule.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/tbe_handle_store.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/manager/util/debug.h" @@ -60,6 +60,8 @@ #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" #include "graph/common/omg_util.h" +#include "graph/build/memory/block_mem_assigner.h" +#include "graph/manager/session_scope_mem_allocator.h" // create std::thread, catch exceptions using try/catch #define CREATE_STD_THREAD(thread_id, func, args) \ @@ -168,7 +170,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr(dev_ptr); - p2p_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { @@ -422,24 +421,13 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { is_inner_mem_base_ = true; } - if (p2p_data_size != 0) { - p2p_mem_base_ = MallocP2PMem(p2p_data_size); - if (p2p_mem_base_ == nullptr) { - REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, p2p_data_size:%zu, model_id:%u, check invalid", - p2p_data_size, model_id_); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, 
"[Alloc][Memory] for p2p failed, size:%zu, model_id:%u", - p2p_data_size, model_id_); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - p2p_mem_base_, p2p_data_size); - is_inner_p2p_mem_base_ = true; + if (!runtime_param_.memory_infos.empty()) { + GE_CHK_STATUS_RET(MallocExMem(), "MallocExMem failed."); } GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; - runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; return SUCCESS; } @@ -465,7 +453,6 @@ Status DavinciModel::InitVariableMem() { void DavinciModel::InitRuntimeParams() { int64_t value = 0; bool ret; - MemInfo p2p_mem_info; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); runtime_param_.mem_size = ret ? (uint64_t)value : 0; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); @@ -490,16 +477,18 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.var_size = ret ? (uint64_t)value : 0; session_id_ = runtime_param_.session_id; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); - p2p_mem_info.memory_size = ret ? (uint64_t)value : 0; + MemInfo p2p_mem_info; + p2p_mem_info.memory_size = static_cast(ret ? value : 0); + p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR; + p2p_mem_info.memory_key = "_p"; runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); - GELOGI( - "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " - "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " - "memory_size:%lu, weight_size:%lu, var_size:%lu", - runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, - runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, - runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); + ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, value); + MemInfo session_scope_mem_info; + session_scope_mem_info.memory_size = static_cast(ret ? 
value : 0);
+  runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info);
+
+  GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str());
 }

 void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) {
@@ -4089,14 +4078,15 @@ Status DavinciModel::InitEntryTask() {
 uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
   uint8_t *mem_base = nullptr;
   const string purpose("feature map,used for op input and output.");
-  char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
+  char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
   INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
   if (res == EN_OK) {
     data_size = static_cast(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize());
     string memory_key = std::to_string(0) + "_f";
-    mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId());
+    mem_base =
+        MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, data_size, GetDeviceId());
   } else {
-    mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, data_size, GetDeviceId());
+    mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, data_size, GetDeviceId());
   }

   if (mem_base != nullptr) {
@@ -4105,83 +4095,119 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
   return mem_base;
 }

-uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) {
-  uint8_t *p2p_mem_base = nullptr;
-  const string purpose("p2p memory, used for some op related to hcom");
-  if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
-    string p2p_memory_key = std::to_string(0) + "_p";
-    p2p_mem_base =
-        MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId());
-  } else {
-    p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId());
+Status DavinciModel::MallocExMem() {
+  char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
+  INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
+  for (auto it : runtime_param_.memory_infos) {
+    auto mem_size = it.second.memory_size;
+    if (mem_size == 0) {
+      continue;
+    }
+    bool session_scope = ((kSessionScopeMemory & it.first) == kSessionScopeMemory);
+    auto mem_type = it.first & kMemoryTypeMask;
+    uint8_t *mem_base = nullptr;
+    const string purpose("p2p memory, used for some op related to hcom or session scope memory");
+    if (session_scope) {
+      mem_base = MemManager::Instance().SessionScopeMemInstance(mem_type).Malloc(mem_size, runtime_param_.session_id);
+    } else if (res_static_memory == EN_OK) {
+      string memory_key = std::to_string(0) + it.second.memory_key;
+      mem_base =
+          MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, memory_key, mem_size, GetDeviceId());
+    } else {
+      mem_base = MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, mem_size, GetDeviceId());
+    }
+
+    if (mem_base == nullptr) {
+      REPORT_CALL_ERROR("E19999", "MallocExMem fail, type:%ld size:%zu, model_id:%u, check invalid",
+                        mem_type, mem_size, model_id_);
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc ex memory failed, type:%ld size: %zu", mem_type, mem_size);
+      return ACL_ERROR_GE_MEMORY_ALLOCATION;
+    }
+    it.second.memory_base = mem_base;
+    GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] mem_type[%ld] mem_addr[%p] mem_size[%zu]",
+           runtime_param_.graph_id, mem_type, mem_base, mem_size);
   }
-  return p2p_mem_base;
+  return SUCCESS;
 }
uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { uint8_t *weights_mem_base = nullptr; const string purpose("weights memory in inference network."); - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string weight_memory_key = std::to_string(0) + "_w"; - weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); + weights_mem_base = MemManager::Instance() + .MemInstance(RT_MEMORY_HBM) + .MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); } else { - weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); + weights_mem_base = + MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, weights_size, GetDeviceId()); } return weights_mem_base; } void DavinciModel::FreeFeatureMapMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK && is_inner_mem_base_) { string weight_memory_key = std::to_string(0) + "_f"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); + if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(weight_memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weight_memory_key, GetDeviceId()), + "failed to free weight memory"); } mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + mem_base_ != nullptr && is_inner_mem_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(mem_base_, GetDeviceId()), + "failed to free feature_map memory"); + mem_base_ = nullptr); } } -void DavinciModel::FreeP2PMem() { - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - std::string p2p_memory_key = std::to_string(0) + "_p"; - if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); +void DavinciModel::FreeExMem() { + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; + INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); + for (auto it : runtime_param_.memory_infos) { + // free when session destory + if ((kSessionScopeMemory & it.first) == kSessionScopeMemory) { + continue; + } + auto mem_type = it.first & kMemoryTypeMask; + if (res_static_memory == EN_OK) { + std::string memory_key = std::to_string(0) + it.second.memory_key; + if (MemManager::Instance().MemInstance(mem_type).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(memory_key, GetDeviceId()), + "failed to free memory"); + } + it.second.memory_base = nullptr; + } else { + GE_IF_BOOL_EXEC( + it.second.memory_base != nullptr, + 
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(it.second.memory_base, GetDeviceId()), + "failed to free memory"); + it.second.memory_base = nullptr); } - p2p_mem_base_ = nullptr; - } else { - GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - p2p_mem_base_ = nullptr); } } void DavinciModel::FreeWeightsMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string memory_key = std::to_string(0) + "_w"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); + if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key, GetDeviceId()), + "failed to free feature_map memory"); } weights_mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weights_mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - weights_mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weights_mem_base_, GetDeviceId()), + "failed to free weight memory"); + weights_mem_base_ = nullptr); } } diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index a4abcae6..e4898dec 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -248,8 +248,6 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const map &P2PMemInfos() const { return runtime_param_.memory_infos; } - // model name string Name() const { return name_; } @@ -586,10 +584,8 @@ class DavinciModel { // memory address of model uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever. 
uint8_t *mem_base_; - uint8_t *p2p_mem_base_; bool is_inner_mem_base_; bool is_inner_weight_base_; - bool is_inner_p2p_mem_base_; // input data manager DataInputer *data_inputer_; int64_t load_begin_time_; @@ -668,13 +664,13 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t *MallocP2PMem(size_t p2p_data_size); + Status MallocExMem(); void FreeFeatureMapMem(); void FreeWeightsMem(); - void FreeP2PMem(); + void FreeExMem(); void ReleaseTask(); diff --git a/ge/graph/load/model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc index 058a538f..f6ff591a 100755 --- a/ge/graph/load/model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -21,6 +21,7 @@ #include "graph/utils/tensor_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/types.h" +#include "graph/build/memory/block_mem_assigner.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ @@ -514,10 +515,16 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); bool has_mem_type_workspace = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { // Temporary solution, the aicpu workspace of multiple images cannot be shared. - if (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && - !model_param.is_single_op) { + bool aicpu_work_space = (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && + !model_param.is_single_op); + if (aicpu_work_space) { void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); v_workspace_data_addr.push_back(mem_addr); GELOGI( @@ -548,7 +555,13 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); - uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; + uint8_t *mem_addr = nullptr; + bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()); + if (session_scope_memory) { + mem_addr = model_param.memory_infos.at(kSessionScopeMemory | RT_MEMORY_HBM).memory_base + v_workspace_offset[i]; + } else { + mem_addr = model_param.mem_base + v_workspace_offset[i]; + } v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], diff --git a/ge/graph/load/model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h index 99ec3c4e..5657f003 100644 --- a/ge/graph/load/model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -18,6 +18,7 @@ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_ #include +#include #include "cce/customize.h" #include "framework/common/taskdown_common.h" @@ -28,9 +29,11 @@ namespace ge { struct MemInfo { - uint64_t memory_size = 0; + size_t memory_size = 0; uint64_t logic_memory_base = 0; uint8_t 
*memory_base = nullptr; + uint32_t memory_type = RT_MEMORY_HBM; + std::string memory_key = ""; }; struct RuntimeParam { @@ -40,6 +43,19 @@ struct RuntimeParam { } ~RuntimeParam() = default; + std::string ToString() { + std::stringstream ss; + ss << "session_id:" << session_id << ", stream_num:" << stream_num << ", event_num:" << event_num + << ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base + << ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base + << ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size + << ", ex_memory_info:"; + for (auto it : memory_infos) { + ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]"; + } + return ss.str(); + } + uint64_t mem_size = 0; uint64_t logic_mem_base = 0; uint8_t *mem_base = nullptr; @@ -49,7 +65,7 @@ struct RuntimeParam { uint64_t var_size = 0; uint64_t logic_var_base = 0; uint8_t *var_base = nullptr; - std::map memory_infos; + std::map memory_infos; uint32_t batch_num = 0; uint32_t stream_num = 0; uint32_t event_num = 0; diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 8c8df326..75aa5c01 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -21,7 +21,7 @@ #include #include "framework/common/debug/ge_log.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, @@ -117,7 +117,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) { } free_block_bins_[i] = bin_ptr; } - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index a9c3202a..2db00ff2 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -88,8 +88,8 @@ class CachingAllocator { /// /// @ingroup ge_graph /// @brief free memory + /// @param [in] memory_ptr memory address ptr /// @param [in] device_id device id - /// @param [out] memory_ptr memory address ptr /// @return Status result of function /// Status Free(uint8_t *memory_addr, uint32_t device_id = 0); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 24e75356..0cccaf99 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -17,11 +17,9 @@ #include "graph/manager/graph_mem_allocator.h" #include -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" + namespace ge { -void MemoryAllocator::Initialize(uint32_t device_id) { +Status MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); // when redo Initialize free memory @@ -31,6 +29,7 @@ void MemoryAllocator::Initialize(uint32_t device_id) { } } memory_base_map_.clear(); + return SUCCESS; } void MemoryAllocator::Finalize(uint32_t device_id) { @@ -152,113 +151,4 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic return it->second.memory_addr_; } - -MemManager::MemManager() {} - -MemManager::~MemManager() { Finalize(); } - -MemManager &MemManager::Instance() { - static 
MemManager mem_manager; - return mem_manager; -} - -MemoryAllocator *MemManager::Instance(rtMemType_t memory_type) { return Instance().GetMemoryAllocator(memory_type); } - -Status MemManager::Initialize(const std::vector &memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = memory_allocator_map_.find(index); - if (it == memory_allocator_map_.end()) { - memory_allocator = new (std::nothrow) MemoryAllocator(index); - - if (memory_allocator != nullptr) { - memory_allocator_map_[index] = memory_allocator; - GELOGI("Create MemoryAllocator memory type[%u] success.", index); - } else { - REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); - } - } else { - memory_allocator = it->second; - } - - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - memory_allocator->Initialize(0); - } - } - - auto ret = InitAllocator(memory_type, caching_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create CachingAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, rdma_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create RdmaAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, host_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create HostMemAllocator failed."); - return ret; - } - return SUCCESS; -} - -template -void FinalizeAllocatorMap(std::map &allocate_map) { - for (auto &allocator : allocate_map) { - if (allocator.second != nullptr) { - allocator.second->Finalize(); - delete allocator.second; - allocator.second = nullptr; - } - } - allocate_map.clear(); -} - -void MemManager::Finalize() noexcept { - GELOGI("Finalize."); - std::lock_guard lock(allocator_mutex_); - // caching and rdma allocator use memory allocator, so finalize them first - FinalizeAllocatorMap(caching_allocator_map_); - FinalizeAllocatorMap(rdma_allocator_map_); - FinalizeAllocatorMap(host_allocator_map_); - FinalizeAllocatorMap(memory_allocator_map_); -} - -MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - auto it = memory_allocator_map_.find(memory_type); - if (it != memory_allocator_map_.end()) { - memory_allocator = it->second; - } - - // Usually impossible - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); - static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); - return &default_memory_allocator; - } - - return memory_allocator; -} - -CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, caching_allocator_map_); -} - -RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, rdma_allocator_map_); -} -HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, host_allocator_map_); -} } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 9f8b86b2..b6d73f0a 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -26,7 +26,6 @@ #include 
"framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/manager/host_mem_allocator.h" #include "graph/node.h" #include "runtime/mem.h" @@ -71,9 +70,9 @@ class MemoryAllocator { /// @ingroup ge_graph /// @brief memory allocator init /// @param [in] options user config params - /// @return void + /// @return Status of init /// - void Initialize(uint32_t device_id = 0); + Status Initialize(uint32_t device_id = 0); /// /// @ingroup ge_graph @@ -136,109 +135,6 @@ class MemoryAllocator { bool mem_malloced_; map memory_base_map_; }; - -using MemoryAllocatorPtr = std::shared_ptr; -class CachingAllocator; -class RdmaPoolAllocator; -class MemManager { - public: - MemManager(); - virtual ~MemManager(); - static MemManager &Instance(); - static MemoryAllocator *Instance(rtMemType_t memory_type); - CachingAllocator &CachingInstance(rtMemType_t memory_type); - RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); - HostMemAllocator &HostMemInstance(rtMemType_t memory_type); - MemManager(const MemManager &) = delete; - MemManager &operator=(const MemManager &) = delete; - /// - /// @ingroup ge_graph - /// @brief memory allocator manager init - /// @param [in] options user config params - /// @return Status result of function - /// - Status Initialize(const std::vector &memory_type); - - /// - /// @ingroup ge_graph - /// @brief memory allocator finalize - /// @return void - /// - void Finalize() noexcept; - - private: - /// - /// @ingroup ge_graph - /// @brief ge memory allocator - /// @param [in] memory_type memory type - /// @return MemoryAllocator ptr - /// - MemoryAllocator *GetMemoryAllocator(rtMemType_t memory_type); - - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Status result of function - /// - template - Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { - T *allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = allocate_map.find(index); - if (it == allocate_map.end()) { - allocator = new (std::nothrow) T(index); - if (allocator != nullptr) { - allocate_map[index] = allocator; - GELOGI("Create Allocator memory type[%u] success.", index); - } else { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); - } - } else { - allocator = it->second; - } - - if (allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - if (allocator->Initialize() != SUCCESS) { - return ACL_ERROR_GE_INTERNAL_ERROR; - } - } - } - return SUCCESS; - } - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Allocator ptr - /// - template - T &GetAllocator(rtMemType_t memory_type, std::map allocate_map) { - std::lock_guard lock(allocator_mutex_); - T *allocator = nullptr; - auto it = allocate_map.find(memory_type); - if (it != allocate_map.end()) { - allocator = it->second; - } - - // Usually impossible - if (allocator == nullptr) { - GELOGW("Get allocator failed, memory type is %u.", memory_type); - static T default_allocator(RT_MEMORY_RESERVED); - return default_allocator; - } - return *allocator; - } - - std::map memory_allocator_map_; - std::map caching_allocator_map_; - std::map rdma_allocator_map_; - std::map host_allocator_map_; - std::recursive_mutex allocator_mutex_; -}; } // namespace ge #endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ diff 
--git a/ge/graph/manager/graph_mem_manager.cc b/ge/graph/manager/graph_mem_manager.cc
new file mode 100644
index 00000000..8d300dc2
--- /dev/null
+++ b/ge/graph/manager/graph_mem_manager.cc
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph/manager/graph_mem_manager.h"
+
+#include
+
+namespace ge {
+MemManager::MemManager() {}
+
+MemManager::~MemManager() { Finalize(); }
+
+MemManager &MemManager::Instance() {
+  static MemManager mem_manager;
+  return mem_manager;
+}
+
+Status MemManager::Initialize(const std::vector &memory_type) {
+  std::lock_guard lock(allocator_mutex_);
+  if (init_) {
+    GELOGW("MemManager has been inited.");
+    return SUCCESS;
+  }
+
+  auto ret = InitAllocator(memory_type, memory_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create MemoryAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, caching_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create CachingAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, rdma_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create RdmaAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, host_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create HostMemAllocator failed.");
+    return ret;
+  }
+
+  ret = InitAllocator(memory_type, session_scope_allocator_map_);
+  if (ret != SUCCESS) {
+    GELOGE(ret, "Create SessionScopeMemAllocator failed.");
+    return ret;
+  }
+  init_ = true;
+  return SUCCESS;
+}
+
+template
+void FinalizeAllocatorMap(std::map &allocate_map) {
+  for (auto &allocator : allocate_map) {
+    if (allocator.second != nullptr) {
+      allocator.second->Finalize();
+      delete allocator.second;
+      allocator.second = nullptr;
+    }
+  }
+  allocate_map.clear();
+}
+
+void MemManager::Finalize() noexcept {
+  GELOGI("Finalize.");
+  std::lock_guard lock(allocator_mutex_);
+  // caching and rdma allocator use memory allocator, so finalize them first
+  FinalizeAllocatorMap(session_scope_allocator_map_);
+  FinalizeAllocatorMap(caching_allocator_map_);
+  FinalizeAllocatorMap(rdma_allocator_map_);
+  FinalizeAllocatorMap(host_allocator_map_);
+  FinalizeAllocatorMap(memory_allocator_map_);
+  init_ = false;
+}
+
+MemoryAllocator &MemManager::MemInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, memory_allocator_map_);
+}
+
+CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, caching_allocator_map_);
+}
+
+RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, rdma_allocator_map_);
+}
+
+HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, host_allocator_map_);
+}
+
+SessionScopeMemAllocator &MemManager::SessionScopeMemInstance(rtMemType_t memory_type) {
+  return GetAllocator(memory_type, session_scope_allocator_map_);
+}
+} // namespace ge
diff --git
a/ge/graph/manager/graph_mem_manager.h b/ge/graph/manager/graph_mem_manager.h new file mode 100644 index 00000000..d7993ed4 --- /dev/null +++ b/ge/graph/manager/graph_mem_manager.h @@ -0,0 +1,141 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ +#define GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ + +#include +#include +#include +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/session_scope_mem_allocator.h" +#include "graph/node.h" +#include "runtime/mem.h" + +namespace ge { +using MemoryAllocatorPtr = std::shared_ptr; + +class MemManager { + public: + MemManager(); + virtual ~MemManager(); + static MemManager &Instance(); + MemoryAllocator &MemInstance(rtMemType_t memory_type); + CachingAllocator &CachingInstance(rtMemType_t memory_type); + RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); + HostMemAllocator &HostMemInstance(rtMemType_t memory_type); + SessionScopeMemAllocator &SessionScopeMemInstance(rtMemType_t memory_type); + MemManager(const MemManager &) = delete; + MemManager &operator=(const MemManager &) = delete; + /// + /// @ingroup ge_graph + /// @brief memory allocator manager init + /// @param [in] options user config params + /// @return Status result of function + /// + Status Initialize(const std::vector &memory_type); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize + /// @return void + /// + void Finalize() noexcept; + + const std::vector &GetAllMemoryType() const { return memory_type_; } + + private: + /// + /// @ingroup ge_graph + /// @param [in] memory_type memory type + /// @param [in] allocate_map memory allocator map + /// @return Status result of function + /// + template + Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { + T *allocator = nullptr; + for (unsigned int index : memory_type) { + auto it = allocate_map.find(index); + if (it == allocate_map.end()) { + allocator = new (std::nothrow) T(index); + if (allocator != nullptr) { + allocate_map[index] = allocator; + GELOGI("Create Allocator memory type[%u] success.", index); + } else { + REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); + } + } else { + allocator = it->second; + } + + if (allocator == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } else { + if (allocator->Initialize() != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + } + } + return SUCCESS; + } + /// + /// @ingroup ge_graph + /// @param [in] memory_type 
memory type + /// @param [in] allocate_map memory allocator map + /// @return Allocator ptr + /// + template <typename T> + T &GetAllocator(rtMemType_t memory_type, std::map<rtMemType_t, T *> &allocate_map) { + std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); + T *allocator = nullptr; + auto it = allocate_map.find(memory_type); + if (it != allocate_map.end()) { + allocator = it->second; + } + + // Usually impossible; fall back to a default allocator so callers always get a valid reference + if (allocator == nullptr) { + GELOGW("Get allocator failed, memory type is %u.", memory_type); + static T default_allocator(RT_MEMORY_RESERVED); + return default_allocator; + } + return *allocator; + } + + std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; + std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; + std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; + std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; + std::map<rtMemType_t, SessionScopeMemAllocator *> session_scope_allocator_map_; + std::recursive_mutex allocator_mutex_; + std::vector<rtMemType_t> memory_type_; + bool init_ = false; +}; +} // namespace ge + +#endif  // GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 79103b88..5f7586da 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -17,8 +17,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/debug/ge_attr_define.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/type_utils.h" @@ -728,7 +727,7 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; const string purpose("variables and constant op memory in training network."); - var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size); + var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size); if (var_mem_base == nullptr) { GELOGE(ge::INTERNAL_ERROR, "VarManager::MallocVarMemory failed " @@ -745,7 +744,7 @@ uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); } string memory_key = std::to_string(session_id_); - return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); + return MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { @@ -754,7 +753,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty return logic_addr; } string mem_key = std::to_string(session_id_); - uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); + uint8_t *mem_base = MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(mem_key); if (mem_base == nullptr) { return nullptr; } @@ -766,7 +765,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_ty ge::Status VarManager::FreeVarMemory() { std::lock_guard lock(mutex_); string memory_key = std::to_string(SessionId()); - return MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key); + return MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key); } ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 415f8088..8e737021 100644 --- a/ge/graph/manager/memory_api.cc +++
b/ge/graph/manager/memory_api.cc @@ -19,7 +19,7 @@ #include #include "common/ge/plugin_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/rdma_pool_allocator.h" #include "graph/utils/type_utils.h" diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index c19a2159..58829adb 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -20,6 +20,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" #include "runtime/dev.h" +#include "graph/manager/graph_mem_manager.h" namespace { const size_t kAlignedSize = 512; @@ -49,7 +50,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) })) {} Status RdmaPoolAllocator::Initialize() { - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } diff --git a/ge/graph/manager/session_scope_mem_allocator.cc b/ge/graph/manager/session_scope_mem_allocator.cc new file mode 100644 index 00000000..8eb01445 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.cc @@ -0,0 +1,85 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/manager/session_scope_mem_allocator.h" + +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "graph/manager/graph_mem_manager.h" + +namespace ge { + +SessionScopeMemAllocator::SessionScopeMemAllocator(rtMemType_t memory_type) + : memory_type_(memory_type), memory_allocator_(nullptr) {} + +Status SessionScopeMemAllocator::Initialize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + // free any memory left over from a previous Initialize call + FreeAllMemory(); + std::lock_guard<std::recursive_mutex> lock(mutex_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); + if (memory_allocator_ == nullptr) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::Finalize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + FreeAllMemory(); +} + +uint8_t *SessionScopeMemAllocator::Malloc(size_t size, uint64_t session_id, uint32_t device_id) { + GELOGI("Start malloc memory, size:%zu, session id:%lu device id:%u", size, session_id, device_id); + const std::string purpose = "Memory for session scope."; + auto ptr = memory_allocator_->MallocMemory(purpose, size, device_id); + if (ptr == nullptr) { + GELOGE(ge::FAILED, "Malloc failed, not enough memory for size:%zu, session_id:%lu device_id:%u", size, + session_id, device_id); + return nullptr; + } + std::lock_guard<std::recursive_mutex> lock(mutex_); + std::shared_ptr<uint8_t> mem_ptr(ptr, [&](uint8_t *p) { (void)memory_allocator_->FreeMemory(p); }); + allocated_memory_[session_id].emplace_back(size, mem_ptr); + return ptr; +} + +Status SessionScopeMemAllocator::Free(uint64_t session_id, uint32_t device_id) { + GELOGI("Free session:%lu memory, device id:%u.", session_id, device_id); + std::lock_guard<std::recursive_mutex> lock(mutex_); + auto it = allocated_memory_.find(session_id); + if (it == allocated_memory_.end()) { + REPORT_INNER_ERROR("E19999", "Memory not allocated for session_id:%lu device_id:%u, check invalid", + session_id, device_id); + GELOGE(PARAM_INVALID, "Invalid session_id:%lu", session_id); + return ge::PARAM_INVALID; + } + allocated_memory_.erase(it); + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::FreeAllMemory() { + GELOGI("Free all memory"); + std::lock_guard<std::recursive_mutex> lock(mutex_); + for (auto &session_mem : allocated_memory_) { + session_mem.second.clear(); + } + allocated_memory_.clear(); +} +} // namespace ge diff --git a/ge/graph/manager/session_scope_mem_allocator.h b/ge/graph/manager/session_scope_mem_allocator.h new file mode 100644 index 00000000..5aea9554 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.h @@ -0,0 +1,124 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ +#define GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "graph/node.h" +#include "graph/manager/block_memory.h" +#include "runtime/mem.h" +#include "graph/manager/graph_mem_allocator.h" + +namespace ge { +class SessionScopeMemoryInfo { + public: + SessionScopeMemoryInfo(size_t size, const std::shared_ptr<uint8_t> &ptr) : size(size), ptr(ptr) {} + SessionScopeMemoryInfo() = delete; + virtual ~SessionScopeMemoryInfo() = default; + + SessionScopeMemoryInfo(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return; + } + size = other.size; + ptr = other.ptr; + } + + SessionScopeMemoryInfo &operator=(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return *this; + } + size = other.size; + ptr = other.ptr; + return *this; + } + + private: + size_t size = 0; + std::shared_ptr<uint8_t> ptr = nullptr; +}; + +class SessionScopeMemAllocator { + public: + explicit SessionScopeMemAllocator(rtMemType_t memory_type); + + SessionScopeMemAllocator(const SessionScopeMemAllocator &) = delete; + + SessionScopeMemAllocator &operator=(const SessionScopeMemAllocator &) = delete; + + virtual ~SessionScopeMemAllocator() = default; + + /// + /// @ingroup ge_graph + /// @brief session scope memory allocator init + /// @param [in] device_id device id + /// @return Status of init + /// + Status Initialize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize, release all memory + /// @return void + /// + void Finalize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief malloc memory + /// @param [in] size memory size + /// @param [in] session_id session id + /// @param [in] device_id device id + /// @return memory address + /// + uint8_t *Malloc(size_t size, uint64_t session_id, uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief free memory + /// @param [in] session_id session id + /// @param [in] device_id device id + /// @return Status result of function + /// + Status Free(uint64_t session_id, uint32_t device_id = 0); + + private: + void FreeAllMemory(); + + private: + rtMemType_t memory_type_; + + // device memory allocator + MemoryAllocator *memory_allocator_; + + // lock around all operations + mutable std::recursive_mutex mutex_; + + // allocated memory blocks, keyed by session id + std::unordered_map<uint64_t, std::vector<SessionScopeMemoryInfo>> allocated_memory_; +}; +} // namespace ge +#endif  // GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index 5a04c461..b66038d9 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -17,10 +17,7 @@ #include "npu_memory_allocator.h" #include #include "framework/common/debug/log.h" -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index e108dddf..0629bd97 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -26,8 +26,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#include
"graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 6e8a1eb9..d35989a1 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,8 +18,7 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 96ed1b9c..2374e75f 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -39,7 +39,7 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "runtime/kernel.h" diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 9308e267..39c87107 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -32,6 +32,7 @@ #include "graph/common/local_context.h" #include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" @@ -155,6 +156,11 @@ Status InnerSession::Finalize() { // release var memory GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); + + for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { + (void)MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id_); + } + // release analyzer saved info(Session Level) Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index 180b50c1..d09dd802 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -19,8 +19,7 @@ #include #include -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index f5837b3a..173cc64e 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 7cdec968..e7a8ec73 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -337,8 +337,10 @@ set(COMMON_SRC_FILES 
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -396,8 +398,10 @@ set(GRAPH_LOAD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) @@ -792,6 +796,7 @@ set(MULTI_PARTS_TEST_FILES "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" "graph/manager/graph_caching_allocator_unittest.cc" + "graph/manager/session_scope_mem_allocator_unittest.cc" "graph/manager/run_graph_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc" "graph/manager/graph_manager_unittest.cc" @@ -824,6 +829,7 @@ set(PROFILING_MNG_TEST_FILES set(HYBRID_TEST_FILES "hybrid/ge_hybrid_unittest.cc" "hybrid/known_node_executor_unittest.cc" + "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/executor/subgraph_executor_unittest.cc" "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/model/hybrid_model_builder_unittest.cc" diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index 2a0f2405..c9b0b579 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -44,7 +44,8 @@ using domi::GetContext; class UtestMemoryAssignerTest : public testing::Test { public: - ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", int64_t size = 1024) { + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", + int64_t size = 1024) { ge::OpDescPtr op_def = make_shared(name, type); auto desc_temp_ptr = make_shared(); auto desc_temp = *desc_temp_ptr; @@ -214,7 +215,8 @@ class UtestMemoryAssignerTest : public testing::Test { return builder.GetGraph(); } - void make_ffts_reuse_graph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, + + void MakeFftsReuseGraph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, int32_t thread_scope_id_2 = kInvalidThreadScopeId) { ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); @@ -253,28 +255,119 @@ class UtestMemoryAssignerTest : public testing::Test { graph->TopologicalSorting(); } + void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = 
CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeContinuousReuseGraph(ge::ComputeGraphPtr graph, bool nopading = false) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + if (nopading) { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_OUTPUT_REUSE_INPUT, true); + (void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, 0); + } else { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_OUTPUT, true); + } + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeMultiBatchReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = 
CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + (void)ge::AttrUtils::SetStr(op_def_b, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_c, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_e, ATTR_NAME_BATCH_LABEL, "Batch_1"); + (void)ge::AttrUtils::SetStr(op_def_f, ATTR_NAME_BATCH_LABEL, "Batch_1"); + vector workspace_no_reuse_scope = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + protected: void SetUp() {} void TearDown() { GetContext().out_nodes_map.clear(); } }; -/* -TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { - ge::ComputeGraphPtr graph = make_shared(""); - ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); - ge::NodePtr node_a = graph->AddNode(op_def_a); - MemoryBlock* memory_block = new MemoryBlock(0); - memory_block->Init(1, kOutput, node_a, 0, 1); - memory_block->real_size_list_.clear(); - memory_block->Resize(); - - EXPECT_EQ(memory_block->Size(), 0); - - delete memory_block; -} -*/ - namespace ge { class MockBlockMemAssigner : public BlockMemAssigner { @@ -313,12 +406,44 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); } +TEST_F(UtestMemoryAssignerTest, block_memory_assign_nopading_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph, true); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 8192); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, block_memory_assign_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph); + map mem_offset; + size_t zero_copy_mem_size = 0; + MemoryAssigner memoryAssigner(graph); + ge::Status ret = memoryAssigner.AssignMemory(false, mem_offset, zero_copy_mem_size); + size_t offset = 0; + auto it = mem_offset.find(RT_MEMORY_HBM); + if (it != mem_offset.end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 11264); + EXPECT_EQ(ret, SUCCESS); 
+} + TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { ge::ComputeGraphPtr graph = make_shared(""); MakeGraph(graph); auto node_f = graph->FindNode("F"); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -335,7 +460,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { std::string value = "A"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -356,7 +481,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { std::string value = "M"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -460,30 +585,86 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); + MakeFftsReuseGraph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 5120); EXPECT_EQ(ret, SUCCESS); } TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, 0, 1); + MakeFftsReuseGraph(graph, 0, 1); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 6656); EXPECT_EQ(ret, SUCCESS); } TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, 0, kInvalidThreadScopeId); + MakeFftsReuseGraph(graph, 0, kInvalidThreadScopeId); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 5632); EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, one_session_scope_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } 
+ + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 5120); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, multi_batch_reuse) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeMultiBatchReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 6656); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); } \ No newline at end of file diff --git a/tests/ut/ge/graph/build/model_builder_unittest.cc b/tests/ut/ge/graph/build/model_builder_unittest.cc index d5efc9bb..628d0fda 100644 --- a/tests/ut/ge/graph/build/model_builder_unittest.cc +++ b/tests/ut/ge/graph/build/model_builder_unittest.cc @@ -30,6 +30,7 @@ #define protected public #define private public #include "graph/build/model_builder.h" +#include "memory/memory_assigner.h" #undef protected #undef private @@ -127,6 +128,41 @@ class UtestModelBuilderTest : public testing::Test { graph->TopologicalSorting(); } +void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } protected: void SetUp() 
{} @@ -162,6 +198,24 @@ TEST_F(UtestModelBuilderTest, test_save_atomic_bin) { EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS); } +TEST_F(UtestModelBuilderTest, build_model_for_get_task) { + Graph2SubGraphInfoList subgraphs; + std::map stream_max_parallel_num; + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + std::map option; + ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); + + MemoryAssigner mem_assigner(graph); + EXPECT_EQ(mem_assigner.AssignMemory(false, builder.mem_type_to_mem_offset_, builder.zero_copy_mem_size_), SUCCESS); + + ge::Model model; + EXPECT_EQ(builder.BuildModelDef(model), SUCCESS); + int64_t session_scope_mem_offset = 0; + ge::AttrUtils::GetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset); + EXPECT_EQ(session_scope_mem_offset, 1536); +} + TEST_F(UtestModelBuilderTest, test_model_save) { Graph2SubGraphInfoList subgraphs; std::map stream_max_parallel_num; diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3969ad9c..19b8aeab 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -43,6 +43,7 @@ #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" #include "proto/ge_ir.pb.h" +#include "graph/manager/graph_var_manager.h" #undef private #undef protected @@ -194,6 +195,11 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { } TEST_F(UtestGeExecutor, execute_graph_with_stream) { + VarManager::Instance(0)->Init(0, 0, 0, 0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); + DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); @@ -278,7 +284,6 @@ TEST_F(UtestGeExecutor, execute_graph_with_stream) { OutputData output_data; vector outputs; EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); - GraphExecutor graph_executer; graph_executer.init_flag_ = true; diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc index 7863a70f..5833a13a 100644 --- a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -28,8 +28,7 @@ #define protected public #define private public -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc new file mode 100644 index 00000000..4a336af9 --- /dev/null +++ b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc @@ -0,0 +1,75 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_mem_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestSessionScopeMemAllocator : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestSessionScopeMemAllocator, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1000, 0); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, free_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + + EXPECT_EQ(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + EXPECT_NE(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + MemManager::Instance().Finalize(); +} diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index d6af6de9..f1ea7a27 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -35,7 +35,7 @@ #include "graph/manager/graph_context.h" #include "graph/optimize/graph_optimize.h" #include "graph/manager/util/variable_accelerate_ctrl.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph_builder_utils.h" #include "cce/dnn.h"
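A minimal usage sketch (illustration only, not part of the patch): the session scope path added above is driven entirely through MemManager, in the same way the new unit tests and the loop added to InnerSession::Finalize use it. Memory is requested against a session id and everything recorded for that session is released with a single Free call when the session ends. The function name RunWithSessionScopeMemory and the 1024-byte size are made up for illustration; the MemManager and SessionScopeMemAllocator calls are the ones introduced by this patch.

// Illustrative sketch of the session scope memory API added by this patch.
#include <vector>
#include "graph/manager/graph_mem_manager.h"

ge::Status RunWithSessionScopeMemory(uint64_t session_id) {
  // Initialize the manager once with the memory types to be managed.
  std::vector<rtMemType_t> memory_types = {RT_MEMORY_HBM};
  if (ge::MemManager::Instance().Initialize(memory_types) != ge::SUCCESS) {
    return ge::FAILED;
  }
  // Allocations made here stay alive for the whole session, so workspaces marked
  // as no-reuse across executions of the same session can share this block.
  uint8_t *buffer =
      ge::MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1024, session_id);
  if (buffer == nullptr) {
    return ge::FAILED;
  }
  // ... execute graphs of this session using buffer ...
  // At session teardown, release everything recorded for this session id,
  // mirroring the loop added to InnerSession::Finalize in this patch.
  for (auto memory_type : ge::MemManager::Instance().GetAllMemoryType()) {
    (void)ge::MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id);
  }
  // MemManager::Instance().Finalize() is left to process-level teardown, not per session.
  return ge::SUCCESS;
}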