Browse Source

Support session scope memory

tags/v1.3.0
TangQunzhang 3 years ago
parent
commit
aa579530fb
41 changed files with 1102 additions and 427 deletions
  1. +4
    -0
      ge/CMakeLists.txt
  2. +2
    -0
      ge/executor/CMakeLists.txt
  3. +1
    -1
      ge/executor/ge_executor.cc
  4. +62
    -39
      ge/graph/build/memory/block_mem_assigner.cc
  5. +10
    -11
      ge/graph/build/memory/block_mem_assigner.h
  6. +20
    -6
      ge/graph/build/memory/graph_mem_assigner.cc
  7. +2
    -2
      ge/graph/build/memory/graph_mem_assigner.h
  8. +6
    -4
      ge/graph/build/memory/hybrid_mem_assigner.cc
  9. +2
    -4
      ge/graph/build/memory/hybrid_mem_assigner.h
  10. +1
    -1
      ge/graph/build/memory/memory_assigner.cc
  11. +15
    -2
      ge/graph/build/model_builder.cc
  12. +1
    -1
      ge/graph/build/model_builder.h
  13. +98
    -72
      ge/graph/load/model_manager/davinci_model.cc
  14. +2
    -6
      ge/graph/load/model_manager/davinci_model.h
  15. +16
    -3
      ge/graph/load/model_manager/model_utils.cc
  16. +18
    -2
      ge/graph/load/model_manager/task_info/task_info.h
  17. +2
    -2
      ge/graph/manager/graph_caching_allocator.cc
  18. +1
    -1
      ge/graph/manager/graph_caching_allocator.h
  19. +3
    -113
      ge/graph/manager/graph_mem_allocator.cc
  20. +2
    -106
      ge/graph/manager/graph_mem_allocator.h
  21. +114
    -0
      ge/graph/manager/graph_mem_manager.cc
  22. +141
    -0
      ge/graph/manager/graph_mem_manager.h
  23. +5
    -6
      ge/graph/manager/graph_var_manager.cc
  24. +1
    -1
      ge/graph/manager/memory_api.cc
  25. +2
    -1
      ge/graph/manager/rdma_pool_allocator.cc
  26. +85
    -0
      ge/graph/manager/session_scope_mem_allocator.cc
  27. +123
    -0
      ge/graph/manager/session_scope_mem_allocator.h
  28. +1
    -4
      ge/hybrid/common/npu_memory_allocator.cc
  29. +1
    -2
      ge/hybrid/model/hybrid_model_builder.cc
  30. +1
    -2
      ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc
  31. +1
    -1
      ge/init/gelib.cc
  32. +6
    -0
      ge/session/inner_session.cc
  33. +1
    -2
      ge/single_op/single_op_manager.cc
  34. +1
    -1
      inc/framework/memory/memory_assigner.h
  35. +6
    -0
      tests/ut/ge/CMakeLists.txt
  36. +208
    -27
      tests/ut/ge/graph/build/mem_assigner_unittest.cc
  37. +54
    -0
      tests/ut/ge/graph/build/model_builder_unittest.cc
  38. +6
    -1
      tests/ut/ge/graph/ge_executor_unittest.cc
  39. +1
    -2
      tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc
  40. +75
    -0
      tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc
  41. +1
    -1
      tests/ut/ge/graph/passes/variable_op_pass_unittest.cc

+ 4
- 0
ge/CMakeLists.txt View File

@@ -173,10 +173,12 @@ set(TRAIN_SRC_LIST
"graph/manager/graph_manager_utils.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"graph/manager/session_scope_mem_allocator.cc"
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
"graph/manager/host_mem_allocator.cc"
"graph/manager/graph_mem_manager.cc"
"graph/manager/memory_api.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
@@ -478,6 +480,8 @@ set(INFER_SRC_LIST
"graph/manager/host_mem_allocator.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"graph/manager/session_scope_mem_allocator.cc"
"graph/manager/graph_mem_manager.cc"
"model/ge_model.cc"
"model/ge_root_model.cc"
"graph/common/transop_util.cc"


+ 2
- 0
ge/executor/CMakeLists.txt View File

@@ -28,6 +28,8 @@ set(SRC_LIST
"../graph/manager/graph_var_manager.cc"
"../graph/manager/graph_mem_allocator.cc"
"../graph/manager/graph_caching_allocator.cc"
"../graph/manager/session_scope_mem_allocator.cc"
"../graph/manager/graph_mem_manager.cc"
"../graph/manager/trans_var_data_utils.cc"
"../graph/manager/util/debug.cc"
"../graph/manager/rdma_pool_allocator.cc"


+ 1
- 1
ge/executor/ge_executor.cc View File

@@ -26,7 +26,7 @@
#include "graph/execute/graph_execute.h"
#include "graph/load/graph_loader.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "single_op/single_op_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"


+ 62
- 39
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -500,6 +500,7 @@ string MemoryBlock::String() {
ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " ";
ss << "real_size_list: " << ToString(real_size_list_) << " ";
ss << "ref_count: " << ref_count_ << " ";
ss << "reuse_mem_: " << reuse_mem_ << " ";
ss << "members: ";
for (auto x : NodeTypeIndexList()) {
ss << "__node: " << ToString(x) << " ";
@@ -513,8 +514,8 @@ string MemoryBlock::String() {

BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<string, string> &anchor_to_symbol,
const map<string, list<NodeIndexIO>> &symbol_to_anchors)
: mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)),
symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {}
: compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors),
anchor_to_symbol_(anchor_to_symbol), life_time_(0) {}

BlockMemAssigner::~BlockMemAssigner() {
GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size());
@@ -1123,7 +1124,7 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) {
MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size,
OpMemoryType mem_type, const NodePtr &n, uint32_t out_index,
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem,
const bool continuous, int64_t memory_type) {
const bool continuous, uint64_t memory_type) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
@@ -1824,8 +1825,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i), false);
continue;
}
int64_t memory_type = RT_MEMORY_HBM;
if (!GetWorkSpaceMemoryType(n, i, memory_type)) {
uint64_t memory_type = RT_MEMORY_HBM;
if (!GetWorkSpaceMemoryType(n, i, memory_type, workspace_reuse_flag)) {
GELOGW("Get workspace memory type failed.");
return;
}
@@ -1860,7 +1861,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
}

void BlockMemAssigner::CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id,
MemoryBlock *mem_block, int64_t memory_type) {
MemoryBlock *mem_block, uint64_t memory_type) {
bool reuse_mem_flag =
((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true;
if (reuse_mem_flag) {
@@ -1992,24 +1993,29 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) {
}
}

void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) {
if (block.memory_type_ == RT_MEMORY_HBM) {
if (block.first_continuous_block_) {
mem_offset += MEM_ALIGN_SIZE;
}
block.Resize();
block.SetHeadOffset(mem_offset);
mem_offset += block.Size();
block.SetTailOffset(mem_offset - 1);
} else if (block.memory_type_ == RT_MEMORY_P2P_DDR) {
if (block.first_continuous_block_) {
p2p_mem_offset += MEM_ALIGN_SIZE;
void AddBlockMemOffset(std::map<uint64_t, size_t> &mem_offsets, MemoryBlock &block) {
auto it = mem_offsets.find(block.memory_type_);
if (it == mem_offsets.end()) {
auto result = mem_offsets.insert(std::pair<int64_t, size_t>(block.memory_type_, 0));
// Insert failure is unlikely
if (!result.second) {
return;
}
block.Resize();
block.SetHeadOffset(p2p_mem_offset);
p2p_mem_offset += block.Size();
block.SetTailOffset(p2p_mem_offset - 1);
it = result.first;
}

if (it == mem_offsets.end()) {
return;
}

auto &mem_offset = it->second;
if (block.first_continuous_block_) {
mem_offset += MEM_ALIGN_SIZE;
}
block.Resize();
block.SetHeadOffset(mem_offset);
mem_offset += block.Size();
block.SetTailOffset(mem_offset - 1);
}

bool DynamicBatchBlockReuse(MemoryBlock &block) {
@@ -2036,27 +2042,27 @@ void BlockMemAssigner::ResizeDynamicBatchBlocks() {
}
}

size_t max_mem_offset = mem_offset_;
size_t max_p2p_mem_offset = p2p_mem_offset_;
std::map<uint64_t, size_t> max_mem_offsets = mem_offsets_;
for (auto &batch_blocks : dynamic_batch_blocks) {
size_t mem_offset = mem_offset_;
size_t p2p_mem_offset = p2p_mem_offset_;
std::map<uint64_t, size_t> mem_offsets = mem_offsets_;
for (auto block : batch_blocks.second) {
if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) {
continue;
}
AddBlockMemOffset(mem_offset, p2p_mem_offset, *block);
AddBlockMemOffset(mem_offsets, *block);
}
if (mem_offset > max_mem_offset) {
max_mem_offset = mem_offset;
}
if (p2p_mem_offset > max_p2p_mem_offset) {
max_p2p_mem_offset = p2p_mem_offset;

for (auto &it : mem_offsets) {
auto itmax = max_mem_offsets.find(it.first);
if (itmax == max_mem_offsets.end()) {
max_mem_offsets[it.first] = it.second;
} else if (it.second > itmax->second) {
itmax->second = it.second;
}
GELOGI("Batch:%s memory type:%ld offset:%zu", batch_blocks.first.c_str(), it.first, it.second);
}
GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset);
}
mem_offset_ = max_mem_offset;
p2p_mem_offset_ = max_p2p_mem_offset;
mem_offsets_ = max_mem_offsets;
}

///
@@ -2074,11 +2080,13 @@ void BlockMemAssigner::ResizeMemoryBlocks() {
continue;
}

AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block);
AddBlockMemOffset(mem_offsets_, *memory_block);
}
ResizeDynamicBatchBlocks();
GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu,"
"theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_);
for (auto it : mem_offsets_) {
GELOGI("Memory type:%ld mem_offset exclude zero_copy_memory:%zu, theory_min_memory_size:%zu", it.first, it.second,
theory_min_memory_size_);
}
}

///
@@ -2217,7 +2225,8 @@ bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
(node_type == CONSTANTOP) || (node_type == HVDWAIT);
}

bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type,
vector<bool> &workspace_reuse_flag) {
memory_type = RT_MEMORY_HBM;
vector<int64_t> workspace_memory_type;
auto op_desc = node->GetOpDesc();
@@ -2233,6 +2242,20 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index,
return false;
}
memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM;

vector<int32_t> workspace_no_reuse_scope;
bool has_workspace_no_reuse_scope =
ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);
if (has_workspace_no_reuse_scope && (index < workspace_no_reuse_scope.size())
&& (workspace_no_reuse_scope[index] == kSessionNoReuse)) {
memory_type |= kSessionScopeMemory;
if (workspace_reuse_flag.empty()) {
workspace_reuse_flag.assign(workspace_no_reuse_scope.size(), true);
}
// set to no reuse
workspace_reuse_flag[index] = false;
GELOGI("%s's workspace is session scope no reuse, memory type:%lu.", node->GetName().c_str(), memory_type);
}
return true;
}
} // namespace ge

+ 10
- 11
ge/graph/build/memory/block_mem_assigner.h View File

@@ -34,6 +34,10 @@
namespace ge {
const size_t kMaxLifeTime = 0xffffffff;
const int32_t kInvalidThreadScopeId = -1;
const uint64_t kSessionScopeMemory = 0x100000000;
const uint64_t kMemoryTypeMask = 0xffffffff;

enum MemoryNoReuseScope { kReuse, kSessionNoReuse, kGraphNoReuse };

using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>;

@@ -224,9 +228,7 @@ class BlockMemAssigner : public MemAssigner {

Status Assign() override;

size_t GetMemOffset() const { return mem_offset_; }

size_t GetP2PMemOffset() const { return p2p_mem_offset_; }
const std::map<uint64_t, size_t> &GetMemOffsets() const { return mem_offsets_; }

int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; }

@@ -329,14 +331,10 @@ class BlockMemAssigner : public MemAssigner {
///
void UpdateOpTensorMemType(std::list<NodeIndexIO> node_index_io_list, int64_t memory_type);

size_t mem_offset_;
size_t p2p_mem_offset_;

std::map<uint64_t, size_t> mem_offsets_;
ge::ComputeGraphPtr compute_graph_;

std::vector<MemoryBlock *> memory_blocks_;
std::vector<MemoryBlock *> blocks_store_;

std::vector<NodeTypeIndex> zero_memory_list_;

// ref mapping
@@ -380,7 +378,7 @@ class BlockMemAssigner : public MemAssigner {
///
MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type,
const ge::NodePtr &n, uint32_t out_index, const std::vector<bool> &workspace_reuse_flag,
const bool is_op_reuse_mem, const bool continuous, int64_t memory_type);
const bool is_op_reuse_mem, const bool continuous, uint64_t memory_type);

///
/// @ingroup GE
@@ -394,7 +392,7 @@ class BlockMemAssigner : public MemAssigner {
/// @author
///
void CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id,
MemoryBlock *mem_block, int64_t memory_type);
MemoryBlock *mem_block, uint64_t memory_type);

///
/// @ingroup GE
@@ -457,7 +455,8 @@ class BlockMemAssigner : public MemAssigner {

bool IsContinuousOutput(const NodePtr &n);

bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type);
bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type,
vector<bool> &workspace_reuse_flag);

void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n);



+ 20
- 6
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -107,11 +107,22 @@ Status GraphMemoryAssigner::AssignMemory() {
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
return ge::FAILED;
}
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);

if (mem_assigner->GetP2PMemOffset() >= 0) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
for (auto pair : mem_assigner->GetMemOffsets()) {
MemoryOffset offset(pair.first, pair.second);
memory_offset_.emplace(pair.first, offset);
}

// the base memory-type offset must always exist
auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM);
if (it == mem_assigner->GetMemOffsets().end()) {
MemoryOffset memory_offset(RT_MEMORY_HBM, 0);
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
}

it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR);
if (it == mem_assigner->GetMemOffsets().end()) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0);
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
}

@@ -224,7 +235,7 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out
return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_type_to_offset) {
if (memory_offset_.empty()) {
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
@@ -264,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size
return SUCCESS;
}

Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<uint64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
if (priority_assigner == nullptr) {
REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s",
@@ -1398,6 +1409,9 @@ ge::Status GraphMemoryAssigner::SetInputOffset() {
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
}
for (auto pair : memory_offset_) {
if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) {
continue;
}
GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
pair.second.mem_offset_, pair.first);
}


+ 2
- 2
ge/graph/build/memory/graph_mem_assigner.h View File

@@ -103,9 +103,9 @@ class GraphMemoryAssigner {

ge::Status AssignMemory2HasRefAttrNode();

ge::Status ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset);
ge::Status ReAssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_type_to_offset);

ge::Status AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size);
ge::Status AssignZeroCopyMemory(map<uint64_t, size_t> &mem_offset, size_t &zero_mem_copy_size);

ge::Status SetInputOffset();



+ 6
- 4
ge/graph/build/memory/hybrid_mem_assigner.cc View File

@@ -23,7 +23,7 @@

namespace ge {
HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph)
: mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {}
: compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {}

Status HybridMemAssigner::AssignMemory(std::unique_ptr<BlockMemAssigner> &block_assigner, size_t &mem_size) {
vector<int64_t> ranges;
@@ -36,7 +36,10 @@ Status HybridMemAssigner::AssignMemory(std::unique_ptr<BlockMemAssigner> &block_

block_assigner->AssignMemoryWithReuse(ranges);

mem_size = block_assigner->GetMemOffset();
// total size
for (auto it : block_assigner->GetMemOffsets()) {
mem_size += it.second;
}
return SUCCESS;
}

@@ -73,8 +76,7 @@ Status HybridMemAssigner::Assign() {
}

priority_assigner->SetOpMemOffset(false);
mem_offset_ = priority_assigner->GetMemOffset();
p2p_mem_offset_ = priority_assigner->GetP2PMemOffset();
mem_offsets_ = priority_assigner->GetMemOffsets();
priority_assigner_ = std::move(priority_assigner);

return SUCCESS;


+ 2
- 4
ge/graph/build/memory/hybrid_mem_assigner.h View File

@@ -42,16 +42,14 @@ class HybridMemAssigner : public MemAssigner {

Status Assign() override;

size_t GetMemOffset() const { return mem_offset_; }
size_t GetP2PMemOffset() const { return p2p_mem_offset_; }
const std::map<uint64_t, size_t> &GetMemOffsets() const { return mem_offsets_; }

BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; }

private:
Status AssignMemory(std::unique_ptr<BlockMemAssigner> &block_assigner, size_t &mem_size);

size_t mem_offset_;
size_t p2p_mem_offset_;
std::map<uint64_t, size_t> mem_offsets_;

ge::ComputeGraphPtr compute_graph_;



+ 1
- 1
ge/graph/build/memory/memory_assigner.cc View File

@@ -20,7 +20,7 @@
#include "graph/build/memory/graph_mem_assigner.h"

namespace ge {
Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_offset, size_t &zero_copy_mem_size) {
Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size) {
GraphMemoryAssigner graph_mem_assigner(compute_graph_);

if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) {


+ 15
- 2
ge/graph/build/model_builder.cc View File

@@ -47,6 +47,7 @@
#include "omg/version.h"
#include "register/op_registry.h"
#include "graph/passes/set_input_output_offset_pass.h"
#include "graph/build/memory/block_mem_assigner.h"

using std::map;
using std::set;
@@ -398,9 +399,21 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str());
GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str());
return FAILED);
auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM);
size_t session_scope_mem_offset = 0;
auto it = mem_type_to_mem_offset_.find(mem_type_session_scope);
if (it != mem_type_to_mem_offset_.end()) {
session_scope_mem_offset = it->second;
}
if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) {
p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR];
}
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed",
ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.c_str());
GELOGE(FAILED, "SetInt of ATTR_NAME_SESSION_SCOPE_MEMORY_SIZE failed.");
return FAILED);

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_),
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str());
GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str());
@@ -434,8 +447,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str());
GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str());
return FAILED);
GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_,
p2p_mem_offset_, zero_copy_mem_size_);
GELOGI("For model, max_mem_offset: %zu, p2p_mem_size: %zu, zero_copy_mem_size: %zu, session_scope_mem_size: %zu",
max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_, session_scope_mem_offset);
string fp_ceiling_mode;
if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) {
if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {


+ 1
- 1
ge/graph/build/model_builder.h View File

@@ -93,7 +93,7 @@ class ModelBuilder {

uint64_t session_id_;

map<int64_t, size_t> mem_type_to_mem_offset_;
map<uint64_t, size_t> mem_type_to_mem_offset_;

size_t weight_offset_;



+ 98
- 72
ge/graph/load/model_manager/davinci_model.cc View File

@@ -40,7 +40,7 @@
#include "graph/load/model_manager/cpu_queue_schedule.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/trans_var_data_utils.h"
#include "graph/manager/util/debug.h"
@@ -60,6 +60,8 @@
#include "graph/common/local_context.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "graph/common/omg_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/manager/session_scope_mem_allocator.h"

// create std::thread, catch exceptions using try/catch
#define CREATE_STD_THREAD(thread_id, func, args) \
@@ -168,7 +170,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
mem_base_(nullptr),
is_inner_mem_base_(false),
is_inner_weight_base_(false),
is_inner_p2p_mem_base_(false),
data_inputer_(nullptr),
load_begin_time_(0),
load_end_time_(0),
@@ -236,7 +237,7 @@ DavinciModel::~DavinciModel() {

FreeFeatureMapMem();

FreeP2PMem();
FreeExMem();

OpDebugUnRegister();

@@ -389,7 +390,6 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
is_feature_map_mem_has_inited_ = true;

std::size_t data_size = TotalMemSize();
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
REPORT_INNER_ERROR("E19999", "Param dev_ptr is nullptr or mem_size:%zu < ge_model.mem_size:%zu, "
@@ -400,7 +400,6 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
}

mem_base_ = static_cast<uint8_t *>(dev_ptr);
p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr);
is_inner_mem_base_ = false;

if (TotalMemSize() && mem_base_ == nullptr) {
@@ -422,24 +421,13 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
is_inner_mem_base_ = true;
}

if (p2p_data_size != 0) {
p2p_mem_base_ = MallocP2PMem(p2p_data_size);
if (p2p_mem_base_ == nullptr) {
REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, p2p_data_size:%zu, model_id:%u, check invalid",
p2p_data_size, model_id_);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Alloc][Memory] for p2p failed, size:%zu, model_id:%u",
p2p_data_size, model_id_);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
p2p_mem_base_, p2p_data_size);
is_inner_p2p_mem_base_ = true;
if (!runtime_param_.memory_infos.empty()) {
GE_CHK_STATUS_RET(MallocExMem(), "MallocExMem failed.");
}

GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_);
runtime_param_.mem_base = mem_base_;
runtime_param_.weight_base = weights_mem_base_;
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_;
return SUCCESS;
}

@@ -465,7 +453,6 @@ Status DavinciModel::InitVariableMem() {
void DavinciModel::InitRuntimeParams() {
int64_t value = 0;
bool ret;
MemInfo p2p_mem_info;
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value);
runtime_param_.mem_size = ret ? (uint64_t)value : 0;
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value);
@@ -490,16 +477,18 @@ void DavinciModel::InitRuntimeParams() {
runtime_param_.var_size = ret ? (uint64_t)value : 0;
session_id_ = runtime_param_.session_id;
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value);
p2p_mem_info.memory_size = ret ? (uint64_t)value : 0;
MemInfo p2p_mem_info;
p2p_mem_info.memory_size = static_cast<size_t>(ret ? value : 0);
p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR;
p2p_mem_info.memory_key = "_p";
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info);

GELOGI(
"InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, "
"logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, "
"memory_size:%lu, weight_size:%lu, var_size:%lu",
runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num,
runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base,
runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size);
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, value);
MemInfo session_scope_mem_info;
session_scope_mem_info.memory_size = static_cast<size_t>(ret ? value : 0);
runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info);

GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str());
}

void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) {
@@ -4089,14 +4078,15 @@ Status DavinciModel::InitEntryTask() {
uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
uint8_t *mem_base = nullptr;
const string purpose("feature map,used for op input and output.");
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK) {
data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize());
string memory_key = std::to_string(0) + "_f";
mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId());
mem_base =
MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, data_size, GetDeviceId());
} else {
mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, data_size, GetDeviceId());
mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, data_size, GetDeviceId());
}

if (mem_base != nullptr) {
@@ -4105,83 +4095,119 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
return mem_base;
}

uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) {
uint8_t *p2p_mem_base = nullptr;
const string purpose("p2p memory, used for some op related to hcom");
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
string p2p_memory_key = std::to_string(0) + "_p";
p2p_mem_base =
MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId());
} else {
p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId());
Status DavinciModel::MallocExMem() {
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
for (auto it : runtime_param_.memory_infos) {
auto mem_size = it.second.memory_size;
if (mem_size == 0) {
continue;
}
bool sessoion_scope = ((kSessionScopeMemory & it.first) == kSessionScopeMemory);
auto mem_type = it.first & kMemoryTypeMask;
uint8_t *mem_base = nullptr;
const string purpose("p2p memory, used for some op related to hcom or session scope memory");
if (sessoion_scope) {
mem_base = MemManager::Instance().SessionScopeMemInstance(mem_type).Malloc(mem_size, runtime_param_.session_id);
} else if (res_static_memory == EN_OK) {
string memory_key = std::to_string(0) + it.second.memory_key;
mem_base =
MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, memory_key, mem_size, GetDeviceId());
} else {
mem_base = MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, mem_size, GetDeviceId());
}

if (mem_base == nullptr) {
REPORT_CALL_ERROR("E19999", "MallocExMem fail, type:%ld size:%zu, model_id:%u, check invalid",
mem_type, mem_size, model_id_);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc ex memory failed, type:%ld size: %zu", mem_type, mem_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
it.second.memory_base = mem_base;
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] mem_type[%ld] mem_addr[%p] mem_size[%zu]",
runtime_param_.graph_id, mem_type, mem_base, mem_size);
}
return p2p_mem_base;
return SUCCESS;
}

uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) {
uint8_t *weights_mem_base = nullptr;
const string purpose("weights memory in inference network.");
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK) {
string weight_memory_key = std::to_string(0) + "_w";
weights_mem_base =
MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId());
weights_mem_base = MemManager::Instance()
.MemInstance(RT_MEMORY_HBM)
.MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId());
} else {
weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId());
weights_mem_base =
MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, weights_size, GetDeviceId());
}
return weights_mem_base;
}

void DavinciModel::FreeFeatureMapMem() {
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK && is_inner_mem_base_) {
string weight_memory_key = std::to_string(0) + "_f";
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()),
"[Free][Memory] failed, model_id:%u", model_id_);
if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(weight_memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weight_memory_key, GetDeviceId()),
"failed to free weight memory");
}
mem_base_ = nullptr;
} else {
GE_IF_BOOL_EXEC(mem_base_ != nullptr && is_inner_mem_base_,
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(mem_base_, GetDeviceId()),
"[Free][Memory] failed, model_id:%u", model_id_);
mem_base_ = nullptr);
GE_IF_BOOL_EXEC(
mem_base_ != nullptr && is_inner_mem_base_,
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(mem_base_, GetDeviceId()),
"failed to free feature_map memory");
mem_base_ = nullptr);
}
}

void DavinciModel::FreeP2PMem() {
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
std::string p2p_memory_key = std::to_string(0) + "_p";
if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()),
"[Free][Memory] failed, model_id:%u", model_id_);
void DavinciModel::FreeExMem() {
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
for (auto it : runtime_param_.memory_infos) {
    // freed when the session is destroyed
if ((kSessionScopeMemory & it.first) == kSessionScopeMemory) {
continue;
}
auto mem_type = it.first & kMemoryTypeMask;
if (res_static_memory == EN_OK) {
std::string memory_key = std::to_string(0) + it.second.memory_key;
if (MemManager::Instance().MemInstance(mem_type).GetMemoryAddr(memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(memory_key, GetDeviceId()),
"failed to free memory");
}
it.second.memory_base = nullptr;
} else {
GE_IF_BOOL_EXEC(
it.second.memory_base != nullptr,
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(it.second.memory_base, GetDeviceId()),
"failed to free memory");
it.second.memory_base = nullptr);
}
p2p_mem_base_ = nullptr;
} else {
GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_,
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()),
"[Free][Memory] failed, model_id:%u", model_id_);
p2p_mem_base_ = nullptr);
}
}

void DavinciModel::FreeWeightsMem() {
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00};
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK) {
string memory_key = std::to_string(0) + "_w";
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()),
"[Free][Memory] failed, model_id:%u", model_id_);
if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key, GetDeviceId()),
"failed to free feature_map memory");
}
weights_mem_base_ = nullptr;
} else {
GE_IF_BOOL_EXEC(weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_,
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weights_mem_base_, GetDeviceId()),
"[Free][Memory] failed, model_id:%u", model_id_);
weights_mem_base_ = nullptr);
GE_IF_BOOL_EXEC(
weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_,
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weights_mem_base_, GetDeviceId()),
"failed to free weight memory");
weights_mem_base_ = nullptr);
}
}



+ 2
- 6
ge/graph/load/model_manager/davinci_model.h View File

@@ -248,8 +248,6 @@ class DavinciModel {
// get total mem size
size_t TotalMemSize() const { return runtime_param_.mem_size; }

const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; }

// model name
string Name() const { return name_; }

@@ -586,10 +584,8 @@ class DavinciModel {
// memory address of model
uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever.
uint8_t *mem_base_;
uint8_t *p2p_mem_base_;
bool is_inner_mem_base_;
bool is_inner_weight_base_;
bool is_inner_p2p_mem_base_;
// input data manager
DataInputer *data_inputer_;
int64_t load_begin_time_;
@@ -668,13 +664,13 @@ class DavinciModel {

uint8_t *MallocWeightsMem(size_t weights_size);

uint8_t *MallocP2PMem(size_t p2p_data_size);
Status MallocExMem();

void FreeFeatureMapMem();

void FreeWeightsMem();

void FreeP2PMem();
void FreeExMem();

void ReleaseTask();



+ 16
- 3
ge/graph/load/model_manager/model_utils.cc View File

@@ -21,6 +21,7 @@
#include "graph/utils/tensor_utils.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/types.h"
#include "graph/build/memory/block_mem_assigner.h"

#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \
do { \
@@ -514,10 +515,16 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type);
bool has_mem_type_workspace =
ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type);

vector<int32_t> workspace_no_reuse_scope;
bool has_workspace_no_reuse_scope =
ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);

for (size_t i = 0; i < v_workspace_bytes.size(); ++i) {
// Temporary solution, the aicpu workspace of multiple images cannot be shared.
if (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] &&
!model_param.is_single_op) {
bool aicpu_work_space = (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] &&
!model_param.is_single_op);
if (aicpu_work_space) {
void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]);
v_workspace_data_addr.push_back(mem_addr);
GELOGI(
@@ -548,7 +555,13 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]);
} else {
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]);
uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i];
uint8_t *mem_addr = nullptr;
bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size());
if (session_scope_memory) {
mem_addr = model_param.memory_infos.at(kSessionScopeMemory | RT_MEMORY_HBM).memory_base + v_workspace_offset[i];
} else {
mem_addr = model_param.mem_base + v_workspace_offset[i];
}
v_workspace_data_addr.push_back(mem_addr);
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i],


+ 18
- 2
ge/graph/load/model_manager/task_info/task_info.h View File

@@ -18,6 +18,7 @@
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_

#include <vector>
#include <sstream>

#include "cce/customize.h"
#include "framework/common/taskdown_common.h"
@@ -28,9 +29,11 @@

namespace ge {
struct MemInfo {
uint64_t memory_size = 0;
size_t memory_size = 0;
uint64_t logic_memory_base = 0;
uint8_t *memory_base = nullptr;
uint32_t memory_type = RT_MEMORY_HBM;
std::string memory_key = "";
};

struct RuntimeParam {
@@ -40,6 +43,19 @@ struct RuntimeParam {
}
~RuntimeParam() = default;

  // Formats the runtime dimensioning parameters (ids, logical bases, sizes and
  // the per-type extra memory records) into a single line for logging.
  std::string ToString() {
    std::stringstream ss;
    ss << "session_id:" << session_id << ", stream_num:" << stream_num << ", event_num:" << event_num
       << ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base
       << ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base
       << ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size
       << ", ex_memory_info:";
    // memory_infos maps an encoded memory-type key to its MemInfo record.
    for (auto it : memory_infos) {
      ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]";
    }
    return ss.str();
  }

uint64_t mem_size = 0;
uint64_t logic_mem_base = 0;
uint8_t *mem_base = nullptr;
@@ -49,7 +65,7 @@ struct RuntimeParam {
uint64_t var_size = 0;
uint64_t logic_var_base = 0;
uint8_t *var_base = nullptr;
std::map<uint32_t, MemInfo> memory_infos;
std::map<uint64_t, MemInfo> memory_infos;
uint32_t batch_num = 0;
uint32_t stream_num = 0;
uint32_t event_num = 0;


+ 2
- 2
ge/graph/manager/graph_caching_allocator.cc View File

@@ -21,7 +21,7 @@
#include <utility>

#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"

namespace ge {
const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize,
@@ -117,7 +117,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) {
}
free_block_bins_[i] = bin_ptr;
}
memory_allocator_ = MemManager::Instance(memory_type_);
memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_);
if (memory_allocator_ == nullptr) {
return ACL_ERROR_GE_INTERNAL_ERROR;
}


+ 1
- 1
ge/graph/manager/graph_caching_allocator.h View File

@@ -88,8 +88,8 @@ class CachingAllocator {
///
/// @ingroup ge_graph
/// @brief free memory
/// @param [in] memory_ptr memory address ptr
/// @param [in] device_id device id
/// @param [out] memory_ptr memory address ptr
/// @return Status result of function
///
Status Free(uint8_t *memory_addr, uint32_t device_id = 0);


+ 3
- 113
ge/graph/manager/graph_mem_allocator.cc View File

@@ -17,11 +17,9 @@
#include "graph/manager/graph_mem_allocator.h"

#include <string>
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/manager/host_mem_allocator.h"

namespace ge {
void MemoryAllocator::Initialize(uint32_t device_id) {
Status MemoryAllocator::Initialize(uint32_t device_id) {
GELOGI("MemoryAllocator::Initialize");

// when redo Initialize free memory
@@ -31,6 +29,7 @@ void MemoryAllocator::Initialize(uint32_t device_id) {
}
}
memory_base_map_.clear();
return SUCCESS;
}

void MemoryAllocator::Finalize(uint32_t device_id) {
@@ -152,113 +151,4 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic

return it->second.memory_addr_;
}

MemManager::MemManager() {}

MemManager::~MemManager() { Finalize(); }

MemManager &MemManager::Instance() {
static MemManager mem_manager;
return mem_manager;
}

MemoryAllocator *MemManager::Instance(rtMemType_t memory_type) { return Instance().GetMemoryAllocator(memory_type); }

Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) {
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
MemoryAllocator *memory_allocator = nullptr;
for (unsigned int index : memory_type) {
auto it = memory_allocator_map_.find(index);
if (it == memory_allocator_map_.end()) {
memory_allocator = new (std::nothrow) MemoryAllocator(index);

if (memory_allocator != nullptr) {
memory_allocator_map_[index] = memory_allocator;
GELOGI("Create MemoryAllocator memory type[%u] success.", index);
} else {
REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed.");
}
} else {
memory_allocator = it->second;
}

if (memory_allocator == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} else {
memory_allocator->Initialize(0);
}
}

auto ret = InitAllocator(memory_type, caching_allocator_map_);
if (ret != SUCCESS) {
GELOGE(ret, "Create CachingAllocator failed.");
return ret;
}

ret = InitAllocator(memory_type, rdma_allocator_map_);
if (ret != SUCCESS) {
GELOGE(ret, "Create RdmaAllocator failed.");
return ret;
}

ret = InitAllocator(memory_type, host_allocator_map_);
if (ret != SUCCESS) {
GELOGE(ret, "Create HostMemAllocator failed.");
return ret;
}
return SUCCESS;
}

template <typename T>
void FinalizeAllocatorMap(std::map<rtMemType_t, T *> &allocate_map) {
for (auto &allocator : allocate_map) {
if (allocator.second != nullptr) {
allocator.second->Finalize();
delete allocator.second;
allocator.second = nullptr;
}
}
allocate_map.clear();
}

void MemManager::Finalize() noexcept {
GELOGI("Finalize.");
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
// caching and rdma allocator use memory allocator, so finalize them first
FinalizeAllocatorMap(caching_allocator_map_);
FinalizeAllocatorMap(rdma_allocator_map_);
FinalizeAllocatorMap(host_allocator_map_);
FinalizeAllocatorMap(memory_allocator_map_);
}

MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) {
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
MemoryAllocator *memory_allocator = nullptr;
auto it = memory_allocator_map_.find(memory_type);
if (it != memory_allocator_map_.end()) {
memory_allocator = it->second;
}

// Usually impossible
if (memory_allocator == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type);
static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED);
return &default_memory_allocator;
}

return memory_allocator;
}

CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, caching_allocator_map_);
}

RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, rdma_allocator_map_);
}
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, host_allocator_map_);
}
} // namespace ge

+ 2
- 106
ge/graph/manager/graph_mem_allocator.h View File

@@ -26,7 +26,6 @@

#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/node.h"
#include "runtime/mem.h"

@@ -71,9 +70,9 @@ class MemoryAllocator {
/// @ingroup ge_graph
/// @brief memory allocator init
/// @param [in] options user config params
/// @return void
/// @return Status of init
///
void Initialize(uint32_t device_id = 0);
Status Initialize(uint32_t device_id = 0);

///
/// @ingroup ge_graph
@@ -136,109 +135,6 @@ class MemoryAllocator {
bool mem_malloced_;
map<string, MemoryInfo> memory_base_map_;
};

using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
class CachingAllocator;
class RdmaPoolAllocator;
class MemManager {
public:
MemManager();
virtual ~MemManager();
static MemManager &Instance();
static MemoryAllocator *Instance(rtMemType_t memory_type);
CachingAllocator &CachingInstance(rtMemType_t memory_type);
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type);
HostMemAllocator &HostMemInstance(rtMemType_t memory_type);
MemManager(const MemManager &) = delete;
MemManager &operator=(const MemManager &) = delete;
///
/// @ingroup ge_graph
/// @brief memory allocator manager init
/// @param [in] options user config params
/// @return Status result of function
///
Status Initialize(const std::vector<rtMemType_t> &memory_type);

///
/// @ingroup ge_graph
/// @brief memory allocator finalize
/// @return void
///
void Finalize() noexcept;

private:
///
/// @ingroup ge_graph
/// @brief ge memory allocator
/// @param [in] memory_type memory type
/// @return MemoryAllocator ptr
///
MemoryAllocator *GetMemoryAllocator(rtMemType_t memory_type);

///
/// @ingroup ge_graph
/// @param [in] memory_type memory type
/// @param [in] allocate_map memory allocator map
/// @return Status result of function
///
template <typename T>
Status InitAllocator(const std::vector<rtMemType_t> &memory_type, std::map<rtMemType_t, T *> &allocate_map) {
T *allocator = nullptr;
for (unsigned int index : memory_type) {
auto it = allocate_map.find(index);
if (it == allocate_map.end()) {
allocator = new (std::nothrow) T(index);
if (allocator != nullptr) {
allocate_map[index] = allocator;
GELOGI("Create Allocator memory type[%u] success.", index);
} else {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed.");
}
} else {
allocator = it->second;
}

if (allocator == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} else {
if (allocator->Initialize() != SUCCESS) {
return ACL_ERROR_GE_INTERNAL_ERROR;
}
}
}
return SUCCESS;
}
///
/// @ingroup ge_graph
/// @param [in] memory_type memory type
/// @param [in] allocate_map memory allocator map
/// @return Allocator ptr
///
template <typename T>
T &GetAllocator(rtMemType_t memory_type, std::map<rtMemType_t, T *> allocate_map) {
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
T *allocator = nullptr;
auto it = allocate_map.find(memory_type);
if (it != allocate_map.end()) {
allocator = it->second;
}

// Usually impossible
if (allocator == nullptr) {
GELOGW("Get allocator failed, memory type is %u.", memory_type);
static T default_allocator(RT_MEMORY_RESERVED);
return default_allocator;
}
return *allocator;
}

std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_;
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_;
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_;
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_;
std::recursive_mutex allocator_mutex_;
};
} // namespace ge

#endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_

+ 114
- 0
ge/graph/manager/graph_mem_manager.cc View File

@@ -0,0 +1,114 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/manager/graph_mem_manager.h"

#include <string>

namespace ge {
// MemManager is a process-wide singleton; the allocator maps are created
// lazily in Initialize() and torn down in Finalize().
MemManager::MemManager() {}

// Destructor releases all allocators via Finalize().
MemManager::~MemManager() { Finalize(); }

// Meyers singleton: function-local static initialization is thread-safe
// since C++11.
MemManager &MemManager::Instance() {
  static MemManager mem_manager;
  return mem_manager;
}

///
/// @brief Create one allocator of every kind (plain, caching, rdma, host,
///        session-scope) for each requested memory type. Idempotent: a second
///        call is a no-op once init_ is set.
/// @param [in] memory_type memory types to create allocators for
/// @return SUCCESS, or the first allocator-creation error
///
Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) {
  std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
  if (init_) {
    GELOGW("MemManager has been inited.");
    return SUCCESS;
  }

  // The base MemoryAllocator must exist first: the other allocators borrow
  // memory from it during their own Initialize().
  auto ret = InitAllocator(memory_type, memory_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create MemoryAllocator failed.");
    return ret;
  }

  ret = InitAllocator(memory_type, caching_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create CachingAllocator failed.");
    return ret;
  }

  ret = InitAllocator(memory_type, rdma_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create RdmaAllocator failed.");
    return ret;
  }

  ret = InitAllocator(memory_type, host_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create HostMemAllocator failed.");
    return ret;
  }

  ret = InitAllocator(memory_type, session_scope_allocator_map_);
  if (ret != SUCCESS) {
    // Fixed copy-paste: this branch previously reported "HostMemAllocator".
    GELOGE(ret, "Create SessionScopeMemAllocator failed.");
    return ret;
  }

  // Record the initialized types; GetAllMemoryType() returns this list and
  // it was never populated before this fix (always came back empty).
  memory_type_ = memory_type;
  init_ = true;
  return SUCCESS;
}

// Finalize and destroy every allocator held in the map, then drop all entries.
template <typename T>
void FinalizeAllocatorMap(std::map<rtMemType_t, T *> &allocate_map) {
  for (auto &entry : allocate_map) {
    T *allocator = entry.second;
    if (allocator == nullptr) {
      continue;
    }
    allocator->Finalize();
    delete allocator;
    entry.second = nullptr;
  }
  allocate_map.clear();
}

// Tears down every allocator map. The order below is a hard dependency:
// session-scope, caching and rdma allocators return their blocks to the base
// MemoryAllocator in their Finalize(), so the base map must go last.
void MemManager::Finalize() noexcept {
  GELOGI("Finalize.");
  std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
  // caching and rdma allocator use memory allocator, so finalize them first
  FinalizeAllocatorMap(session_scope_allocator_map_);
  FinalizeAllocatorMap(caching_allocator_map_);
  FinalizeAllocatorMap(rdma_allocator_map_);
  FinalizeAllocatorMap(host_allocator_map_);
  FinalizeAllocatorMap(memory_allocator_map_);
  // Allow a later Initialize() to rebuild everything.
  init_ = false;
}

// Returns the base device MemoryAllocator for the given memory type.
MemoryAllocator &MemManager::MemInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, memory_allocator_map_);
}

// Returns the block-caching allocator for the given memory type.
CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, caching_allocator_map_);
}

// Returns the RDMA pool allocator for the given memory type.
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, rdma_allocator_map_);
}

// Returns the host-side memory allocator for the given memory type.
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, host_allocator_map_);
}

// Returns the session-scope allocator for the given memory type.
SessionScopeMemAllocator &MemManager::SessionScopeMemInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, session_scope_allocator_map_);
}
} // namespace ge

+ 141
- 0
ge/graph/manager/graph_mem_manager.h View File

@@ -0,0 +1,141 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_
#define GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_

#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/manager/session_scope_mem_allocator.h"
#include "graph/node.h"
#include "runtime/mem.h"

namespace ge {
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;

///
/// @brief Process-wide registry of memory allocators, one of each kind per
///        rtMemType_t. Access via the Instance() singleton. All map access is
///        serialized by allocator_mutex_.
///
class MemManager {
 public:
  MemManager();
  virtual ~MemManager();
  static MemManager &Instance();
  MemoryAllocator &MemInstance(rtMemType_t memory_type);
  CachingAllocator &CachingInstance(rtMemType_t memory_type);
  RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type);
  HostMemAllocator &HostMemInstance(rtMemType_t memory_type);
  SessionScopeMemAllocator &SessionScopeMemInstance(rtMemType_t memory_type);
  MemManager(const MemManager &) = delete;
  MemManager &operator=(const MemManager &) = delete;
  ///
  /// @ingroup ge_graph
  /// @brief memory allocator manager init
  /// @param [in] memory_type memory types to create allocators for
  /// @return Status result of function
  ///
  Status Initialize(const std::vector<rtMemType_t> &memory_type);

  ///
  /// @ingroup ge_graph
  /// @brief memory allocator finalize
  /// @return void
  ///
  void Finalize() noexcept;

  // Memory types passed to the last successful Initialize().
  const std::vector<rtMemType_t> &GetAllMemoryType() const { return memory_type_; }

 private:
  ///
  /// @ingroup ge_graph
  /// @brief create (or reuse) one T allocator per requested memory type
  /// @param [in] memory_type memory type list
  /// @param [in,out] allocate_map memory allocator map
  /// @return Status result of function
  ///
  template <typename T>
  Status InitAllocator(const std::vector<rtMemType_t> &memory_type, std::map<rtMemType_t, T *> &allocate_map) {
    T *allocator = nullptr;
    for (unsigned int index : memory_type) {
      auto it = allocate_map.find(index);
      if (it == allocate_map.end()) {
        allocator = new (std::nothrow) T(index);
        if (allocator != nullptr) {
          allocate_map[index] = allocator;
          GELOGI("Create Allocator memory type[%u] success.", index);
        } else {
          REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index);
          GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed.");
        }
      } else {
        allocator = it->second;
      }

      if (allocator == nullptr) {
        GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed.");
        return ACL_ERROR_GE_MEMORY_ALLOCATION;
      } else {
        if (allocator->Initialize() != SUCCESS) {
          return ACL_ERROR_GE_INTERNAL_ERROR;
        }
      }
    }
    return SUCCESS;
  }
  ///
  /// @ingroup ge_graph
  /// @brief look up one allocator of the map
  /// @param [in] memory_type memory type
  /// @param [in] allocate_map memory allocator map. FIX: previously taken BY
  ///             VALUE, which copied the whole member map on every lookup and
  ///             did so before allocator_mutex_ was acquired, bypassing the
  ///             lock. Pass by reference so the lock actually guards access.
  /// @return Allocator ref (a static RT_MEMORY_RESERVED fallback if missing)
  ///
  template <typename T>
  T &GetAllocator(rtMemType_t memory_type, std::map<rtMemType_t, T *> &allocate_map) {
    std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
    T *allocator = nullptr;
    auto it = allocate_map.find(memory_type);
    if (it != allocate_map.end()) {
      allocator = it->second;
    }

    // Usually impossible
    if (allocator == nullptr) {
      GELOGW("Get allocator failed, memory type is %u.", memory_type);
      static T default_allocator(RT_MEMORY_RESERVED);
      return default_allocator;
    }
    return *allocator;
  }

  std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_;
  std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_;
  std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_;
  std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_;
  std::map<rtMemType_t, SessionScopeMemAllocator *> session_scope_allocator_map_;
  std::recursive_mutex allocator_mutex_;
  std::vector<rtMemType_t> memory_type_;
  bool init_ = false;
};
} // namespace ge

#endif  // GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_

+ 5
- 6
ge/graph/manager/graph_var_manager.cc View File

@@ -17,8 +17,7 @@
#include "graph/manager/graph_var_manager.h"

#include "graph/debug/ge_attr_define.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
#include "graph/utils/type_utils.h"

@@ -728,7 +727,7 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) {
var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize;

const string purpose("variables and constant op memory in training network.");
var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size);
var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size);
if (var_mem_base == nullptr) {
GELOGE(ge::INTERNAL_ERROR,
"VarManager::MallocVarMemory failed "
@@ -745,7 +744,7 @@ uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) {
return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr();
}
string memory_key = std::to_string(session_id_);
return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key);
return MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(memory_key);
}

uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) {
@@ -754,7 +753,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty
return logic_addr;
}
string mem_key = std::to_string(session_id_);
uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key);
uint8_t *mem_base = MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(mem_key);
if (mem_base == nullptr) {
return nullptr;
}
@@ -766,7 +765,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty
ge::Status VarManager::FreeVarMemory() {
std::lock_guard<std::recursive_mutex> lock(mutex_);
string memory_key = std::to_string(SessionId());
return MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key);
return MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key);
}

ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) {


+ 1
- 1
ge/graph/manager/memory_api.cc View File

@@ -19,7 +19,7 @@
#include <memory>

#include "common/ge/plugin_manager.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/utils/type_utils.h"


+ 2
- 1
ge/graph/manager/rdma_pool_allocator.cc View File

@@ -20,6 +20,7 @@
#include "framework/common/debug/ge_log.h"
#include "graph/ge_context.h"
#include "runtime/dev.h"
#include "graph/manager/graph_mem_manager.h"

namespace {
const size_t kAlignedSize = 512;
@@ -49,7 +50,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type)
})) {}

Status RdmaPoolAllocator::Initialize() {
memory_allocator_ = MemManager::Instance(memory_type_);
memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_);
if (memory_allocator_ == nullptr) {
return ACL_ERROR_GE_INTERNAL_ERROR;
}


+ 85
- 0
ge/graph/manager/session_scope_mem_allocator.cc View File

@@ -0,0 +1,85 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/manager/session_scope_mem_allocator.h"

#include <set>
#include <string>
#include <utility>

#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_mem_manager.h"

namespace ge {

// Construct with the backing device memory type; the underlying
// MemoryAllocator is resolved later, in Initialize().
SessionScopeMemAllocator::SessionScopeMemAllocator(rtMemType_t memory_type)
    : memory_type_(memory_type), memory_allocator_(nullptr) {}

// Binds this allocator to the process-wide MemoryAllocator for memory_type_.
// Re-initialization first releases everything allocated previously.
Status SessionScopeMemAllocator::Initialize(uint32_t device_id) {
  GELOGI("Device id %u", device_id);
  // when redo Initialize free old memory
  FreeAllMemory();
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_);
  // NOTE(review): MemInstance() returns a reference, so this address can never
  // be null; the check is dead code kept for symmetry with other allocators.
  if (memory_allocator_ == nullptr) {
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  return ge::SUCCESS;
}

// Releases every session's memory; called at teardown (and by Initialize()).
void SessionScopeMemAllocator::Finalize(uint32_t device_id) {
  GELOGI("Device id %u", device_id);
  FreeAllMemory();
}

uint8_t *SessionScopeMemAllocator::Malloc(size_t size, uint64_t session_id, uint32_t device_id) {
GELOGI("Start malloc memory, size:%zu, session id:%lu device id:%u", size, session_id, device_id);
const std::string purpose = "Memory for session scope.";
auto ptr = memory_allocator_->MallocMemory(purpose, size, device_id);
if (ptr == nullptr) {
GELOGE(ge::FAILED, "Malloc failed, no enough memory for size:%zu, session_id:%lu device_id:%u", size,
session_id, device_id);
return nullptr;
}
std::lock_guard<std::recursive_mutex> lock(mutex_);
std::shared_ptr<uint8_t> mem_ptr(ptr, [&](uint8_t *p) { (void)memory_allocator_->FreeMemory(p); });
allocated_memory_[session_id].emplace_back(size, mem_ptr);
return ptr;
}

// Drops every block recorded for `session_id`; the shared_ptr guards return
// the memory to the backing allocator as the records are destroyed.
Status SessionScopeMemAllocator::Free(uint64_t session_id, uint32_t device_id) {
  GELOGI("Free session:%lu memory, device id:%u.", session_id, device_id);
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  const auto iter = allocated_memory_.find(session_id);
  if (iter == allocated_memory_.end()) {
    REPORT_INNER_ERROR("E19999", "Param memory not allocated before, session_id:%lu device_id:%u, check invalid",
                       session_id, device_id);
    GELOGE(PARAM_INVALID, "Invalid session_id");
    return ge::PARAM_INVALID;
  }
  allocated_memory_.erase(iter);
  return ge::SUCCESS;
}

// Drops every per-session record; each shared_ptr guard frees its block back
// to the underlying MemoryAllocator as the vectors are cleared.
void SessionScopeMemAllocator::FreeAllMemory() {
  GELOGI("Free all memory");
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  for (auto &session_mem : allocated_memory_) {
    session_mem.second.clear();
  }
  allocated_memory_.clear();
}
} // namespace ge

+ 123
- 0
ge/graph/manager/session_scope_mem_allocator.h View File

@@ -0,0 +1,123 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_
#define GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_

#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include <unordered_map>
#include <functional>

#include "framework/common/ge_inner_error_codes.h"
#include "graph/node.h"
#include "graph/manager/block_memory.h"
#include "runtime/mem.h"
#include "graph/manager/graph_mem_allocator.h"

namespace ge {
/// Record of one session-scope allocation: the block size plus a shared
/// pointer whose deleter releases the underlying device memory when the last
/// holder drops it.
class SessionScopeMemoryInfo {
 public:
  SessionScopeMemoryInfo(size_t size, const std::shared_ptr<uint8_t> &ptr) : size(size), ptr(ptr) {}
  SessionScopeMemoryInfo() = delete;
  virtual ~SessionScopeMemoryInfo() = default;

  // Members are plainly copyable, so default the copy operations. This also
  // fixes the original hand-written operator=, which fell off the end without
  // `return *this;` (undefined behavior), and removes the pointless
  // self-comparison in the copy constructor (never true for a copy ctor).
  SessionScopeMemoryInfo(const SessionScopeMemoryInfo &other) = default;
  SessionScopeMemoryInfo &operator=(const SessionScopeMemoryInfo &other) = default;

 private:
  size_t size = 0;                       // byte size of the allocated block
  std::shared_ptr<uint8_t> ptr = nullptr;  // owning pointer; deleter frees device memory
};

/// Allocator whose blocks live for the whole session: memory obtained via
/// Malloc() is recorded per session id and only released by Free(session_id)
/// or FreeAllMemory(). Non-copyable; guarded internally by a recursive mutex.
class SessionScopeMemAllocator {
 public:
  explicit SessionScopeMemAllocator(rtMemType_t memory_type);

  SessionScopeMemAllocator(const SessionScopeMemAllocator &) = delete;

  SessionScopeMemAllocator &operator=(const SessionScopeMemAllocator &) = delete;

  virtual ~SessionScopeMemAllocator() = default;

  ///
  /// @ingroup ge_graph
  /// @brief session scope memory allocator init
  /// @param [in] device id
  /// @return Status of init
  ///
  Status Initialize(uint32_t device_id = 0);

  ///
  /// @ingroup ge_graph
  /// @brief memory allocator finalize, release all memory
  /// @return void
  ///
  void Finalize(uint32_t device_id = 0);

  ///
  /// @ingroup ge_graph
  /// @brief malloc memory, kept alive until the owning session frees it
  /// @param [in] size memory size
  /// @param [in] session_id session id
  /// @param [in] device id
  /// @return memory address
  ///
  uint8_t *Malloc(size_t size, uint64_t session_id, uint32_t device_id = 0);

  ///
  /// @ingroup ge_graph
  /// @brief free all memory recorded for one session
  /// @param [in] session_id session id
  /// @param [in] device_id device id
  /// @return Status result of function
  ///
  Status Free(uint64_t session_id, uint32_t device_id = 0);

 private:
  void FreeAllMemory();

 private:
  rtMemType_t memory_type_;

  // underlying device memory allocator used to service Malloc requests
  MemoryAllocator *memory_allocator_;

  // lock around all operations
  mutable std::recursive_mutex mutex_;

  // allocated blocks, keyed by session id
  std::unordered_map<uint64_t, std::vector<SessionScopeMemoryInfo>> allocated_memory_;
};
} // namespace ge
#endif // GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_

+ 1
- 4
ge/hybrid/common/npu_memory_allocator.cc View File

@@ -17,10 +17,7 @@
#include "npu_memory_allocator.h"
#include <mutex>
#include "framework/common/debug/log.h"
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"

namespace ge {
namespace hybrid {


+ 1
- 2
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -26,8 +26,7 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/utils/graph_utils.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/node_executor/node_executor.h"


+ 1
- 2
ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc View File

@@ -18,8 +18,7 @@
#include "hybrid/node_executor/host_cpu/kernel_factory.h"
#include "graph/passes/folding_pass.h"
#include "hybrid/model/hybrid_model.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"

namespace ge {


+ 1
- 1
ge/init/gelib.cc View File

@@ -39,7 +39,7 @@
#include "graph/ge_context.h"
#include "graph/ge_global_options.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "runtime/kernel.h"


+ 6
- 0
ge/session/inner_session.cc View File

@@ -32,6 +32,7 @@
#include "graph/common/local_context.h"
#include "graph/load/model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/utils/tensor_adapter.h"
#include "runtime/mem.h"

@@ -155,6 +156,11 @@ Status InnerSession::Finalize() {
// release var memory
GELOGI("VarManager free var memory.");
(void)VarManager::Instance(session_id_)->FreeVarMemory();

for (auto memory_type : MemManager::Instance().GetAllMemoryType()) {
(void)MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id_);
}

// release analyzer saved info(Session Level)
Analyzer::GetInstance()->DestroySessionJsonObject(session_id_);



+ 1
- 2
ge/single_op/single_op_manager.cc View File

@@ -19,8 +19,7 @@
#include <mutex>
#include <string>

#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_manager.h"

namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() {


+ 1
- 1
inc/framework/memory/memory_assigner.h View File

@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner {

MemoryAssigner &operator=(const MemoryAssigner &) = delete;

Status AssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_offset, size_t &zero_copy_mem_size);
Status AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size);

private:
ge::ComputeGraphPtr compute_graph_;


+ 6
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -337,8 +337,10 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc"
"${GE_CODE_DIR}/ge/graph/common/local_context.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_op.cc"
"${GE_CODE_DIR}/ge/common/model_saver.cc"
"${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc"
@@ -396,8 +398,10 @@ set(GRAPH_LOAD_COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc"
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc"
"${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc"
"${GE_CODE_DIR}/ge/common/thread_pool.cc"
)

@@ -792,6 +796,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/preprocess/graph_preprocess_unittest.cc"
"graph/manager/hcom_util_unittest.cc"
"graph/manager/graph_caching_allocator_unittest.cc"
"graph/manager/session_scope_mem_allocator_unittest.cc"
"graph/manager/run_graph_unittest.cc"
"graph/partition/dynamic_shape_partition_unittest.cc"
"graph/manager/graph_manager_unittest.cc"
@@ -824,6 +829,7 @@ set(PROFILING_MNG_TEST_FILES
set(HYBRID_TEST_FILES
"hybrid/ge_hybrid_unittest.cc"
"hybrid/known_node_executor_unittest.cc"
"hybrid/executor/worker/execution_engine_unittest.cc"
"hybrid/executor/subgraph_executor_unittest.cc"
"hybrid/executor/worker/execution_engine_unittest.cc"
"hybrid/model/hybrid_model_builder_unittest.cc"


+ 208
- 27
tests/ut/ge/graph/build/mem_assigner_unittest.cc View File

@@ -44,7 +44,8 @@ using domi::GetContext;

class UtestMemoryAssignerTest : public testing::Test {
public:
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", int64_t size = 1024) {
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some",
int64_t size = 1024) {
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
auto desc_temp = *desc_temp_ptr;
@@ -214,7 +215,8 @@ class UtestMemoryAssignerTest : public testing::Test {

return builder.GetGraph();
}
void make_ffts_reuse_graph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId,

void MakeFftsReuseGraph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId,
int32_t thread_scope_id_2 = kInvalidThreadScopeId) {
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512);
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0);
@@ -253,28 +255,119 @@ class UtestMemoryAssignerTest : public testing::Test {
graph->TopologicalSorting();
}

  // Builds a straight chain A->B->C->D->E->F->G of ops with workspaces.
  // C carries two workspaces (1024, 512) and the no-reuse-scope attr {0, 1},
  // E carries one workspace marked scope {1} -- presumably scope 1 tags a
  // workspace as session-scope memory for the block assigner; confirm against
  // BlockMemAssigner's handling of ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE.
  void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) {
    ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512);
    ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0);
    ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512);
    ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512);
    ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024);
    ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL);
    ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0);

    // C gets two workspaces; scope list {0, 1} applies per-workspace.
    std::vector<int64_t> workspace_bytes;
    workspace_bytes.push_back(1024);
    workspace_bytes.push_back(512);
    op_def_c->SetWorkspaceBytes(workspace_bytes);
    vector<int32_t> workspace_no_reuse_scope = { 0 , 1 };
    (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);

    vector<int32_t> workspace_no_reuse_scope_e = { 1 };
    (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e);

    ge::NodePtr node_a = graph->AddNode(op_def_a);
    ge::NodePtr node_b = graph->AddNode(op_def_b);
    ge::NodePtr node_c = graph->AddNode(op_def_c);
    ge::NodePtr node_d = graph->AddNode(op_def_d);
    ge::NodePtr node_e = graph->AddNode(op_def_e);
    ge::NodePtr node_f = graph->AddNode(op_def_f);
    ge::NodePtr node_g = graph->AddNode(op_def_g);

    // Wire the linear chain and sort so the assigner sees topological order.
    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0));
    graph->TopologicalSorting();
  }

void MakeContinuousReuseGraph(ge::ComputeGraphPtr graph, bool nopading = false) {
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512);
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0);
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512);
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512);
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024);
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL);
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0);

if (nopading) {
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true);
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, true);
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_OUTPUT_REUSE_INPUT, true);
(void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, 0);
} else {
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_INPUT, true);
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_OUTPUT, true);
}

ge::NodePtr node_a = graph->AddNode(op_def_a);
ge::NodePtr node_b = graph->AddNode(op_def_b);
ge::NodePtr node_c = graph->AddNode(op_def_c);
ge::NodePtr node_d = graph->AddNode(op_def_d);
ge::NodePtr node_e = graph->AddNode(op_def_e);
ge::NodePtr node_f = graph->AddNode(op_def_f);
ge::NodePtr node_g = graph->AddNode(op_def_g);

ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0));
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0));
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0));
graph->TopologicalSorting();
}

  // Builds a diamond graph with two batch branches joining at D:
  //   A -> B -> C -> D -> G  (B, C labelled "Batch_0")
  //   A -> E -> F -> D       (E, F labelled "Batch_1")
  // C and E each carry workspace no-reuse scope {1} -- presumably marking
  // their workspaces as session-scope memory so the two batch branches share
  // it; confirm against BlockMemAssigner's multi-batch handling.
  void MakeMultiBatchReuseGraph(ge::ComputeGraphPtr graph) {
    ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512);
    ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0);
    ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512);
    ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512);
    ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024);
    ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL);
    ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0);

    (void)ge::AttrUtils::SetStr(op_def_b, ATTR_NAME_BATCH_LABEL, "Batch_0");
    (void)ge::AttrUtils::SetStr(op_def_c, ATTR_NAME_BATCH_LABEL, "Batch_0");
    (void)ge::AttrUtils::SetStr(op_def_e, ATTR_NAME_BATCH_LABEL, "Batch_1");
    (void)ge::AttrUtils::SetStr(op_def_f, ATTR_NAME_BATCH_LABEL, "Batch_1");
    vector<int32_t> workspace_no_reuse_scope = { 1 };
    (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);
    (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);

    ge::NodePtr node_a = graph->AddNode(op_def_a);
    ge::NodePtr node_b = graph->AddNode(op_def_b);
    ge::NodePtr node_c = graph->AddNode(op_def_c);
    ge::NodePtr node_d = graph->AddNode(op_def_d);
    ge::NodePtr node_e = graph->AddNode(op_def_e);
    ge::NodePtr node_f = graph->AddNode(op_def_f);
    ge::NodePtr node_g = graph->AddNode(op_def_g);

    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_e->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0));
    graph->TopologicalSorting();
  }

protected:
void SetUp() {}

void TearDown() { GetContext().out_nodes_map.clear(); }
};

/*
TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
ge::NodePtr node_a = graph->AddNode(op_def_a);
MemoryBlock* memory_block = new MemoryBlock(0);
memory_block->Init(1, kOutput, node_a, 0, 1);
memory_block->real_size_list_.clear();
memory_block->Resize();

EXPECT_EQ(memory_block->Size(), 0);

delete memory_block;
}
*/

namespace ge {

class MockBlockMemAssigner : public BlockMemAssigner {
@@ -313,12 +406,44 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) {
EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600);
}

// No-padding continuous memory: D reuses its input buffer, so the assigned
// HBM footprint is expected to be 8192 bytes.
TEST_F(UtestMemoryAssignerTest, block_memory_assign_nopading_continuous_memory) {
  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
  MakeContinuousReuseGraph(graph, true);
  HybridMemAssigner hybridMemAssigner(graph);
  ge::Status ret = hybridMemAssigner.Assign();
  // HBM offset after assignment; 0 when no HBM memory was assigned.
  size_t offset = 0;
  auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM);
  if (it != hybridMemAssigner.GetMemOffsets().end()) {
    offset = it->second;
  }

  EXPECT_EQ(offset, 8192);
  EXPECT_EQ(ret, SUCCESS);
}

// Plain continuous memory (with padding): full assignment path via
// MemoryAssigner; expected HBM footprint is 11264 bytes.
TEST_F(UtestMemoryAssignerTest, block_memory_assign_continuous_memory) {
  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
  MakeContinuousReuseGraph(graph);
  map<uint64_t, size_t> mem_offset;
  size_t zero_copy_mem_size = 0;
  MemoryAssigner memoryAssigner(graph);
  ge::Status ret = memoryAssigner.AssignMemory(false, mem_offset, zero_copy_mem_size);
  // HBM offset after assignment; 0 when no HBM memory was assigned.
  size_t offset = 0;
  auto it = mem_offset.find(RT_MEMORY_HBM);
  if (it != mem_offset.end()) {
    offset = it->second;
  }

  EXPECT_EQ(offset, 11264);
  EXPECT_EQ(ret, SUCCESS);
}

TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
MakeGraph(graph);
auto node_f = graph->FindNode("F");
MemoryAssigner memory_assigner(graph);
map<int64_t, size_t> mem_offset;
map<uint64_t, size_t> mem_offset;
size_t zero_memory_size = 0;
EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS);

@@ -335,7 +460,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) {
std::string value = "A";
(void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value);
MemoryAssigner memory_assigner(graph);
map<int64_t, size_t> mem_offset;
map<uint64_t, size_t> mem_offset;
size_t zero_memory_size = 0;
VarManager::Instance(0)->Init(0, 0, 0, 0);
EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS);
@@ -356,7 +481,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) {
std::string value = "M";
(void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value);
MemoryAssigner memory_assigner(graph);
map<int64_t, size_t> mem_offset;
map<uint64_t, size_t> mem_offset;
size_t zero_memory_size = 0;
VarManager::Instance(0)->Init(0, 0, 0, 0);
EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS);
@@ -460,30 +585,86 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace)

TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
make_ffts_reuse_graph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId);
MakeFftsReuseGraph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId);
HybridMemAssigner hybridMemAssigner(graph);
ge::Status ret = hybridMemAssigner.Assign();
size_t offset = hybridMemAssigner.GetMemOffset();
size_t offset = 0;
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM);
if (it != hybridMemAssigner.GetMemOffsets().end()) {
offset = it->second;
}
EXPECT_EQ(offset, 5120);
EXPECT_EQ(ret, SUCCESS);
}

TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
make_ffts_reuse_graph(graph, 0, 1);
MakeFftsReuseGraph(graph, 0, 1);
HybridMemAssigner hybridMemAssigner(graph);
ge::Status ret = hybridMemAssigner.Assign();
size_t offset = hybridMemAssigner.GetMemOffset();
size_t offset = 0;
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM);
if (it != hybridMemAssigner.GetMemOffsets().end()) {
offset = it->second;
}
EXPECT_EQ(offset, 6656);
EXPECT_EQ(ret, SUCCESS);
}

TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) {
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
make_ffts_reuse_graph(graph, 0, kInvalidThreadScopeId);
MakeFftsReuseGraph(graph, 0, kInvalidThreadScopeId);
HybridMemAssigner hybridMemAssigner(graph);
ge::Status ret = hybridMemAssigner.Assign();
size_t offset = hybridMemAssigner.GetMemOffset();
size_t offset = 0;
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM);
if (it != hybridMemAssigner.GetMemOffsets().end()) {
offset = it->second;
}
EXPECT_EQ(offset, 5632);
EXPECT_EQ(ret, SUCCESS);
}

// Session-scope workspaces must land in the session-scope memory pool
// (kSessionScopeMemory | RT_MEMORY_HBM) rather than in plain HBM.
TEST_F(UtestMemoryAssignerTest, one_session_scope_op) {
  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
  MakeSessionScopeReuseGraph(graph);
  HybridMemAssigner assigner(graph);
  ge::Status ret = assigner.Assign();

  // Fetch the assigned offset for a memory type; 0 when that type is absent.
  auto lookup = [&assigner](uint64_t mem_type) -> size_t {
    const auto &offsets = assigner.GetMemOffsets();
    auto iter = offsets.find(mem_type);
    return (iter == offsets.end()) ? 0 : iter->second;
  };

  EXPECT_EQ(lookup(RT_MEMORY_HBM), 5120);
  EXPECT_EQ(lookup(kSessionScopeMemory | RT_MEMORY_HBM), 1536);
  EXPECT_EQ(ret, SUCCESS);
}

// Multi-batch graph: the two batch branches share session-scope workspace
// memory, so the session-scope pool stays at 1536 while HBM holds 6656.
TEST_F(UtestMemoryAssignerTest, multi_batch_reuse) {
  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
  MakeMultiBatchReuseGraph(graph);
  HybridMemAssigner hybridMemAssigner(graph);
  ge::Status ret = hybridMemAssigner.Assign();
  // HBM offset after assignment; 0 when no HBM memory was assigned.
  size_t offset = 0;
  auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM);
  if (it != hybridMemAssigner.GetMemOffsets().end()) {
    offset = it->second;
  }

  // Session-scope pool is keyed by (kSessionScopeMemory | RT_MEMORY_HBM).
  auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM);
  size_t session_scope_offset = 0;
  it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope);
  if (it != hybridMemAssigner.GetMemOffsets().end()) {
    session_scope_offset = it->second;
  }
  EXPECT_EQ(offset, 6656);
  EXPECT_EQ(session_scope_offset, 1536);
  EXPECT_EQ(ret, SUCCESS);
}

+ 54
- 0
tests/ut/ge/graph/build/model_builder_unittest.cc View File

@@ -30,6 +30,7 @@
#define protected public
#define private public
#include "graph/build/model_builder.h"
#include "memory/memory_assigner.h"
#undef protected
#undef private

@@ -127,6 +128,41 @@ class UtestModelBuilderTest : public testing::Test {
graph->TopologicalSorting();
}

  // Builds a chain A->B->C->D->E->F->G where C (two workspaces, scope {0,1})
  // and E (one workspace, scope {1}) carry session-scope workspace markings;
  // mirrors the mem_assigner fixture so ModelBuilder can be checked for
  // recording ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.
  void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) {
    ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512);
    ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0);
    ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512);
    ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512);
    ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024);
    ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512);
    ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0);

    // C gets two workspaces; scope list {0, 1} applies per-workspace.
    std::vector<int64_t> workspace_bytes;
    workspace_bytes.push_back(1024);
    workspace_bytes.push_back(512);
    op_def_c->SetWorkspaceBytes(workspace_bytes);
    vector<int32_t> workspace_no_reuse_scope = { 0 , 1 };
    (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope);

    vector<int32_t> workspace_no_reuse_scope_e = { 1 };
    (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e);

    ge::NodePtr node_a = graph->AddNode(op_def_a);
    ge::NodePtr node_b = graph->AddNode(op_def_b);
    ge::NodePtr node_c = graph->AddNode(op_def_c);
    ge::NodePtr node_d = graph->AddNode(op_def_d);
    ge::NodePtr node_e = graph->AddNode(op_def_e);
    ge::NodePtr node_f = graph->AddNode(op_def_f);
    ge::NodePtr node_g = graph->AddNode(op_def_g);

    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0));
    ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0));
    graph->TopologicalSorting();
  }

protected:
void SetUp() {}
@@ -162,6 +198,24 @@ TEST_F(UtestModelBuilderTest, test_save_atomic_bin) {
EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS);
}

// After memory assignment, BuildModelDef must record the session-scope pool
// size (1536 bytes here) into ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.
TEST_F(UtestModelBuilderTest, build_model_for_get_task) {
  Graph2SubGraphInfoList subgraphs;
  std::map<std::string, int> stream_max_parallel_num;
  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
  MakeSessionScopeReuseGraph(graph);
  std::map<std::string, std::string> option;
  ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false);

  // Populate builder.mem_type_to_mem_offset_ directly via the assigner.
  MemoryAssigner mem_assigner(graph);
  EXPECT_EQ(mem_assigner.AssignMemory(false, builder.mem_type_to_mem_offset_, builder.zero_copy_mem_size_), SUCCESS);

  ge::Model model;
  EXPECT_EQ(builder.BuildModelDef(model), SUCCESS);
  int64_t session_scope_mem_offset = 0;
  ge::AttrUtils::GetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset);
  EXPECT_EQ(session_scope_mem_offset, 1536);
}

TEST_F(UtestModelBuilderTest, test_model_save) {
Graph2SubGraphInfoList subgraphs;
std::map<std::string, int> stream_max_parallel_num;


+ 6
- 1
tests/ut/ge/graph/ge_executor_unittest.cc View File

@@ -43,6 +43,7 @@
#include "graph/manager/graph_mem_allocator.h"
#include "graph/utils/graph_utils.h"
#include "proto/ge_ir.pb.h"
#include "graph/manager/graph_var_manager.h"
#undef private
#undef protected

@@ -194,6 +195,11 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) {
}

TEST_F(UtestGeExecutor, execute_graph_with_stream) {
VarManager::Instance(0)->Init(0, 0, 0, 0);
map<string, string> options;
options[GRAPH_MEMORY_MAX_SIZE] = "1048576";
VarManager::Instance(0)->SetMemoryMallocSize(options);

DavinciModel model(0, nullptr);
ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

@@ -278,7 +284,6 @@ TEST_F(UtestGeExecutor, execute_graph_with_stream) {
OutputData output_data;
vector<Tensor> outputs;
EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS);

GraphExecutor graph_executer;
graph_executer.init_flag_ = true;


+ 1
- 2
tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc View File

@@ -28,8 +28,7 @@
#define protected public
#define private public
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#undef protected
#undef private


+ 75
- 0
tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc View File

@@ -0,0 +1,75 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <memory>

#include "graph/anchor.h"
#include "graph/attr_value.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "omg/omg_inner_types.h"

#define protected public
#define private public
#include "graph/manager/graph_mem_manager.h"
#undef protected
#undef private

using namespace std;
using namespace testing;
using namespace ge;
using domi::GetContext;

// Fixture for SessionScopeMemAllocator tests; clears the shared parser
// context between cases so tests stay independent.
class UtestSessionScopeMemAllocator : public testing::Test {
 protected:
  void SetUp() {}

  void TearDown() { GetContext().out_nodes_map.clear(); }
};

// MemManager initializes its per-type allocators (incl. session scope) for HBM.
TEST_F(UtestSessionScopeMemAllocator, initialize_success) {
  std::vector<rtMemType_t> mem_type;
  mem_type.push_back(RT_MEMORY_HBM);
  EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
  MemManager::Instance().Finalize();
}

// Session-scope Malloc for session 0 returns a non-null block.
TEST_F(UtestSessionScopeMemAllocator, malloc_success) {
  std::vector<rtMemType_t> mem_type;
  mem_type.push_back(RT_MEMORY_HBM);
  EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
  uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1000, 0);
  EXPECT_NE(nullptr, ptr);
  MemManager::Instance().Finalize();
}

// Free releases everything allocated for a session; a second Free for the
// same session must fail because nothing remains recorded for it.
TEST_F(UtestSessionScopeMemAllocator, free_success) {
  std::vector<rtMemType_t> mem_type;
  mem_type.push_back(RT_MEMORY_HBM);
  EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS);
  uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0);
  EXPECT_NE(nullptr, ptr);
  ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0);
  EXPECT_NE(nullptr, ptr);

  EXPECT_EQ(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0));
  // Second free of session 0: no allocations left, so PARAM_INVALID expected.
  EXPECT_NE(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0));
  MemManager::Instance().Finalize();
}

+ 1
- 1
tests/ut/ge/graph/passes/variable_op_pass_unittest.cc View File

@@ -35,7 +35,7 @@
#include "graph/manager/graph_context.h"
#include "graph/optimize/graph_optimize.h"
#include "graph/manager/util/variable_accelerate_ctrl.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/graph_mem_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph_builder_utils.h"
#include "cce/dnn.h"


Loading…
Cancel
Save