Browse Source

Continuous memory optimization, code refactoring

tags/v1.2.0
TangQunzhang 3 years ago
parent
commit
14732acd6f
8 changed files with 503 additions and 708 deletions
  1. +2
    -2
      ge/graph/build/memory/binary_block_mem_assigner.cc
  2. +175
    -89
      ge/graph/build/memory/block_mem_assigner.cc
  3. +40
    -14
      ge/graph/build/memory/block_mem_assigner.h
  4. +277
    -578
      ge/graph/build/memory/graph_mem_assigner.cc
  5. +4
    -20
      ge/graph/build/memory/graph_mem_assigner.h
  6. +3
    -3
      ge/graph/load/new_model_manager/davinci_model.cc
  7. +1
    -1
      metadef
  8. +1
    -1
      parser

+ 2
- 2
ge/graph/build/memory/binary_block_mem_assigner.cc View File

@@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
GELOGW("Vector all_memory_size is empty!");
return SUCCESS;
}
if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) {
GELOGE(FAILED, "dividend is 0!");
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front());
return FAILED;
}
// Memory size is 512 aligned, so it is not necessary to take less than 512


+ 175
- 89
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -65,10 +65,7 @@ void AlignMemOffset(size_t &mem_align_size) {
}

static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
auto left_node_op_desc = left.node->GetOpDesc();
auto right_node_op_desc = right.node->GetOpDesc();
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)
&& (left_node_op_desc->GetId() < right_node_op_desc->GetId())) {
if (left.GetLifeBegin() < right.GetLifeBegin()) {
return true;
}
return false;
@@ -100,14 +97,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) {
auto left_node_op_desc = left.node->GetOpDesc();
auto right_node_op_desc = right.node->GetOpDesc();
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) {
if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) {
if (left.life_time_end >= static_cast<size_t>(right_node_op_desc->GetId())) {
if (left.GetLifeBegin() < right.GetLifeBegin()) {
if (left.life_time_end >= right.GetLifeBegin()) {
return true;
}
} else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) {
} else if (left.GetLifeBegin() == right.GetLifeBegin()) {
return true;
} else {
if (right.life_time_end >= static_cast<size_t>(left_node_op_desc->GetId())) {
if (right.life_time_end >= left.GetLifeBegin()) {
return true;
}
}
@@ -325,12 +322,7 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_
size_t MemoryBlock::GetLifeBegin() {
size_t life_time = 0;
if (!node_type_index_list_.empty()) {
if (node_type_index_list_.front().node != nullptr) {
auto node_op_desc = node_type_index_list_.front().node->GetOpDesc();
if (node_op_desc != nullptr) {
life_time = node_op_desc->GetId();
}
}
life_time = node_type_index_list_.front().GetLifeBegin();
}
return life_time;
}
@@ -417,7 +409,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_
depend_stream_life_[stream_id_] = GetLifeBegin();
}

size_t MemoryBlock::GetLifeEnd() {
size_t MemoryBlock::GetLifeEnd() const {
if (!node_type_index_list_.empty()) {
return node_type_index_list_.back().life_time_end;
}
@@ -571,32 +563,29 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {

for (auto &out_anchor : n->GetAllOutDataAnchors()) {
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
bool reuse_input = false;
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS,
GELOGI("Get reuse_input failed"));

if (!reuse_input) {
int64_t size = 0;
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
batch_all_memory_size[batch_label].emplace_back(size);
if (batch_total_size.find(batch_label) == batch_total_size.end()) {
batch_total_size[batch_label] = size;
} else {
batch_total_size[batch_label] += size;
}
int64_t size = 0;
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.",
node_op_desc->GetName().c_str(), size);
return;);
batch_all_memory_size[batch_label].emplace_back(size);
if (batch_total_size.find(batch_label) == batch_total_size.end()) {
batch_total_size[batch_label] = size;
} else {
batch_total_size[batch_label] += size;
}

if (!anchor_to_symbol_.empty()) {
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString());
if (iter1 == anchor_to_symbol_.end()) {
continue;
}
const std::string &symbol = iter1->second;
auto iter2 = symbol_size_.find(symbol);
if (iter2 == symbol_size_.end()) {
symbol_size_[symbol] = size;
} else if (size > static_cast<int64_t>(iter2->second)) {
iter2->second = size;
}
if (!anchor_to_symbol_.empty()) {
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString());
if (iter1 == anchor_to_symbol_.end()) {
continue;
}
const std::string &symbol = iter1->second;
auto iter2 = symbol_size_.find(symbol);
if (iter2 == symbol_size_.end()) {
symbol_size_[symbol] = size;
} else if (size > static_cast<int64_t>(iter2->second)) {
iter2->second = size;
}
}
}
@@ -637,35 +626,17 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) {
return false;
}

void AddReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) {
string key = std::to_string(mem_block.Size());
key += "_" + std::to_string(mem_block.stream_id_);
key += "_" + std::to_string(mem_block.memory_type_);
auto it = reusable_block_counts.find(key);
if (it != reusable_block_counts.end()) {
it->second++;
} else {
reusable_block_counts[key] = 1;
}
}

void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) {
string key = std::to_string(mem_block.Size());
key += "_" + std::to_string(mem_block.stream_id_);
key += "_" + std::to_string(mem_block.memory_type_);
auto it = reusable_block_counts.find(key);
if (it != reusable_block_counts.end()) {
if (it->second > 0) {
it->second--;
}
}
}

bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block,
size_t block_size, size_t real_size, bool continuous) {
/// @brief Decide whether an existing memory block can be reused for a new allocation.
/// @param continuous_life_begin  Begin of the continuous-input life range; 0 means
///                               the allocation is not part of a continuous-input group.
/// @param reusable_block         Candidate block from the reusable pool.
/// @param block_size             Required block size.
/// @return true when the sizes match and the candidate's life time does not
///         overlap the continuous life range.
bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) {
  if (reusable_block.Size() != block_size) {
    return false;
  }
  // In some continuous-input cases the continuous first input node is not the
  // topologically first node, so the candidate must already be dead before the
  // continuous life range begins.
  return (continuous_life_begin == 0) || (continuous_life_begin > reusable_block.GetLifeEnd());
}
@@ -676,6 +647,13 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) {
return false;
}
auto node_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str());
return false;);
std::vector<int64_t> offsets_for_fusion = {};
bool has_lx_fusion_attr =
AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);

if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) {
auto out_anchor = n->GetOutDataAnchor(out_index);
GE_IF_BOOL_EXEC(out_anchor == nullptr,
@@ -698,16 +676,17 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
return false;);

// If GetBool fail, is_input_continuous is false.
bool is_input_continuous_no_padding = false;
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT,
is_input_continuous_no_padding);
if (is_input_continuous_no_padding) {
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous);
if (is_input_continuous) {
reset_zero_copy_flag = true;
return false;
has_lx_fusion_attr = true;
} else {
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
}
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);

GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()),
// lx_fusion memory only assigns the first input; some of broadcast's inputs are variable and some are not, so reassign later
GE_IF_BOOL_EXEC(is_input_continuous &&
(CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))),
GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index);
no_need_assign_memory = true;
return false;);
@@ -721,6 +700,10 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
// Only set attr one times.
if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) {
(void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
// lx fusion case assigns max size for the first block, so reuse it as non-continuous
GE_IF_BOOL_EXEC(has_lx_fusion_attr,
is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index);
return false;);
node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize();
}
peer_input_index = peer_in_anchor->GetIdx();
@@ -733,6 +716,95 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
return false;
}

/// @brief Check that the continuous-input consumer of output `out_index` is the
///        longest-living (max topological id) consumer of that output.
/// @param n          Producer node.
/// @param out_index  Output anchor index on `n`.
/// @return true when a continuous-input consumer exists and its life time equals
///         the maximum life time over all consumers of this output.
bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) {
  if (n == nullptr) {
    return false;
  }

  int64_t max_consumer_life = 0;
  int64_t continuous_consumer_life = 0;
  if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) {
    auto out_anchor = n->GetOutDataAnchor(out_index);
    if (out_anchor == nullptr) {
      return false;
    }

    // The continuous-input node's life time should be the maximum among all consumers.
    for (const auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
      if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)) {
        return false;
      }
      auto consumer_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc();
      GE_IF_BOOL_EXEC(consumer_desc == nullptr,
                      GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index);
                      return false;);

      if (consumer_desc->GetId() > max_consumer_life) {
        max_consumer_life = consumer_desc->GetId();
      }

      // If GetBool fails, the flag stays false.
      bool consumer_is_continuous = false;
      (void)ge::AttrUtils::GetBool(consumer_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, consumer_is_continuous);
      if (!consumer_is_continuous) {
        (void)ge::AttrUtils::GetBool(consumer_desc, ATTR_NAME_CONTINUOUS_INPUT, consumer_is_continuous);
      }
      if (consumer_is_continuous) {
        continuous_consumer_life = consumer_desc->GetId();
      }
    }
  }
  return (max_consumer_life != 0) && (continuous_consumer_life == max_consumer_life);
}

///
/// @ingroup GE
/// @brief Check whether continuous memory is reusable
/// @return bool
///
bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) {
  // n and peer_node have already been null-checked by the caller.
  auto node_desc = n->GetOpDesc();
  auto peer_node_desc = peer_node->GetOpDesc();
  // Start the continuous life range at this node's topological id; it is lowered
  // below if an earlier producer also feeds the continuous-input node.
  continuous_life_begin_ = static_cast<size_t>(node_desc->GetId());
  // lx fusion case: check every producer of the continuous-input node; the first
  // input node's life time should be the minimum over all producers.
  for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) {
    if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) ||
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) ||
        (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) {
      GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index);
      return false;
    }
    auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc();
    ///
    ///  node2  node1  node3
    ///    |   /        /  |
    ///    node5      node6
    /// The first input node's life time is not necessarily the minimum:
    /// when node5's first input node2's life time is not min (node2 > node1), use node1's life time for reuse.
    ///
    if (static_cast<size_t>(peer_out_node_desc->GetId()) < continuous_life_begin_) {
      continuous_life_begin_ = static_cast<size_t>(peer_out_node_desc->GetId());
      GELOGI(
          "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time,"
          "min is node[%s] life[%zu]",
          n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(),
          peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_);
    }
    // When node3's output node5's life time is not max (node6 > node5), do not reuse.
    if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(),
                                      in_anchor->GetPeerOutAnchor()->GetIdx())) {
      GELOGI(
          "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s "
          "max life node",
          n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(),
          peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx());
      return false;
    }
  }
  return true;
}

///
/// @ingroup GE
/// @brief Check pre_reuse flag & post_reuse flag for each symbol
@@ -1018,8 +1090,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue);

// A node can reuse blocks of the same stream and preorder streams
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) {
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size);
if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) {
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_},
real_size, no_align_size);
if (mem_type == kOutput) {
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString());
if (iter != anchor_to_symbol_.end()) {
@@ -1028,7 +1101,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
}
reusable_block->continuous_block_ = continuous;
reusable_block->ref_count_++;
ReduceReusableBlockCount(*reusable_block, reusable_block_counts_);
reusable_blocks_[memory_type][stream_id].erase((++it).base());
return reusable_block;
}
@@ -1041,8 +1113,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,

// Data and netoutput need zero copy block
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);

block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId());
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size);
block->stream_id_ = node_op_desc->GetStreamId();
block->ref_count_++;
block->continuous_block_ = continuous;
@@ -1142,8 +1213,23 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
std::string symbol;
if (IsSymbolExist(node_index_io, symbol)) {
block = symbol_blocks_[symbol];
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size);
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
return nullptr);
// reduce old size
size_t align_size = block->Size();
AlignMemOffset(align_size);
theory_memory_size_ -= align_size;

auto block_size = GetBlockSize(size, ranges);
block->SetSize(block_size);
block->SetLifeTimeEnd(life_time_);
block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size);
block->ref_count_++;

// add new size
align_size = block_size;
AlignMemOffset(align_size);
theory_memory_size_ += align_size;
} else {
int64_t max_size = size;
int64_t memory_type = RT_MEMORY_HBM;
@@ -1196,7 +1282,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
GELOGI("Get dst_reuse_input_index failed"));
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size());
out_count_reuse_input += 1;
reuse_input = true;
}
@@ -1237,7 +1322,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i
if (static_cast<uint32_t>(index) == output_index) {
if (node->GetOwnerComputeGraph() != nullptr) {
string graph_name = node->GetOwnerComputeGraph()->GetName();
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(),
GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(),
op_desc->GetName().c_str(), index, op_desc->GetStreamId());
}
return true;
@@ -1275,7 +1360,6 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock
if (to_release->same_stream_) {
to_release->SetLifeTimeEnd(life_time_);
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
}
}
}
@@ -1375,6 +1459,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
}

is_op_reuse_mem_ = true;
continuous_life_begin_ = 0;
if (op_reuse_env_valid_ == true) {
vector<string>::iterator it_name =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName());
@@ -1426,7 +1511,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
continue;
}
// atomic can't be reused
bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic;
bool need_change = is_op_reuse_mem_ && is_atomic;
if (need_change) {
is_op_reuse_mem_ = false;
}
@@ -1819,11 +1904,12 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
}
op_desc->SetWorkspace(workspace_list);
}
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] "
"life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(),
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_,
block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input,
GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] "
"noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]",
graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(),
node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_,
block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level,
block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input,
block->batch_label_.c_str());
}



+ 40
- 14
ge/graph/build/memory/block_mem_assigner.h View File

@@ -39,14 +39,15 @@ using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>;
enum OpMemoryType { kOutput, kWorkspace };

struct NodeTypeIndex {
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false)
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {}
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0)
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {}

ge::NodePtr node = nullptr;
OpMemoryType mem_type = kOutput;
uint32_t index = 0;
size_t life_time_end = kMaxLifeTime;
bool ref_input = false;
size_t life_time_begin = 0;
size_t life_time_end = kMaxLifeTime;
const string GetMemType() const {
if (mem_type == kOutput) {
return "output";
@@ -55,6 +56,34 @@ struct NodeTypeIndex {
}
return "unknown";
}

size_t GetLifeBegin() const {
if ((node == nullptr) || (node->GetOpDesc() == nullptr)) {
return 0;
}

if ((life_time_begin > 0) && (life_time_begin < static_cast<size_t>(node->GetOpDesc()->GetId()))) {
return life_time_begin;
} else {
return node->GetOpDesc()->GetId();
}
}

// Returns a printable life-begin description for logs: "begin-id" when the
// effective life begin differs from the node's topological id, otherwise just
// the id. Empty string when the node or its op desc is missing.
std::string GetLifeBeginDesc() const {
  if (node == nullptr) {
    return "";
  }
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc == nullptr) {
    return "";
  }
  const auto life_begin = GetLifeBegin();
  const auto node_id = static_cast<size_t>(node_op_desc->GetId());
  if (life_begin == node_id) {
    return std::to_string(node_op_desc->GetId());
  }
  return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId());
}
};

class MemoryBlock {
@@ -86,16 +115,13 @@ class MemoryBlock {
symbol_list_.clear();
}

void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size,
int64_t stream_id) {
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
node_type_index_list_.emplace_back(node, type, out_index, false);
if (stream_id != stream_id_) {
same_stream_ = false;
size_t Size() const { return block_size_; }

void SetSize(size_t size) {
if (size > block_size_) {
block_size_ = size;
}
}
size_t Size() const { return block_size_; }

size_t AlignSize() const;

@@ -143,7 +169,7 @@ class MemoryBlock {

size_t GetLifeBegin();

size_t GetLifeEnd();
size_t GetLifeEnd() const;

void AddDependLifeBegin(DependStreamLife &node_depend_stream_life);

@@ -406,6 +432,7 @@ class BlockMemAssigner : public MemAssigner {
bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name,
uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag);

bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index);
///
/// @ingroup GE
/// @|+++++++++block1++++++++| |+++++++++block1++++++++|
@@ -425,8 +452,6 @@ class BlockMemAssigner : public MemAssigner {

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;

std::map<std::string, uint64_t> reusable_block_counts_;

std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_;

std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_;
@@ -456,6 +481,7 @@ class BlockMemAssigner : public MemAssigner {

std::string max_batch_label_;

size_t continuous_life_begin_ = 0;
///
/// @ [stream1][nodeid]
/// @[nodeid] [stream2][nodeid]


+ 277
- 578
ge/graph/build/memory/graph_mem_assigner.cc
File diff suppressed because it is too large
View File


+ 4
- 20
ge/graph/build/memory/graph_mem_assigner.h View File

@@ -119,31 +119,15 @@ class GraphMemoryAssigner {
///
ge::Status ReAssignContinuousMemory(bool is_loop_graph);

ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory();

ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory();

ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse);

ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse);

ge::Status ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map, int32_t mem_reuse_model);

ge::Status GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
int32_t mem_reuse_model, string &max_batch_label);

ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index,
int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size);

ge::Status ReAssignAtomicMemory(bool is_loop_graph);

ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
map<string, vector<NodePtr>> &connecting_output_atomic_nodes);

ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
int64_t &continuous_mem_size, int64_t memory_type);
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type);

ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node);
ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type);

///
/// @brief check the input of node whether support atomic attr
@@ -169,10 +153,10 @@ class GraphMemoryAssigner {
ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes);

ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
const std::vector<int64_t> &mem_offset_end);
const std::vector<int64_t> &mem_offset_end, int64_t memory_type);

ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start,
const std::vector<int64_t> &atomic_mem_size);
const std::vector<int64_t> &atomic_mem_size, int64_t memory_type);

ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node);



+ 3
- 3
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -1820,7 +1820,7 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp
///
Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
if (!op_desc->HasAttr(ATTR_NAME_AIPP)) {
GELOGW("there is not AIPP related with index %u.", index);
GELOGW("There is not AIPP related with index %u.", index);
return SUCCESS;
}

@@ -1829,7 +1829,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
"Data node do not contain param aipp!");
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed");
GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u",
GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank());

AippConfigInfo aipp_info;
@@ -2492,7 +2492,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r
uint64_t buffer_length = buffer.length;
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data));

GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]",
GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]",
runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length);
GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind));
idx++;


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit dc6cceb67bc82b567bcbd6f415776644253e1467
Subproject commit fcd0833cffcd201701f71d17db0c696c1bb01715

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit 4e72aae41e78af1a19cd965da4a45cbd988b9a75
Subproject commit 1601d66b6187c83cbf38e762beb5538ce2c7c573

Loading…
Cancel
Save