From 9998947de6598af966704c833ec134e083aea074 Mon Sep 17 00:00:00 2001
From: "gengchao4@huawei.com"
Date: Wed, 3 Feb 2021 20:34:02 +0800
Subject: [PATCH] add continuous input memory assign for cascade

---
 ge/graph/build/memory/graph_mem_assigner.cc | 203 +++++++++++++++-----
 ge/graph/build/memory/graph_mem_assigner.h  |   8 +
 2 files changed, 166 insertions(+), 45 deletions(-)

diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc
index ca64c869..579036bc 100755
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -374,63 +374,43 @@ bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op
   // If GetBool fail, is_peer_reference is false.
   (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
   GE_IF_BOOL_EXEC(is_peer_reference,
-                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
+                  std::string warning = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                   " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
-                  " requires continuous output. There may be conflict between the two." +
-                  "This node is not supported now.";
-                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
-                  return true;);
+                  " is ref. There may be conflict between the two.";
+                  GELOGW("%s", warning.c_str());
+                  return false;);
   return false;
 }
 
 Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
   Status ret;
+  // Nodes whose continuous input memory assignment is postponed; handled later in reverse topo order
+  std::vector<NodePtr> nodes_stack;
+  std::map<NodePtr, uint32_t> node_2_continuous_type;
+
+  // Traverse nodes
   for (auto &node : compute_graph_->GetAllNodes()) {
     GE_CHECK_NOTNULL(node);
-    auto continuous_type = GetContinuousMemoryType(node->GetOpDesc());
-
+    uint32_t continuous_type;
+    auto iter = node_2_continuous_type.find(node);
+    if (iter == node_2_continuous_type.end()) {
+      continuous_type = GetContinuousMemoryType(node->GetOpDesc());
+      node_2_continuous_type.emplace(node, continuous_type);
+    } else {
+      continuous_type = iter->second;
+    }
     // Assign continuous input memory
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
-    int64_t memory_type = RT_MEMORY_HBM;
     if (continuous_input) {
-      int64_t mem_clean_start = 0;
-      int64_t mem_clean_size = 0;
-      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed.");
-      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
-      if (ret != ge::SUCCESS) {
-        GELOGE(ret, "Assign continuous input memory failed!");
-        return ret;
-      }
-
-      // Clean up atomic address, eg, hcom node
-      vector<int32_t> input_indexes;
-      // If GetListInt fail, input_indexes is empty.
-      (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
-      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
-        // check whether there is an atomic conflict between the current node and the peer out node
-        if (!CheckInputIsSupportAtomic(node)) {
-          GELOGE(ge::FAILED,
-                 "There is an atomic conflict between the current node and the peer out node, not supported!");
-          return ge::FAILED;
-        }
-
-        const auto &in_control_anchor = node->GetInControlAnchor();
-        GE_CHECK_NOTNULL(in_control_anchor);
-        for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
-          GE_CHECK_NOTNULL(peer_out_control_anchor);
-          auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
-          if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
-            ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
-            if (ret != SUCCESS) {
-              GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
-              return ret;
-            }
-          }
-        }
+      if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
+        GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
+                          "Assign node %s continuous input memory failed.", node->GetName().c_str())
+      } else {
+        nodes_stack.push_back(node);
       }
     }
-
     // Assign continuous output memory
+    int64_t memory_type = RT_MEMORY_HBM;
     bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
     if (continuous_output) {
       GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed.");
@@ -441,6 +421,18 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
       }
     }
   }
+  // Assign continuous input memory for the postponed nodes, popping them in reverse topo order
+  while (!nodes_stack.empty()) {
+    auto node = nodes_stack.back();
+    nodes_stack.pop_back();
+    auto iter = node_2_continuous_type.find(node);
+    if (iter == node_2_continuous_type.end()) {
+      GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str());
+      return FAILED;
+    }
+    GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second),
+                      "Assign node %s continuous input memory failed.", node->GetName().c_str())
+  }
   for (auto pair : memory_offset_) {
     GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
            pair.second.mem_offset_);
@@ -463,7 +455,15 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
   int64_t mem_offset = iter->second.mem_offset_;
   int64_t extra_memory_size = 0;
   bool is_continuous_input_allocated = false;
-  (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  vector<int64_t> output_list_this = op_desc->GetOutputOffset();
+  if (output_list_this.empty()) {
+    std::string error = "node:" + FmtToStr(op_desc->GetName()) + " has no output offset";
+    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
+    return FAILED;
+  }
+  (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
   for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
     GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
     auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
@@ -505,6 +505,17 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     // when continuous input has been allocated first input is beginning offset
     bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
     if (is_allocated_first_input) {
+      std::map<int32_t, int32_t> out2ins;
+      GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str());
+      // output is beginning offset, set offset for input; only support this case now
+      if (out2ins.size() == 1 && out2ins.begin()->second == 0) {
+        output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
+        peer_op_desc->SetOutputOffset(output_list);
+      } else if (!out2ins.empty()) {
+        GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first,
+               out2ins.begin()->second, out2ins.size());
+      }
+      // first input is beginning offset
       mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
       continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
     } else {
@@ -882,7 +893,7 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
     if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
         (peer_op_desc->GetType() == VARIABLE)) {
       std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" +
-        FmtToStr(peer_op_desc->GetName()) + " is invalid, only support Constant/AippData/Variable";
+        FmtToStr(peer_op_desc->GetName()) + " is invalid, Constant/AippData/Variable is not supported";
       GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
       return false;
     }
@@ -1502,4 +1513,106 @@ void GraphMemoryAssigner::PrintMemoryOffset() {
            pair.first, pair.second.mem_offset_);
   }
 }
+
+// Collects into `out2ins`, for every output of `node` that GraphUtils::IsRefFromInput reports as a ref,
+// the pair output index -> referenced input index. Returns FAILED when the reported input index has no
+// in data anchor on `node`, SUCCESS otherwise.
+ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int32_t> &out2ins) {
+  for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
+    int32_t reuse_in_index = -1;
+    bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
+    if (reuse_input_flag) {
+      if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
+        out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
+      } else {
+        GELOGE(FAILED, "Invalid reuse_input value %d on output %d of node %s, please check attr reuse_input",
+               reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
+        return FAILED;
+      }
+    }
+  }
+
+  return ge::SUCCESS;
+}
+
+// Returns true when `input_continuous_node` can get its continuous input memory assigned right away,
+// i.e. none of its in data nodes and none of its out data nodes needs continuous input memory itself.
+// Returns false when the caller should postpone the node. As a side effect, the continuous type of
+// every visited out data node is cached in `node_2_continuous_type`.
+bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
+    const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
+  for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
+    auto iter = node_2_continuous_type.find(in_node);
+    // In data nodes come first in topo order, so they are normally cached already; if one is not,
+    // compute its type now instead of dereferencing end().
+    if (iter == node_2_continuous_type.end()) {
+      iter = node_2_continuous_type.emplace(in_node, GetContinuousMemoryType(in_node->GetOpDesc())).first;
+    }
+    auto continuous_type = iter->second;
+    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
+    if (continuous_input) {
+      GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
+             input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
+      return false;
+    }
+  }
+  for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
+    auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
+    node_2_continuous_type.emplace(out_node, continuous_type);
+    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
+    if (continuous_input) {
+      GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
+             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Assigns continuous input memory for `input_continuous_node`. When the first entry of its
+// ATOMIC_ATTR_INPUT_INDEX attr equals kAllInputAddrIsAtomic, the assigned range is also set as the
+// clean range on every ATOMICADDRCLEAN node connected to its in control anchor.
+ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
+                                                                             uint32_t continuous_type) {
+  int64_t mem_clean_start = 0;
+  int64_t mem_clean_size = 0;
+  int64_t memory_type = RT_MEMORY_HBM;
+
+  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed.");
+  auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
+  if (ret != ge::SUCCESS) {
+    GELOGE(ret, "Assign continuous input memory failed!");
+    return ret;
+  }
+
+  // Clean up atomic address, eg, hcom node
+  vector<int32_t> input_indexes;
+  // If GetListInt fail, input_indexes is empty.
+  (void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
+  if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
+    // check whether there is an atomic conflict between the current node and the peer out node
+    if (!CheckInputIsSupportAtomic(input_continuous_node)) {
+      GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!");
+      return ge::FAILED;
+    }
+
+    const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
+    GE_CHECK_NOTNULL(in_control_anchor);
+    for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
+      GE_CHECK_NOTNULL(peer_out_control_anchor);
+      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
+      if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
+        ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
+        if (ret != SUCCESS) {
+          GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
+          return ret;
+        }
+      }
+    }
+  }
+
+  return ge::SUCCESS;
+}
+
 }  // namespace ge
diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h
index be6c47b0..b56c3716 100755
--- a/ge/graph/build/memory/graph_mem_assigner.h
+++ b/ge/graph/build/memory/graph_mem_assigner.h
@@ -125,6 +125,14 @@ class GraphMemoryAssigner {
 
   ge::Status ReAssignAtomicMemory(bool is_loop_graph);
 
+  ge::Status GetAllRef(const NodePtr &node, std::map<int32_t, int32_t> &out2ins);
+
+  bool AssignContinuousInputMemoryWithAtomicProcessDirectly(const NodePtr &input_continuous_node,
+                                                            std::map<NodePtr, uint32_t> &node_2_continuous_type);
+
+  ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
+                                                          uint32_t continuous_type);
+
   ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
                                               map<string, vector<NodePtr>> &connecting_output_atomic_nodes);
 