Browse Source

!1075 add continuous input memory assign for cascade

From: @ni100die
Reviewed-by: @tangqunzhang,@ji_chen,@xchu42
Signed-off-by: @ji_chen
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
cde30eaa26
2 changed files with 152 additions and 45 deletions
  1. +144
    -45
      ge/graph/build/memory/graph_mem_assigner.cc
  2. +8
    -0
      ge/graph/build/memory/graph_mem_assigner.h

+ 144
- 45
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -374,63 +374,43 @@ bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op
// If GetBool fail, is_peer_reference is false.
(void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
GE_IF_BOOL_EXEC(is_peer_reference,
std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
std::string warning = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
" requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
" requires continuous output. There may be conflict between the two." +
"This node is not supported now.";
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
return true;);
" is ref. There may be conflict between the two.";
GELOGW("%s", warning.c_str());
return false;);
return false;
}

Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
Status ret;
// Stored nodes which need assign continuous input memory in `reverse topo order`
std::vector<NodePtr> nodes_stack;
std::map<NodePtr, uint32_t> node_2_continuous_type;

// Traverse nodes
for (auto &node : compute_graph_->GetAllNodes()) {
GE_CHECK_NOTNULL(node);
auto continuous_type = GetContinuousMemoryType(node->GetOpDesc());

uint32_t continuous_type;
auto iter = node_2_continuous_type.find(node);
if (iter == node_2_continuous_type.end()) {
continuous_type = GetContinuousMemoryType(node->GetOpDesc());
node_2_continuous_type.emplace(node, continuous_type);
} else {
continuous_type = iter->second;
}
// Assign continuous input memory
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
int64_t memory_type = RT_MEMORY_HBM;
if (continuous_input) {
int64_t mem_clean_start = 0;
int64_t mem_clean_size = 0;
GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed.");
ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
if (ret != ge::SUCCESS) {
GELOGE(ret, "Assign continuous input memory failed!");
return ret;
}

// Clean up atomic address, eg, hcom node
vector<int32_t> input_indexes;
// If GetListInt fail, input_indexes is empty.
(void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
// check whether there is an atomic conflict between the current node and the peer out node
if (!CheckInputIsSupportAtomic(node)) {
GELOGE(ge::FAILED,
"There is an atomic conflict between the current node and the peer out node, not supported!");
return ge::FAILED;
}

const auto &in_control_anchor = node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_control_anchor);
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
GE_CHECK_NOTNULL(peer_out_control_anchor);
auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
return ret;
}
}
}
if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
"Assign node %s continuous input memory failed.", node->GetName().c_str())
} else {
nodes_stack.push_back(node);
}
}

// Assign continuous output memory
int64_t memory_type = RT_MEMORY_HBM;
bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
if (continuous_output) {
GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed.");
@@ -441,6 +421,18 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
}
}
}
// Assign continuous input memory in `reverse topo order` which stored before
while (!nodes_stack.empty()){
auto node = nodes_stack.back();
nodes_stack.pop_back();
auto iter = node_2_continuous_type.find(node);
if (iter == node_2_continuous_type.end()) {
GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str());
return FAILED;
}
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second),
"Assign node %s continuous input memory failed.", node->GetName().c_str())
}
for (auto pair : memory_offset_) {
GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
pair.second.mem_offset_);
@@ -463,7 +455,15 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
int64_t mem_offset = iter->second.mem_offset_;
int64_t extra_memory_size = 0;
bool is_continuous_input_allocated = false;
(void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
vector<int64_t> output_list_this = op_desc->GetOutputOffset();
if (output_list_this.empty()) {
std::string error = "node:" + FmtToStr(op_desc->GetName()) + "has no output offset";
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
return FAILED;
}
(void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
@@ -505,6 +505,17 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
// when continuous input has been allocated first input is beginning offset
bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
if (is_allocated_first_input) {
std::map<int32_t, int32_t> out2ins;
GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str());
// output is beginning offset, set offset for input; only support this case now
if (out2ins.size() == 1 && out2ins.begin()->second == 0) {
output_list.at(peer_out_data_anchor->GetIdx()) == output_list_this.at(out2ins.begin()->first);
peer_op_desc->SetOutputOffset(output_list);
} else {
GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first,
out2ins.begin()->second, out2ins.size());
}
// first input is beginning offset
mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
} else {
@@ -882,7 +893,7 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
(peer_op_desc->GetType() == VARIABLE)) {
std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" +
FmtToStr(peer_op_desc->GetName()) + " is invalid, only support Constant/AippData/Variable";
FmtToStr(peer_op_desc->GetName()) + " is invalid, Constant/AippData/Variable is not supported";
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
return false;
}
@@ -1502,4 +1513,92 @@ void GraphMemoryAssigner::PrintMemoryOffset() {
pair.first, pair.second.mem_offset_);
}
}

ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int32_t> &out2ins) {
for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
int32_t reuse_in_index = -1;
bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
if (reuse_input_flag) {
if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
} else {
GELOGE(FAILED, "Invalid reuse_input value %d on output %d of node %s, please check attr reuse_input",
reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
return FAILED;
}
}
}

return ge::SUCCESS;
}

bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
auto iter = node_2_continuous_type.find(in_node);
// In node's topo order in the front, so function can not be exception
auto continuous_type = iter->second;
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
if (continuous_input) {
GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
return false;
}
}
for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
node_2_continuous_type.emplace(out_node, continuous_type);
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
if (continuous_input) {
GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
return false;
}
}

return true;
}

ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
uint32_t continuous_type) {
int64_t mem_clean_start = 0;
int64_t mem_clean_size = 0;
int64_t memory_type = RT_MEMORY_HBM;

GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed.");
auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
if (ret != ge::SUCCESS) {
GELOGE(ret, "Assign continuous input memory failed!");
return ret;
}

// Clean up atomic address, eg, hcom node
vector<int32_t> input_indexes;
// If GetListInt fail, input_indexes is empty.
(void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
// check whether there is an atomic conflict between the current node and the peer out node
if (!CheckInputIsSupportAtomic(input_continuous_node)) {
GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!");
return ge::FAILED;
}

const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_control_anchor);
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
GE_CHECK_NOTNULL(peer_out_control_anchor);
auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
return ret;
}
}
}
}

return ge::SUCCESS;
}

} // namespace ge

+ 8
- 0
ge/graph/build/memory/graph_mem_assigner.h View File

@@ -125,6 +125,14 @@ class GraphMemoryAssigner {

ge::Status ReAssignAtomicMemory(bool is_loop_graph);

ge::Status GetAllRef(const NodePtr &node, std::map<int32_t, int32_t> &out2ins);

bool AssignContinuousInputMemoryWithAtomicProcessDirectly(const NodePtr &input_continuous_node,
std::map<NodePtr, uint32_t> &node_2_continuous_type);

ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
uint32_t continuous_type);

ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
map<string, vector<NodePtr>> &connecting_output_atomic_nodes);



Loading…
Cancel
Save