From 15aa9afb8bf1ba45bda189f6e4d5c91d0f1a8fec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=AC=91=E5=A4=A9?= Date: Fri, 27 Nov 2020 16:49:08 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!434=20:?= =?UTF-8?q?=20atomic=20loop=20condition=20bug=20fix'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/graph/passes/atomic_addr_clean_pass.cc | 84 +++++------------------ ge/graph/passes/atomic_addr_clean_pass.h | 5 +- 2 files changed, 19 insertions(+), 70 deletions(-) diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 4cbc42d1..60742eb1 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -33,12 +33,10 @@ namespace ge { Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); GELOGD("AtomicAddrCleanPass begin."); - bool is_unknown_graph = graph->GetGraphUnknownFlag(); - // 1.Recoginze atomic and loop mark vector atomic_node_vec; for (NodePtr &node : graph->GetDirectNode()) { - if (IsAtomicOp(node, is_unknown_graph)) { + if (IsAtomicOp(node)) { atomic_node_vec.push_back(node); } if (!is_loop_graph_ && node->GetType() == LOOPCOND) { @@ -52,6 +50,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { return SUCCESS; } + bool is_unknown_graph = graph->GetGraphUnknownFlag(); if (is_unknown_graph) { GELOGD("Graph[%s] is unknown graph. It will call fe interface to compile op.", graph->GetName().c_str()); GE_CHK_STATUS_RET(CompileUnknownGraphOp(atomic_node_vec)); @@ -243,101 +242,55 @@ Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr return SUCCESS; } -bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node, bool is_unknown_graph) { +bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { GE_IF_BOOL_EXEC(node == nullptr, GELOGE(FAILED, "node is null."); return false); OpDescPtr op_desc = node->GetOpDesc(); if (op_desc == nullptr) { return false; } - - if (CheckAtomicFromOpsKernel(node)) { - return true; - } - - // 2.Check atomic attr in node - std::map> node_workspace_offset; - bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); - bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); - node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); - if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) { - return false; - } - - if (!is_unknown_graph) { - if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) { - std::vector atomic_output_index; - (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); - bool is_all_output_peer_also_atomic = true; - for (const auto &output_index : atomic_output_index) { - if (!IsOutputIndexPeerInputAtomic(node, output_index)) { - is_all_output_peer_also_atomic = false; - break; - } - } - if (is_all_output_peer_also_atomic) { - GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str()); - return false; - } - } - } - - graphStatus ret = op_desc->SetAttr(ATOMIC_ATTR_IS_ATOMIC_NODE, GeAttrValue::CreateFrom(true)); - if (ret != GRAPH_SUCCESS) { - GELOGW("set attr ATOMIC_ATTR_IS_ATOMIC_NODE fail."); - } - GELOGD("Recognized atomic op %s from attr.", op_desc->GetName().c_str()); - return true; -} - -// just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input -bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { // 1.Check if isAtomic attrs exist for HCOM std::shared_ptr instance_ptr = GELib::GetInstance(); if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGW("GELib not initialized, atomic from ops kernel judge false, node_name: %s", node->GetName().c_str()); + GELOGW("GELib not initialized"); return false; } OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); - vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(node->GetType()); + vector op_info_vec = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType()); for (const auto &op_info : op_info_vec) { if (op_info.isAtomic) { + GELOGI("Recognized atomic op %s from DNN_HCCL engine.", op_desc->GetName().c_str()); // check peer input is DATA for (auto &in_data_anchor : node->GetAllInDataAnchors()) { if (in_data_anchor->GetPeerOutAnchor() != nullptr && in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); if (peer_in_node->GetType() == DATA) { - GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str()); + GELOGI("Recognized atomic op %s from DNN_HCCL engine and input is DATA.", op_desc->GetName().c_str()); return false; } } } - GELOGI("Recognized atomic op %s from %s engine.", node->GetName().c_str(), op_info.engine.c_str()); hcom_node_vec_.push_back(node); return true; } } - return false; -} - -bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index) { - auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); - if (out_data_anchor == nullptr) { + // 2.Check atomic attr in node + std::map> node_workspace_offset; + bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX); + bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX); + node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset); + if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) { return false; } - for (const auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto output_node = input_anchor->GetOwnerNode(); - // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input - // hccl's attr ATOMIC_ATTR_INPUT_INDEX mark on CalcOpRunningParam, can't be get here - if (CheckAtomicFromOpsKernel(output_node)) { - return true; - } + graphStatus ret = op_desc->SetAttr(ATOMIC_ATTR_IS_ATOMIC_NODE, GeAttrValue::CreateFrom(true)); + if (ret != GRAPH_SUCCESS) { + GELOGW("set attr ATOMIC_ATTR_IS_ATOMIC_NODE fail."); } - return false; + GELOGD("Recognized atomic op %s from FE engine.", op_desc->GetName().c_str()); + return true; } - /// /// @brief Clear Status, used for subgraph pass /// @return SUCCESS @@ -345,7 +298,6 @@ bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int6 Status AtomicAddrCleanPass::ClearStatus() { hcom_node_vec_.clear(); return SUCCESS; - } Status AtomicAddrCleanPass::CompileUnknownGraphOp(const vector &atomic_node_vec) { diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index 64bc604b..ad60b7b5 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -72,7 +72,7 @@ class AtomicAddrCleanPass : public GraphPass { * @param node * @return */ - bool IsAtomicOp(const NodePtr &node, bool is_unknown_graph); + bool IsAtomicOp(const NodePtr &node); /** * Handle atomic node in unknown graph @@ -84,9 +84,6 @@ class AtomicAddrCleanPass : public GraphPass { Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, std::vector &common_atomic_nodes); - bool CheckAtomicFromOpsKernel(const NodePtr &node); - - bool IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index); vector hcom_node_vec_; bool is_loop_graph_ = false;