| @@ -215,6 +215,7 @@ set(TRAIN_SRC_LIST | |||||
| "graph/passes/dimension_compute_pass.cc" | "graph/passes/dimension_compute_pass.cc" | ||||
| "graph/passes/dropout_pass.cc" | "graph/passes/dropout_pass.cc" | ||||
| "graph/passes/hccl_group_pass.cc" | "graph/passes/hccl_group_pass.cc" | ||||
| "graph/passes/hccl_tailing_optimization_pass.cc" | |||||
| "graph/passes/enter_pass.cc" | "graph/passes/enter_pass.cc" | ||||
| "graph/passes/assign_remove_pass.cc" | "graph/passes/assign_remove_pass.cc" | ||||
| "graph/passes/inplace_support_check_pass.cc" | "graph/passes/inplace_support_check_pass.cc" | ||||
| @@ -612,6 +613,7 @@ set(INFER_SRC_LIST | |||||
| "graph/passes/link_gen_mask_nodes_pass.cc" | "graph/passes/link_gen_mask_nodes_pass.cc" | ||||
| "graph/passes/replace_with_empty_const_pass.cc" | "graph/passes/replace_with_empty_const_pass.cc" | ||||
| "graph/passes/hccl_group_pass.cc" | "graph/passes/hccl_group_pass.cc" | ||||
| "graph/passes/hccl_tailing_optimization_pass.cc" | |||||
| "graph/passes/memcpy_addr_async_pass.cc" | "graph/passes/memcpy_addr_async_pass.cc" | ||||
| "graph/passes/set_input_output_offset_pass.cc" | "graph/passes/set_input_output_offset_pass.cc" | ||||
| "graph/passes/parallel_group_pass.cc" | "graph/passes/parallel_group_pass.cc" | ||||
| @@ -55,6 +55,7 @@ | |||||
| #include "graph/passes/dimension_compute_pass.h" | #include "graph/passes/dimension_compute_pass.h" | ||||
| #include "graph/passes/flow_ctrl_pass.h" | #include "graph/passes/flow_ctrl_pass.h" | ||||
| #include "graph/passes/fuse_data_nodes_with_common_input_pass.h" | #include "graph/passes/fuse_data_nodes_with_common_input_pass.h" | ||||
| #include "graph/passes/hccl_tailing_optimization_pass.h" | |||||
| #include "graph/passes/identity_pass.h" | #include "graph/passes/identity_pass.h" | ||||
| #include "graph/passes/input_output_connection_identify_pass.h" | #include "graph/passes/input_output_connection_identify_pass.h" | ||||
| #include "graph/passes/iterator_op_pass.h" | #include "graph/passes/iterator_op_pass.h" | ||||
| @@ -2252,6 +2253,14 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| // Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory | // Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory | ||||
| GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
| graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) | graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) | ||||
| std::string hccl_tailing_optimize; | |||||
| if (GetContext().GetOption("ge.exec.hccl_tailing_optimize", hccl_tailing_optimize) == SUCCESS && | |||||
| hccl_tailing_optimize == "1") { | |||||
| GELOGI("Add hccl tailing optimize stage"); | |||||
| GE_CHK_STATUS_RET( | |||||
| graph_pass.AddPass("OptimizeStage1_3::HcclTailingOptimizationPass", new (std::nothrow) HcclTailingOptimizationPass)) | |||||
| } | |||||
| } | } | ||||
| GE_TIMESTAMP_START(graph_pass); | GE_TIMESTAMP_START(graph_pass); | ||||
| ret = graph_pass.Run(compute_graph); | ret = graph_pass.Run(compute_graph); | ||||
| @@ -0,0 +1,72 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "hccl_tailing_optimization_pass.h" | |||||
| #include "graph/common/transop_util.h" | |||||
| namespace ge { | |||||
| Status HcclTailingOptimizationPass::Run(ComputeGraphPtr graph) { | |||||
| for (const auto &node : graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | |||||
| if (node->GetType() != HCOMALLREDUCE) { | |||||
| continue; | |||||
| } | |||||
| for (auto &out_node : node->GetOutDataNodes()) { | |||||
| if (!TransOpUtil::IsTransOp(out_node)) { | |||||
| continue; | |||||
| } | |||||
| GE_CHK_STATUS_RET_NOLOG(CopyControlEdgesForTransOp(out_node)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HcclTailingOptimizationPass::CopyControlEdgesForTransOp(NodePtr &first_trans_op) { | |||||
| auto dst_in_ctrl_anchor = first_trans_op->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(dst_in_ctrl_anchor); | |||||
| std::set<OutControlAnchorPtr> src_out_ctrl_anchors; | |||||
| std::vector<NodePtr> trans_op_nodes{first_trans_op}; | |||||
| while (!trans_op_nodes.empty()) { | |||||
| auto trans_op_node = trans_op_nodes.back(); | |||||
| trans_op_nodes.pop_back(); | |||||
| for (auto &next_node : trans_op_node->GetOutDataNodes()) { | |||||
| auto in_ctrl_anchor = next_node->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(in_ctrl_anchor); | |||||
| auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors(); | |||||
| for (auto src_ctrl_anchor : peer_out_ctrl_anchors) { | |||||
| GE_CHECK_NOTNULL(src_ctrl_anchor->GetOwnerNode()); | |||||
| src_out_ctrl_anchors.emplace(src_ctrl_anchor); | |||||
| } | |||||
| if (TransOpUtil::IsTransOp(next_node)) { | |||||
| trans_op_nodes.emplace_back(next_node); | |||||
| } | |||||
| } | |||||
| } | |||||
| for (auto &src_out_ctrl_anchor : src_out_ctrl_anchors) { | |||||
| if (!src_out_ctrl_anchor->IsLinkedWith(dst_in_ctrl_anchor)) { | |||||
| GE_CHK_GRAPH_STATUS_RET( | |||||
| GraphUtils::AddEdge(src_out_ctrl_anchor, dst_in_ctrl_anchor), "Failed to add edge between %s->%s", | |||||
| src_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), first_trans_op->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,34 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_PASSES_HCCL_TAILING_OPTIMIZATION_PASS_H_ | |||||
| #define GE_GRAPH_PASSES_HCCL_TAILING_OPTIMIZATION_PASS_H_ | |||||
| #include <string> | |||||
| #include "inc/graph_pass.h" | |||||
| namespace ge { | |||||
| class HcclTailingOptimizationPass : public GraphPass { | |||||
| public: | |||||
| Status Run(ComputeGraphPtr graph) override; | |||||
| private: | |||||
| Status CopyControlEdgesForTransOp(NodePtr &first_trans_op); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_PASSES_HCCL_TAILING_OPTIMIZATION_PASS_H_ | |||||
| @@ -270,6 +270,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/passes/link_gen_mask_nodes_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/link_gen_mask_nodes_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/replace_with_empty_const_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/replace_with_empty_const_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/hccl_group_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/hccl_tailing_optimization_pass.cc" | |||||
| "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/memcpy_addr_async_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | ||||