|
- /**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "graph/passes/base_pass.h"
-
- #include <queue>
- #include <unordered_set>
-
- #include "common/debug/log.h"
- #include "graph/utils/graph_utils.h"
-
- namespace ge {
- namespace {
// Upper bound for the re-pass round counter checked by the repass loop of one graph.
constexpr int kMaxRePassTimes{10000};
// A node with more direct input nodes than this is put into the "last" node set when seeding the traversal.
constexpr size_t kMaxOneInNodes{1000};
// Each iteration, we take about 0.3k memory on the stack, we should change the recursion to loop later
constexpr int kMaxRecursiveDepth{20};
-
- void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph,
- GEPass::GraphLevelState &g_state) {
- for (auto &node : graph->GetDirectNode()) {
- if (node == nullptr) {
- continue;
- }
- size_t in_nums = node->GetInNodes().size();
- if (in_nums == 0) {
- g_state.AddNodeToQueueIfNotSeen(node);
- } else if (in_nums > kMaxOneInNodes) {
- g_state.nodes_last.insert(node);
- }
- }
- }
-
- bool AllNodesIn(const Node::Vistor<NodePtr> &nodes, const std::unordered_set<NodePtr> &nodes_set) {
- return !std::any_of(nodes.begin(), nodes.end(), [&](const NodePtr &n) {
- return nodes_set.count(n) > 0;
- });
- }
-
- void AddNextIterNodes(const NodePtr &cur_node, GEPass::GraphLevelState &g_state) {
- const auto &nodes_suspend = g_state.nodes_suspend;
- for (auto &node : cur_node->GetOutNodes()) {
- if (node == nullptr) {
- continue;
- }
- if (g_state.nodes_last.count(node) != 0) {
- continue;
- }
- if (nodes_suspend.count(node) > 0) {
- GELOGD("The node %s has suspend by pass, skip it.", node->GetName().c_str());
- continue;
- }
-
- if (node->IsAllInNodesSeen(g_state.nodes_seen) && AllNodesIn(node->GetInAllNodes(), nodes_suspend)) {
- g_state.AddNodeToQueueIfNotSeen(node);
- }
- }
- }
-
- void AddImmediateRepassNodesToQueue(NodePtr &cur_node, const std::pair<std::string, BaseNodePass *> &name_to_pass,
- const std::unordered_set<NodePtr> &nodes_im_re_pass,
- GEPass::GraphLevelState &g_state) {
- for (const auto &node : nodes_im_re_pass) {
- if (node == nullptr) {
- GELOGW("Found null immediately re-pass node when executing pass %s on node %s type %s", name_to_pass.first.c_str(),
- cur_node->GetName().c_str(), cur_node->GetType().c_str());
- continue;
- }
- if (g_state.nodes_passed.count(node) > 0) {
- g_state.AddNodeToQueueFront(node);
- continue;
- }
- // exp: constant folding add new const need repass immediate
- if (AllNodesIn(node->GetInAllNodes(), g_state.nodes_passed)) {
- g_state.AddNodeToQueueFront(node);
- continue;
- }
- GELOGW("The node %s specified by pass %s has un-passed in_nodes, it will not repass immediately",
- node->GetName().c_str(), name_to_pass.first.c_str());
- }
- }
-
- void AddLastNodesToQueue(GEPass::GraphLevelState &g_state) {
- for (auto &node : g_state.nodes_last) {
- // todo 为什么会在node_seen中看到node_last,blame一下看看历史合入记录
- if (node->IsAllInNodesSeen(g_state.nodes_seen)) {
- g_state.AddNodeToQueueIfNotSeen(node);
- }
- }
- g_state.nodes_last.clear();
- }
-
- void SuspendAndResume(const std::string &pass_name,
- const std::unordered_set<NodePtr> &nodes_suspend,
- const std::unordered_set<NodePtr> &nodes_resume,
- GEPass::GraphLevelState &g_state) {
- // TODO 当前没有记录NodePass中suspend和resume的顺序,因此无法辨别NodePass中是先做Suspend还是Resume。
- // 因此此处的简单处理是如果在NodePass的过程中,触发了suspend/resume,那么框架以resume为准
- // 更好的处理方式是,在NodePass做suspend/resume时,做顺序的记录,在此函数中按序做回放
- for (const auto &node : nodes_suspend) {
- GELOGD("The iteration suspend of node %s has been set by pass %s", node->GetName().c_str(), pass_name.c_str());
- g_state.nodes_suspend.insert(node);
- }
-
- for (const auto &node : nodes_resume) {
- if (g_state.nodes_suspend.erase(node) > 0) {
- if (g_state.nodes_seen.count(node.get()) > 0 || node->IsAllInNodesSeen(g_state.nodes_seen)) {
- g_state.nodes.push_back(node);
- GELOGD("Node %s has been resumed by pass %s", node->GetName().c_str(), pass_name.c_str());
- }
- }
- }
- }
-
- void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass,
- std::unordered_set<Node *> &nodes_seen, const std::unordered_set<NodePtr> &nodes_to_re_pass,
- std::unordered_set<NodePtr> &nodes_re_pass) {
- for (const auto &node_to_re_pass : nodes_to_re_pass) {
- if (node_to_re_pass == nullptr) {
- GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
- node->GetName().c_str(), node->GetType().c_str());
- continue;
- }
- if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
- GELOGD("The node %s will be re-pass.", node_to_re_pass->GetName().c_str());
- nodes_re_pass.insert(node_to_re_pass);
- } else {
- GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
- }
- }
- }
-
- void SetFlagOption(NodePassOption option, NamesToPass names_to_pass) {
- for (auto &name_to_pass : names_to_pass) {
- name_to_pass.second->SetOption(option, "");
- }
- }
-
- void ClearOption(NamesToPass names_to_pass) {
- for (auto &name_to_pass : names_to_pass) {
- name_to_pass.second->ClearOptions();
- }
- }
-
- bool ShouldNodePassActually(const NodePtr &node, const GEPass::GraphLevelState &g_state) {
- if (node == nullptr) {
- GELOGW("node is null");
- return false;
- }
- // 因为在PassNode之前,会首先将node的输出节点添加queue,因此若在pass node时,删除了node的输出节点,
- // 那么会出现:已经删除的节点出现在queue中,并且被pop出来,因此这里做确认,如果node已经被删除过了,就跳过pass
- if (g_state.nodes_deleted.count(node) > 0) {
- GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str());
- return false;
- }
-
- // 因为在PassNode之前,会首先将node的输出节点添加queue,因此若在pass node时,suspend了node的输出节点,后续逻辑与上面相同
- // TODO 需要注意的是,这里的保证是一次”尽力而为“,若pass node时,将node之前的节点`A`添加到了suspend,
- // 那么`A`节点的后继和间接后继节点的pass不会受到suspend的影响
- // 理论上来说,如果在pass node之前,首先收集node的输出节点,在pass后,将输出节点做suspend、delete的去除,然后加queue,
- // 这样处理就不需要在这里做额外的确认了
- if (g_state.nodes_suspend.count(node) > 0) {
- GELOGD("The node %s has been added to suspend-iteration nodes list, the iteration of it will be suspend.",
- node->GetName().c_str());
- return false;
- }
- if (!AllNodesIn(node->GetInAllNodes(), g_state.nodes_suspend)) {
- GELOGD("The node %s has been added to suspend-iteration nodes list, the iteration of it will be suspend.",
- node->GetName().c_str());
- return false;
- }
- return true;
- }
- } // namespace
-
- Status BaseNodePass::IsolateAndDeleteNode(NodePtr &node, const std::vector<int> &io_map,
- bool is_repass_io_immediately) {
- if (node == nullptr) {
- REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid.");
- GELOGE(FAILED, "[Check][Param] parameter node is nullptr.");
- return FAILED;
- }
- GELOGI("Prepare to isolate and delete node, name:%s, type:%s.", node->GetName().c_str(),
- node->GetType().c_str());
- ComputeGraphPtr graph = node->GetOwnerComputeGraph();
- if (graph == nullptr) {
- REPORT_INNER_ERROR("E19999", "The owner graph of node:%s must not be null.", node->GetName().c_str());
- GELOGE(FAILED, "[Get][OwnerComputeGraph] failed, The owner graph of node:%s must not be null.",
- node->GetName().c_str());
- return FAILED;
- }
-
- is_repass_io_immediately ? AddImmediateRePassNodesWithInOut(node) : AddRePassNodesWithInOut(node);
-
- if (GraphUtils::IsolateNode(node, io_map) != GRAPH_SUCCESS) {
- REPORT_CALL_ERROR("E19999", "Isolate Node:%s failed", node->GetName().c_str());
- GELOGE(FAILED, "[Isolate][Node] %s failed.", node->GetName().c_str());
- return FAILED;
- }
-
- if (GraphUtils::RemoveNodeWithoutRelink(graph, node) != SUCCESS) {
- REPORT_CALL_ERROR("E19999", "call RemoveNodeWithoutRelink for node:%s failed.", node->GetName().c_str());
- GELOGE(FAILED, "[Call][RemoveNodeWithoutRelink] for node:%s failed.", node->GetName().c_str());
- return FAILED;
- }
-
- AddNodeDeleted(node);
- return SUCCESS;
- }
-
- Status GEPass::Run(const NamesToPass &names_to_passes) {
- if (graph_ == nullptr) {
- REPORT_INNER_ERROR("E19999", "graph_ is nullptr, check invalid.");
- GELOGE(INTERNAL_ERROR, "[Check][Param] The graph is nullptr");
- return INTERNAL_ERROR;
- }
- if (names_to_passes.empty()) {
- GELOGW("No passes input, the GEPass will do nothing");
- return INTERNAL_ERROR;
- }
- for (const auto &name_to_pass : names_to_passes) {
- if (name_to_pass.second == nullptr) {
- GELOGE(INTERNAL_ERROR, "[Check][Param] There is null pointer in passes(%s)", name_to_pass.first.c_str());
- return INTERNAL_ERROR;
- }
- }
-
- if (depth_ > kMaxRecursiveDepth) {
- GELOGE(PARAM_INVALID,
- "[Check][Param] The pass for root graph %s will be terminated because too many nesting"
- " levels(%d) of subgraphs, last subgraph is %s",
- root_graph_->GetName().c_str(), depth_, graph_->GetName().c_str());
- return PARAM_INVALID;
- }
-
- return RunPassesOneGraph(names_to_passes);
- }
-
- void NotifyPassGraphStart(const ComputeGraphPtr &graph, const NamesToPass &names_to_pass) {
- for (auto &name_to_pass : names_to_pass) {
- name_to_pass.second->OnStartPassGraph(graph);
- }
- }
-
- Status GEPass::HandleLeakedSuspendNodes(const NamesToPass &names_to_passes, GraphLevelState &g_state) {
- for (auto &name_to_pass : names_to_passes) {
- name_to_pass.second->init();
- auto ret = name_to_pass.second->OnSuspendNodesLeaked();
- if (ret != SUCCESS) {
- // todo error
- return ret;
- }
- SuspendAndResume(name_to_pass.first,
- name_to_pass.second->GetNodesSuspend(),
- name_to_pass.second->GetNodesResume(),
- g_state);
- }
- return SUCCESS;
- }
-
- Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
- GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size());
- NotifyPassGraphStart(graph_, names_to_passes);
- GraphLevelState g_state;
- g_state.re_pass_times = 0;
- GetAllNodesNoInputEdge(graph_, g_state);
- GELOGD("Start points count %zu", g_state.nodes.size());
-
- do {
- if (!g_state.nodes_suspend.empty()) {
- auto ret = HandleLeakedSuspendNodes(names_to_passes, g_state);
- if (ret != SUCCESS) {
- // todo log
- return ret;
- }
- if (g_state.nodes.empty()) {
- // todo 报错,因为suspend泄露场景,没有子类做进一步的resume,此处可能已经彻底泄露,需要报错
- return INTERNAL_ERROR;
- }
- }
- auto ret = RunPassesGraphRepass(names_to_passes, g_state);
- if (ret != SUCCESS) {
- return ret;
- }
- } while (!g_state.nodes_suspend.empty());
-
- return SUCCESS;
- }
-
-
- Status GEPass::RunPassesGraphRepass(const NamesToPass &names_to_passes, GraphLevelState &g_state) {
- RepassLevelState rp_state;
- do {
- for (auto &node : rp_state.nodes_re_pass) {
- g_state.AddNodeToQueue(node);
- }
- rp_state.nodes_re_pass.clear();
-
- while (!g_state.nodes.empty()) {
- auto node = g_state.PopFront();
-
- (void)rp_state.nodes_re_pass.erase(node); // todo 回忆一下为什么
- if (!ShouldNodePassActually(node, g_state)) {
- continue;
- }
- g_state.nodes_seen.insert(node.get()); // todo 为什么这里seen
- AddNextIterNodes(node, g_state);
-
- auto ret = RunPassesNodeOnce(node, names_to_passes, g_state, rp_state);
- if (ret != SUCCESS) {
- GELOGE(ret, "[Process][Passes] on node %s type %s failed, error code:%u", node->GetName().c_str(),
- node->GetType().c_str(), ret);
- return ret;
- }
- }
- AddLastNodesToQueue(g_state);
- } while ((!rp_state.nodes_re_pass.empty() || !g_state.nodes.empty()) && ++g_state.re_pass_times < kMaxRePassTimes);
-
- if (g_state.re_pass_times == kMaxRePassTimes) {
- GELOGW("re_pass_times should not come to %d", kMaxRePassTimes);
- }
- GELOGD("All passes runs end");
- return SUCCESS;
- }
-
- Status GEPass::RunPassesOnSubGraph(const NodePtr &node, const NamesToPass &names_to_passes, bool &has_sub_graph) {
- auto sub_graph_names = node->GetOpDesc()->GetSubgraphInstanceNames();
- has_sub_graph = false;
- for (const auto &name : sub_graph_names) {
- auto graph = root_graph_->GetSubgraph(name);
- if (graph == nullptr) {
- GELOGW("Can not find the sub graph %s from node %s, the pass-process will skip it",
- name.c_str(), node->GetName().c_str());
- continue;
- }
- has_sub_graph = true;
- GELOGI("Begin to run passes on the sub graph %s of node %s", name.c_str(), node->GetName().c_str());
- GEPass pass(graph, root_graph_, depth_ + 1);
- auto ret = pass.Run(names_to_passes);
- if (ret != SUCCESS) {
- GELOGE(ret, "[Run][Passes] for sub graph:%s from node:%s failed", name.c_str(), node->GetName().c_str());
- return ret;
- }
- }
- return SUCCESS;
- }
-
- Status GEPass::RunPassesNodeOnce(NodePtr &node, const NamesToPass &names_to_passes,
- GraphLevelState &g_state, RepassLevelState &rp_state) {
- auto ret = RunPassesOnNode(node, names_to_passes, g_state, rp_state);
- if (ret != SUCCESS) {
- GELOGE(ret, "[Process][Passes] on node %s type %s failed, error code:%u", node->GetName().c_str(),
- node->GetType().c_str(), ret);
- return ret;
- }
-
- bool has_sub_graph = false;
- ret = RunPassesOnSubGraph(node, names_to_passes, has_sub_graph);
- if (ret != SUCCESS) {
- GELOGE(ret, "[Run][Passes] on the sub graph of node %s failed", node->GetName().c_str());
- return ret;
- }
-
- if (has_sub_graph) {
- GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str());
- SetFlagOption(kOptimizeAfterSubGraph, names_to_passes);
- ret = RunPassesOnNode(node, names_to_passes, g_state, rp_state);
- if (ret != SUCCESS) {
- GELOGE(ret, "[Process][Passes] on node %s type %s failed, error code: %u", node->GetName().c_str(),
- node->GetType().c_str(), ret);
- return ret;
- }
-
- // There is only one option scene, so set and clear options around the `RunPasses` func.
- // if there are more than one scene to set options, the `ClearOption` function
- // should be called each time at the begin of the iteration
- ClearOption(names_to_passes);
- }
- return SUCCESS;
- }
-
- Status GEPass::RunPassesOnNode(NodePtr &node, const NamesToPass &names_to_passes, GraphLevelState &g_state,
- RepassLevelState &rp_state) {
- if (node == nullptr) {
- REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid.");
- GELOGE(FAILED, "[Check][Param] parameter node is nullptr.");
- return FAILED;
- }
- GELOGD("Begin to run pass for node %s", node->GetName().c_str());
- for (const auto &name_to_pass : names_to_passes) {
- GELOGD("Begin to run pass %s for node %s", name_to_pass.first.c_str(), node->GetName().c_str());
- name_to_pass.second->init();
- auto result = name_to_pass.second->Run(node);
- if (result != SUCCESS) {
- REPORT_CALL_ERROR("E19999", "process pass %s on node:%s failed, ret:%u",
- name_to_pass.first.c_str(), node->GetName().c_str(), result);
- GELOGE(INTERNAL_ERROR, "[Process][Pass] %s on node %s failed, result "
- "%u, the passes will be terminated immediately.",
- name_to_pass.first.c_str(), node->GetName().c_str(), result);
- return result;
- }
- if (name_to_pass.second->GetNodesDeleted().count(node) > 0) {
- GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(),
- name_to_pass.first.c_str());
- break;
- }
- }
-
- g_state.nodes_passed.insert(node);
-
- for (const auto &name_to_pass : names_to_passes) {
- PushToRePassIfSeen(node, name_to_pass, g_state.nodes_seen,
- name_to_pass.second->GetNodesNeedRePass(),
- rp_state.nodes_re_pass);
-
- AddImmediateRepassNodesToQueue(node, name_to_pass,
- name_to_pass.second->GetNodesNeedRePassImmediately(),
- g_state);
- SuspendAndResume(name_to_pass.first,
- name_to_pass.second->GetNodesSuspend(),
- name_to_pass.second->GetNodesResume(),
- g_state);
-
- const auto &nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted();
- g_state.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
- }
-
- return SUCCESS;
- }
- } // namespace ge
|