You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

engine_place.cc 5.7 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/partition/engine_place.h"
  17. #include <mutex>
  18. #include "framework/common/op/ge_op_utils.h"
  19. #include "graph/utils/graph_utils.h"
  20. #include "graph/utils/op_desc_utils.h"
  21. #include "init/gelib.h"
  22. namespace ge {
  23. namespace {
  // File-local mutex. EnginePlacer::Run acquires it with a std::lock_guard as its first
  // statement, so it stays locked for the full duration of each Run call.
  24. std::mutex check_support_cost_mutex;
  25. }
  26. Status EnginePlacer::Check() const {
  27. if (compute_graph_ == nullptr) {
  28. REPORT_INNER_ERROR("E19999", "compute_graph_ is nullptr, check invalid.");
  29. GELOGE(GE_GRAPH_NULL_INPUT, "[Check][Param] compute_graph_ is nullptr.");
  30. return FAILED;
  31. }
  32. std::shared_ptr<GELib> instance_ptr = GELib::GetInstance();
  33. if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
  34. REPORT_INNER_ERROR("E19999", "GELib instance is nullptr or it is not InitFlag, check invalid.");
  35. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] Run enginePlacer failed, because GELib is invalid.");
  36. return FAILED;
  37. }
  38. return SUCCESS;
  39. }
  40. Status EnginePlacer::Run(bool direct_node_flag) {
  41. std::lock_guard<std::mutex> lock(check_support_cost_mutex);
  42. GELOGD("Engine placer starts.");
  43. if (Check() != SUCCESS) {
  44. return FAILED;
  45. }
  46. bool is_check_support_success = true;
  47. // Assign engine for each node in the graph
  48. GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStatistic();
  49. for (const auto &node_ptr : compute_graph_->GetNodes(direct_node_flag)) {
  50. GE_CHECK_NOTNULL(node_ptr);
  51. auto op_desc = node_ptr->GetOpDesc();
  52. GE_CHECK_NOTNULL(op_desc);
  53. std::string engine_name;
  54. std::string kernel_name;
  55. // Check if this node has assigned engine
  56. bool has_engine_attr =
  57. AttrUtils::GetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, engine_name) && !engine_name.empty();
  58. bool has_kernel_attr =
  59. AttrUtils::GetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name) && !kernel_name.empty();
  60. bool use_exist_engine_name = !op_desc->GetOpKernelLibName().empty() || (has_kernel_attr && has_engine_attr);
  61. if (use_exist_engine_name) {
  62. if (op_desc->GetOpEngineName().empty()) {
  63. GELOGI("Op %s set engine_name %s engine_name %s from attrs", op_desc->GetName().c_str(), engine_name.c_str(),
  64. kernel_name.c_str());
  65. op_desc->SetOpEngineName(engine_name);
  66. op_desc->SetOpKernelLibName(kernel_name);
  67. }
  68. engine_name = op_desc->GetOpEngineName();
  69. } else {
  70. // Call placer cost model to get the "best" engine for this node
  71. engine_name = GELib::GetInstance()->DNNEngineManagerObj().GetDNNEngineName(node_ptr);
  72. // If can't get op's engine name, keep check support finish and return failed
  73. if (engine_name.empty()) {
  74. is_check_support_success = false;
  75. ErrorManager::GetInstance().ATCReportErrMessage(
  76. "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()});
  77. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][Param] Can not find engine of op name %s type %s",
  78. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  79. continue;
  80. }
  81. }
  82. // Record the node assigned atomic_engine name
  83. GELOGD("Assigning DNNEngine %s to node %s, op type %s", engine_name.c_str(), node_ptr->GetName().c_str(),
  84. node_ptr->GetType().c_str());
  85. node_atomic_engine_map_.insert(std::make_pair(node_ptr, engine_name));
  86. }
  87. for (auto &it : GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) {
  88. GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second);
  89. }
  90. GELOGD("Engine placer ends.");
  91. return is_check_support_success ? SUCCESS : FAILED;
  92. }
  93. Status EnginePlacer::AssignCompoundEngine() {
  94. if (GELib::GetInstance()->DNNEngineManagerObj().GetCompoundEngineContains().empty()) {
  95. GELOGI("No compound engine registers, ignore assign compound engine");
  96. return SUCCESS;
  97. }
  98. std::vector<ComputeGraphPtr> subgraphs;
  99. if (GraphUtils::GetSubgraphs(compute_graph_, subgraphs) != GRAPH_SUCCESS) {
  100. REPORT_CALL_ERROR("E19999", "Get subgraphs contained in graph %s failed", compute_graph_->GetName().c_str());
  101. GELOGE(FAILED, "[Get][Subgraphs] Get subgraphs contained in graph %s failed", compute_graph_->GetName().c_str());
  102. return FAILED;
  103. }
  104. for (const auto &subgraph : subgraphs) {
  105. (void)subgraph->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME);
  106. }
  107. std::reverse(subgraphs.begin(), subgraphs.end());
  108. subgraphs.emplace_back(compute_graph_);
  109. for (const auto &subgraph : subgraphs) {
  110. for (const auto &node : subgraph->GetDirectNode()) {
  111. std::string compound_engine_name = GELib::GetInstance()->DNNEngineManagerObj().GetCompoundEngineName(node, 1);
  112. GELOGD("Assign compound engine %s to node %s, op type %s", compound_engine_name.c_str(),
  113. node->GetName().c_str(), node->GetType().c_str());
  114. node_compound_engine_map_.insert(std::make_pair(node, compound_engine_name));
  115. }
  116. }
  117. return SUCCESS;
  118. }
  119. const NodeEngineMap *EnginePlacer::GetNodeEngineMap(bool compound_engine_flag) const {
  120. return compound_engine_flag ? &node_compound_engine_map_ : &node_atomic_engine_map_;
  121. }
  122. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示: