You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

mds_pass.cc 7.7 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "./mds_pass.h"
  17. namespace ge {
  18. Status ModelDeploySchedulerPass::Run(ComputeGraphPtr graph) {
  19. GE_CHECK_NOTNULL(graph);
  20. compute_graph_ = graph;
  21. if (!MdsUtils::IsMDSNeeded()) {
  22. return SUCCESS;
  23. }
  24. GELOGI("[MDS][%s] start to deploy.", GetGraphName());
  25. MDS_REQUIRE_SUCCESS(SMDPProcess(), "[MDS][SMDPProcess] failed, graph_name:[%s]", GetGraphName());
  26. MDS_REQUIRE_SUCCESS(CutProcess(), "[MDS][CutProcess] failed, graph_name:[%s]", GetGraphName());
  27. MDS_REQUIRE_SUCCESS(SMDPProcess(false), "[MDS][SMDPProcess] failed, graph_name:[%s]", GetGraphName());
  28. MDS_REQUIRE_SUCCESS(SwapProcess(), "[MDS][SwapProcess] failed, graph_name:[%s]", GetGraphName());
  29. MDS_REQUIRE_SUCCESS(PiplineProcess(), "[MDS][PiplineProcess] failed, graph_name:[%s]", GetGraphName());
  30. MDS_REQUIRE_SUCCESS(SetDeployInfo(), "[MDS][SetDeployInfo] failed, graph_name:[%s]", GetGraphName());
  31. GELOGI("[MDS][%s] deploy successfully.", graph->GetName().c_str());
  32. return SUCCESS;
  33. }
  34. Status ModelDeploySchedulerPass::CutProcess() {
  35. GE_CHECK_NOTNULL(compute_graph_);
  36. if (!compute_graph_->GetAllSubgraphs().empty() || compute_graph_->GetParentGraph() != nullptr) {
  37. GELOGW("[MDS][CutProcess] graph with subgraphs is not supported now. graph_name:[%s]", GetGraphName());
  38. return SUCCESS;
  39. }
  40. auto type = MdsUtils::TryGetGraphCutType(compute_graph_);
  41. switch (type) {
  42. case kCutN:
  43. MDS_REQUIRE_SUCCESS(CutNProcessImply(compute_graph_), "[MDS][CutNProcessImply] failed, graph_name:[%s]",
  44. GetGraphName());
  45. break;
  46. case kCutH:
  47. MDS_REQUIRE_SUCCESS(CutHProcessImply(compute_graph_), "[MDS][CutHProcessImply] failed, graph_name:[%s]",
  48. GetGraphName());
  49. break;
  50. case kDynamicCutN:
  51. MDS_REQUIRE_SUCCESS(CutNProcessImply(compute_graph_, true), "[MDS][CutNProcessImply] failed, graph_name:[%s]",
  52. GetGraphName());
  53. break;
  54. case kDynamicCutH:
  55. MDS_REQUIRE_SUCCESS(CutHProcessImply(compute_graph_, true), "[MDS][CutHProcessImply] failed, graph_name:[%s]",
  56. GetGraphName());
  57. break;
  58. case kDynamicCutAll:
  59. MDS_REQUIRE_SUCCESS(DynamicCutAll(compute_graph_), "[MDS][DynamicCutAll] failed, graph_name:[%s]",
  60. GetGraphName());
  61. break;
  62. default:
  63. GELOGI("[MDS][CutProcess] could not cut, just return. graph_name:[%s]", GetGraphName());
  64. return SUCCESS;
  65. }
  66. }
  67. Status ModelDeploySchedulerPass::CutNProcessImply(const ComputeGraphPtr &compute_graph, bool is_dynamic) {
  68. GE_CHECK_NOTNULL(compute_graph);
  69. // step 0: Cut
  70. for (const auto &node : compute_graph->GetDirectNode()) {
  71. auto op_kernel = mds_cut_pass::GetKernelByType(node);
  72. if (op_kernel == nullptr) {
  73. op_kernel = DeploySchedulerKernel::Instance();
  74. }
  75. if (is_dynamic) {
  76. MDS_REQUIRE_SUCCESS(op_kernel->DynamicCutN(node), "[MDS][DYNAMIC_CUTN] failed, node:[%s]",
  77. node->GetName().c_str());
  78. } else {
  79. MDS_REQUIRE_SUCCESS(op_kernel->CutN(node), "[MDS][CUTN] failed, node:[%s]", node->GetName().c_str());
  80. }
  81. bool is_grad_compute_node = false;
  82. if (ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_GRADIENT_NODE, is_grad_compute_node) &&
  83. is_grad_compute_node) {
  84. grad_compute_nodes_.push_back(node);
  85. }
  86. }
  87. // TODO:for single output multi reference insertion allgather, allreduce nodes, do breadth fusion optimization
  88. MDS_REQUIRE_SUCCESS(HcomNodeFusionProcess(), "[MDS][CUTN][HcomNodeFusionProcess] failed, compute graph:[%s]",
  89. compute_graph->GetName().c_str());
  90. return SUCCESS;
  91. }
  92. Status ModelDeploySchedulerPass::CutHProcessImply(const ComputeGraphPtr &compute_graph, bool is_dynamic) {
  93. GE_CHECK_NOTNULL(compute_graph);
  94. for (NodePtr &node : compute_graph->GetDirectNode()) {
  95. auto op_kernel = mds_cut_pass::GetKernelByType(node);
  96. if (op_kernel == nullptr) {
  97. op_kernel = DeploySchedulerKernel::Instance();
  98. }
  99. if (is_dynamic) {
  100. MDS_REQUIRE_SUCCESS(op_kernel->DynamicCutH(node), "[MDS][DYNAMIC_CUTH] failed, node:[%s]",
  101. node->GetName().c_str());
  102. } else {
  103. MDS_REQUIRE_SUCCESS(op_kernel->CutH(node), "[MDS][CUTH] failed, node:[%s]", node->GetName().c_str());
  104. }
  105. }
  106. return SUCCESS;
  107. }
  108. Status ModelDeploySchedulerPass::DynamicCutAll(const ComputeGraphPtr &compute_graph) {
  109. std::vector<NodePtr> input_nodes;
  110. std::vector<NodePtr> output_nodes;
  111. auto compute_graph0 = GraphUtils::CloneGraph(compute_graph, "", input_nodes, output_nodes);
  112. auto compute_graph1 = GraphUtils::CloneGraph(compute_graph, "", input_nodes, output_nodes);
  113. MDS_REQUIRE_SUCCESS(CutNProcessImply(compute_graph0, true), "[MDS][CutNProcessImply] failed, graph_name:[%s]",
  114. compute_graph0->GetName().c_str());
  115. MDS_REQUIRE_SUCCESS(CutHProcessImply(compute_graph1, true), "[MDS][CutHProcessImply] failed, graph_name:[%s]",
  116. compute_graph1->GetName().c_str());
  117. // TODO:Create a case node, put the two graphs under the two branches of case
  118. return SUCCESS;
  119. }
  120. Status ModelDeploySchedulerPass::SMDPProcess(bool before_cut) {
  121. if (before_cut) {
  122. MDS_REQUIRE_SUCCESS(SMDPModelState(), "[SMDPProcess][SMDPModelState] failed, graph_name:[%s]", GetGraphName());
  123. MDS_REQUIRE_SUCCESS(SMDPWeight(), "[SMDPProcess][SMDPWeight] failed, graph_name:[%s]", GetGraphName());
  124. } else {
  125. MDS_REQUIRE_SUCCESS(SMDPGradient(), "[SMDPProcess][SMDPGradient] failed, graph_name:[%s]", GetGraphName());
  126. }
  127. return SUCCESS;
  128. }
  129. Status ModelDeploySchedulerPass::SetDeployInfo() {
  130. vector<GeAttrValue::NAMED_ATTRS> deployInfo;
  131. REQUIRE(!ge::AttrUtils::GetListNamedAttrs(compute_graph_, ATTR_NAME_DEPLOY_INFO, deployInfo),
  132. "%s already has deployed before!", GetGraphName());
  133. std::multimap<DeviceId, GraphInputs> deploys;
  134. for (int64_t j = 0; j < kDeployNumber; j++) {
  135. int64_t device_id = j;
  136. GraphInputs graph_inputs;
  137. // For now, only one input_node in input_nodes
  138. for (const auto &input_node : MdsUtils::GetInputNodes()) {
  139. GE_CHECK_NOTNULL(input_node);
  140. GeTensorPtr graph_input = MakeShared<GeTensor>(input_node->GetOpDesc()->GetOutputDesc(0));
  141. vector<uint8_t> data{static_cast<uint8_t>(device_id)};
  142. graph_input->SetData(data);
  143. graph_inputs.push_back(graph_input);
  144. }
  145. deploys.emplace(j, graph_inputs);
  146. }
  147. return MdsUtils::SetDeployInfo(compute_graph_, deploys);
  148. }
// NOTE(review): the six methods below are unimplemented placeholders that
// unconditionally report SUCCESS so the Run() pipeline can be wired up
// end-to-end before each stage lands.

// Placeholder for the memory swap scheduling stage; currently a no-op.
Status ModelDeploySchedulerPass::SwapProcess() {
  return SUCCESS;
}
// Placeholder for the pipeline-parallel stage; currently a no-op.
// (Name keeps the original spelling "Pipline" used by the declaration.)
Status ModelDeploySchedulerPass::PiplineProcess() {
  return SUCCESS;
}
// Placeholder for HCOM (collective-comm) node fusion after CutN; currently a no-op.
Status ModelDeploySchedulerPass::HcomNodeFusionProcess() {
  return SUCCESS;
}
// Placeholder for sharding model states (pre-cut SMDP stage); currently a no-op.
Status ModelDeploySchedulerPass::SMDPModelState() {
  return SUCCESS;
}
// Placeholder for sharding weights (pre-cut SMDP stage); currently a no-op.
Status ModelDeploySchedulerPass::SMDPWeight() {
  return SUCCESS;
}
// Placeholder for sharding gradients (post-cut SMDP stage); currently a no-op.
Status ModelDeploySchedulerPass::SMDPGradient() {
  return SUCCESS;
}
  167. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示