You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

mds_pass.cc 8.1 kB

4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "./mds_pass.h"
  17. namespace ge {
  18. Status ModelDeploySchedulerPass::Run(ComputeGraphPtr graph) {
  19. GE_CHECK_NOTNULL(graph);
  20. compute_graph_ = graph;
  21. if (!MdsUtils::IsMDSNeeded()) {
  22. return SUCCESS;
  23. }
  24. GELOGI("[MDS][%s] start to deploy.", GetGraphName());
  25. MDS_REQUIRE_SUCCESS(SMDPProcess(), "[MDS][SMDPProcess] failed, graph_name:[%s]", GetGraphName());
  26. MDS_REQUIRE_SUCCESS(CutProcess(), "[MDS][CutProcess] failed, graph_name:[%s]", GetGraphName());
  27. MDS_REQUIRE_SUCCESS(SMDPProcess(false), "[MDS][SMDPProcess] failed, graph_name:[%s]", GetGraphName());
  28. MDS_REQUIRE_SUCCESS(SwapProcess(), "[MDS][SwapProcess] failed, graph_name:[%s]", GetGraphName());
  29. MDS_REQUIRE_SUCCESS(PiplineProcess(), "[MDS][PiplineProcess] failed, graph_name:[%s]", GetGraphName());
  30. MDS_REQUIRE_SUCCESS(SetDeployInfo(), "[MDS][SetDeployInfo] failed, graph_name:[%s]", GetGraphName());
  31. GELOGI("[MDS][%s] deploy successfully.", graph->GetName().c_str());
  32. return SUCCESS;
  33. }
  34. Status ModelDeploySchedulerPass::CutProcess() {
  35. GE_CHECK_NOTNULL(compute_graph_);
  36. if (!compute_graph_->GetAllSubgraphs().empty() || compute_graph_->GetParentGraph() != nullptr) {
  37. GELOGW("[MDS][CutProcess] graph with subgraphs is not supported now. graph_name:[%s]", GetGraphName());
  38. return SUCCESS;
  39. }
  40. auto type = MdsUtils::TryGetGraphCutType(compute_graph_);
  41. switch (type) {
  42. case kCutN:MDS_REQUIRE_SUCCESS(CutNProcessImply(compute_graph_),
  43. "[MDS][CutNProcessImply] failed, graph_name:[%s]",
  44. GetGraphName());
  45. break;
  46. case kCutH:MDS_REQUIRE_SUCCESS(CutHProcessImply(compute_graph_),
  47. "[MDS][CutHProcessImply] failed, graph_name:[%s]",
  48. GetGraphName());
  49. break;
  50. case kDynamicCutN:MDS_REQUIRE_SUCCESS(CutNProcessImply(compute_graph_, true),
  51. "[MDS][CutNProcessImply] failed, graph_name:[%s]",
  52. GetGraphName());
  53. break;
  54. case kDynamicCutH:MDS_REQUIRE_SUCCESS(CutHProcessImply(compute_graph_, true),
  55. "[MDS][CutHProcessImply] failed, graph_name:[%s]",
  56. GetGraphName());
  57. break;
  58. case kDynamicCutAll:MDS_REQUIRE_SUCCESS(DynamicCutAll(compute_graph_),
  59. "[MDS][DynamicCutAll] failed, graph_name:[%s]",
  60. GetGraphName());
  61. break;
  62. default:GELOGI("[MDS][CutProcess] could not cut, just return. graph_name:[%s]", GetGraphName());
  63. return SUCCESS;
  64. }
  65. }
  66. Status ModelDeploySchedulerPass::CutNProcessImply(const ComputeGraphPtr &compute_graph, bool is_dynamic) {
  67. GE_CHECK_NOTNULL(compute_graph);
  68. // step 0: Cut
  69. for (const auto &node : compute_graph->GetDirectNode()) {
  70. auto op_kernel = mds_cut_pass::GetKernelByType(node);
  71. if (op_kernel == nullptr) {
  72. op_kernel = DeploySchedulerKernel::Instance();
  73. }
  74. if (is_dynamic) {
  75. MDS_REQUIRE_SUCCESS(op_kernel->DynamicCutN(node),
  76. "[MDS][DYNAMIC_CUTN] failed, node:[%s]",
  77. node->GetName().c_str());
  78. } else {
  79. MDS_REQUIRE_SUCCESS(op_kernel->CutN(node), "[MDS][CUTN] failed, node:[%s]", node->GetName().c_str());
  80. }
  81. bool is_grad_compute_node = false;
  82. if (ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_GRADIENT_NODE, is_grad_compute_node)
  83. && is_grad_compute_node) {
  84. grad_compute_nodes_.push_back(node);
  85. }
  86. }
  87. //TODO:针对单输出多引用插入的allgather,allreduce节点做广度融合优化
  88. MDS_REQUIRE_SUCCESS(HcomNodeFusionProcess(),
  89. "[MDS][CUTN][HcomNodeFusionProcess] failed, compute graph:[%s]",
  90. compute_graph->GetName().c_str());
  91. return SUCCESS;
  92. }
  93. Status ModelDeploySchedulerPass::CutHProcessImply(const ComputeGraphPtr &compute_graph, bool is_dynamic) {
  94. GE_CHECK_NOTNULL(compute_graph);
  95. for (NodePtr &node : compute_graph->GetDirectNode()) {
  96. auto op_kernel = mds_cut_pass::GetKernelByType(node);
  97. if (op_kernel == nullptr) {
  98. op_kernel = DeploySchedulerKernel::Instance();
  99. }
  100. if (is_dynamic) {
  101. MDS_REQUIRE_SUCCESS(op_kernel->DynamicCutH(node),
  102. "[MDS][DYNAMIC_CUTH] failed, node:[%s]",
  103. node->GetName().c_str());
  104. } else {
  105. MDS_REQUIRE_SUCCESS(op_kernel->CutH(node), "[MDS][CUTH] failed, node:[%s]", node->GetName().c_str());
  106. }
  107. }
  108. return SUCCESS;
  109. }
  110. Status ModelDeploySchedulerPass::DynamicCutAll(const ComputeGraphPtr &compute_graph) {
  111. std::vector<NodePtr> input_nodes;
  112. std::vector<NodePtr> output_nodes;
  113. auto compute_graph0 = GraphUtils::CloneGraph(compute_graph, "", input_nodes, output_nodes);
  114. auto compute_graph1 = GraphUtils::CloneGraph(compute_graph, "", input_nodes, output_nodes);
  115. MDS_REQUIRE_SUCCESS(CutNProcessImply(compute_graph0, true),
  116. "[MDS][CutNProcessImply] failed, graph_name:[%s]",
  117. compute_graph0->GetName().c_str());
  118. MDS_REQUIRE_SUCCESS(CutHProcessImply(compute_graph1, true),
  119. "[MDS][CutHProcessImply] failed, graph_name:[%s]",
  120. compute_graph1->GetName().c_str());
  121. //TODO:创建case节点,把两个图放在case的两个分支下,case节点添加到原来的compute_graph中,构造case节点的输入
  122. return SUCCESS;
  123. }
  124. Status ModelDeploySchedulerPass::SMDPProcess(bool before_cut) {
  125. if (before_cut) {
  126. MDS_REQUIRE_SUCCESS(SMDPModelState(), "[SMDPProcess][SMDPModelState] failed, graph_name:[%s]", GetGraphName());
  127. MDS_REQUIRE_SUCCESS(SMDPWeight(), "[SMDPProcess][SMDPWeight] failed, graph_name:[%s]", GetGraphName());
  128. } else {
  129. MDS_REQUIRE_SUCCESS(SMDPGradient(), "[SMDPProcess][SMDPGradient] failed, graph_name:[%s]", GetGraphName());
  130. }
  131. return SUCCESS;
  132. }
  133. Status ModelDeploySchedulerPass::SetDeployInfo() {
  134. vector<GeAttrValue::NAMED_ATTRS> deployInfo;
  135. REQUIRE (!ge::AttrUtils::GetListNamedAttrs(compute_graph_, ATTR_NAME_DEPLOY_INFO, deployInfo),
  136. "%s already has deployed before!",
  137. GetGraphName());
  138. std::multimap<DeviceId, GraphInputs> deploys;
  139. for (int64_t j = 0; j < kDeployNumber; j++) {
  140. int64_t device_id = j;
  141. GraphInputs graph_inputs;
  142. // For now, only one input_node in input_nodes
  143. for (const auto &input_node : MdsUtils::GetInputNodes()) {
  144. GE_CHECK_NOTNULL(input_node);
  145. GeTensorPtr graph_input = MakeShared<GeTensor>(input_node->GetOpDesc()->GetOutputDesc(0));
  146. vector<uint8_t> data{static_cast<uint8_t>(device_id)};
  147. graph_input->SetData(data);
  148. graph_inputs.push_back(graph_input);
  149. }
  150. deploys.emplace(j, graph_inputs);
  151. }
  152. return MdsUtils::SetDeployInfo(compute_graph_, deploys);
  153. }
// Placeholder: swap processing is not implemented yet; always succeeds so the
// Run() pipeline can proceed.
Status ModelDeploySchedulerPass::SwapProcess() {
  return SUCCESS;
}
// Placeholder: pipeline processing is not implemented yet; always succeeds.
// NOTE(review): "Pipline" spelling matches the declaration/callers; renaming
// would be an interface change.
Status ModelDeploySchedulerPass::PiplineProcess() {
  return SUCCESS;
}
// Placeholder: fusion of HCOM (collective-communication) nodes inserted during
// CutN is not implemented yet; always succeeds.
Status ModelDeploySchedulerPass::HcomNodeFusionProcess() {
  return SUCCESS;
}
// Placeholder: SMDP handling of model state (pre-cut phase) is not implemented
// yet; always succeeds.
Status ModelDeploySchedulerPass::SMDPModelState() {
  return SUCCESS;
}
// Placeholder: SMDP handling of weights (pre-cut phase) is not implemented
// yet; always succeeds.
Status ModelDeploySchedulerPass::SMDPWeight() {
  return SUCCESS;
}
// Placeholder: SMDP handling of gradients (post-cut phase); always succeeds.
Status ModelDeploySchedulerPass::SMDPGradient() {
  // TODO: mark the buffer pool id (translated from original note; "TDOD" was a typo for TODO).
  return SUCCESS;
}
  173. }

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示