You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

graph_builder.cc 34 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/graph_builder.h"
  17. #include "graph/build/memory/graph_mem_assigner.h"
  18. #include "common/ge/ge_util.h"
  19. #include "common/helper/model_helper.h"
  20. #include "graph/build/logical_stream_allocator.h"
  21. #include "graph/build/run_context.h"
  22. #include "graph/build/stream_graph_optimizer.h"
  23. #include "graph/common/ge_call_wrapper.h"
  24. #include "graph/ge_context.h"
  25. #include "graph/manager/graph_var_manager.h"
  26. #include "graph/passes/mark_same_addr_pass.h"
  27. #include "graph/utils/node_utils.h"
  28. #include "graph/utils/type_utils.h"
  29. #include "init/gelib.h"
  30. #include "model/ge_model.h"
  31. #include "graph/ge_context.h"
  32. #include "opskernel_manager/ops_kernel_builder_manager.h"
  33. #include "graph/utils/op_desc_utils.h"
  34. using domi::BuildMode;
  35. namespace {
  36. const int32_t kInvalidPerfLevel = -1;
  37. enum NodeType { kSubgraphData, kSubgraphNode, kOthers };
  38. } // namespace
  39. namespace ge {
  40. NodeType TransferNodeType(const NodePtr &node) {
  41. const std::string type = node->GetType();
  42. if (type == ge::DATA) {
  43. if (node->GetOwnerComputeGraph()->GetParentNode() == nullptr) {
  44. GELOGD("access src data node:%s", node->GetName().c_str());
  45. return kOthers;
  46. }
  47. GELOGD("access subgraph input node:%s", node->GetName().c_str());
  48. return kSubgraphData;
  49. } else if (type == PARTITIONEDCALL) {
  50. GELOGD("access subgraph node:%s", node->GetName().c_str());
  51. return kSubgraphNode;
  52. }
  53. GELOGD("access other node:%s", node->GetName().c_str());
  54. return kOthers;
  55. }
// Steps the (src_node, src_out_anchor) pair across a subgraph-node boundary:
// output i of the subgraph node maps to input i of the subgraph's NETOUTPUT,
// whose peer is the real producer inside the subgraph. Both in/out parameters
// are rewritten to point at that producer.
// Returns SUCCESS, or an error status when any node/anchor on the path is missing.
Status HandleSubgraphNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) {
  auto subgraph = NodeUtils::GetSubgraph(*src_node, 0);
  GE_CHECK_NOTNULL(subgraph);
  const NodePtr &net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT);
  GE_CHECK_NOTNULL(net_output_node);
  // Subgraph output index == NETOUTPUT input index.
  const InDataAnchorPtr &in_data_anchor = net_output_node->GetInDataAnchor(src_out_anchor->GetIdx());
  GE_CHECK_NOTNULL(in_data_anchor);
  const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_anchor);
  src_node = peer_out_anchor->GetOwnerNode();
  src_out_anchor = peer_out_anchor;
  return SUCCESS;
}
// Steps the (src_node, src_out_anchor) pair out of a subgraph through a DATA
// node: the DATA node's ATTR_NAME_PARENT_NODE_INDEX names which input of the
// parent (subgraph-owning) node feeds it; both in/out parameters are rewritten
// to the peer producer of that parent input.
// Returns FAILED when the parent-index attribute is absent, otherwise SUCCESS
// or an error status when any node/anchor on the path is missing.
Status HandleSubgraphDataNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) {
  uint32_t index = 0;
  if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, index)) {
    GELOGE(FAILED, "Get attr ATTR_NAME_PARENT_NODE_INDEX failed, node:%s.", src_node->GetName().c_str());
    return FAILED;
  }
  const NodePtr &parent_node = src_node->GetOwnerComputeGraph()->GetParentNode();
  GE_CHECK_NOTNULL(parent_node);
  const InDataAnchorPtr &in_data_anchor = parent_node->GetInDataAnchor(index);
  GE_CHECK_NOTNULL(in_data_anchor);
  const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_anchor);
  src_node = peer_out_anchor->GetOwnerNode();
  src_out_anchor = peer_out_anchor;
  return SUCCESS;
}
// Default-constructed builder: generate tasks with fusion, hcom ops not parallel.
GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {}
  86. void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) {
  87. stream_max_parallel_num_ = options.stream_max_parallel_num;
  88. hcom_parallel_ = options.hcom_parallel;
  89. if (options.perf_level == kInvalidPerfLevel) {
  90. build_mode_ = static_cast<int>(BuildMode::GEN_TASK_WITH_FUSION);
  91. } else {
  92. build_mode_ = options.perf_level;
  93. }
  94. }
// Calculates running parameters for every node of the graph:
//   1. ensures each node has an op-kernel-lib name (re-resolving the DNN
//      engine when the name is empty),
//   2. propagates producer output sizes onto this node's inputs (SetInputSize),
//   3. asks the ops kernel builder to compute the op's running param,
//   4. records output memory types (AddOutputMemTypeForNode).
// When the graph is a subgraph (has a parent node), also refreshes the parent
// node's output sizes from the subgraph's NETOUTPUT.
Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) {
  GE_CHECK_NOTNULL(graph);
  auto instance_ptr = ge::GELib::GetInstance();
  if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
    GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GraphBuilder: GE is not initialized");
    return GE_CLI_GE_NOT_INITIALIZED;
  }
  for (const auto &node_ptr : graph->GetNodes(graph->GetGraphUnknownFlag())) {
    GE_CHECK_NOTNULL(node_ptr->GetOpDesc());
    std::string kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName();
    if (kernel_lib_name.empty()) {
      // reset op kernel lib: GetDNNEngineName fills the kernel lib name on the
      // op desc as a side effect; the return value itself is not needed here.
      (void)instance_ptr->DNNEngineManagerObj().GetDNNEngineName(node_ptr);
      kernel_lib_name = node_ptr->GetOpDesc()->GetOpKernelLibName();
      if (kernel_lib_name.empty()) {
        GELOGE(INTERNAL_ERROR, "Get node:%s(%s) kernel lib failed.", node_ptr->GetName().c_str(),
               node_ptr->GetType().c_str());
        return INTERNAL_ERROR;
      }
    }
    auto ret = SetInputSize(node_ptr);
    if (ret != SUCCESS) {
      GELOGE(ret, "Set node inputDesc size failed, node name is %s", node_ptr->GetName().c_str());
      return ret;
    }
    ret = OpsKernelBuilderManager::Instance().CalcOpRunningParam(*node_ptr);
    if (ret != SUCCESS) {
      GELOGE(ret, "Calculate op running param failed, node name is %s", node_ptr->GetName().c_str());
      return ret;
    }
    GE_CHK_STATUS_RET(AddOutputMemTypeForNode(node_ptr));
  }
  // Root graphs have no parent node; nothing more to propagate.
  auto parent_node = graph->GetParentNode();
  if (parent_node == nullptr) {
    return SUCCESS;
  }
  GE_CHK_STATUS_RET(UpdateParentNodeOutputSize(graph, parent_node));
  GELOGI("Success to calculate op running param.");
  return SUCCESS;
}
// Copies tensor sizes from a subgraph's NETOUTPUT inputs onto the matching
// outputs of the parent node: each NETOUTPUT input tensor carrying
// ATTR_NAME_PARENT_NODE_INDEX names the parent output it corresponds to.
// Skipped entirely for unknown-shape graphs (sizes are not static there).
Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr) {
  GELOGI("Begin to update parent node[%s] of graph[%s] output size.", parent_node_ptr->GetName().c_str(),
         graph->GetName().c_str());
  auto parent_op_desc = parent_node_ptr->GetOpDesc();
  GE_CHECK_NOTNULL(parent_op_desc);
  bool is_unknown_shape = graph->GetGraphUnknownFlag();
  if (is_unknown_shape) {
    GELOGI("Current graph[%s] is unknown, no need to update parent node[%s] output size.", graph->GetName().c_str(),
           parent_node_ptr->GetName().c_str());
    return SUCCESS;
  }
  for (const auto &node_ptr : graph->GetDirectNode()) {
    if (node_ptr->GetType() != NETOUTPUT) {
      continue;
    }
    auto op_desc = node_ptr->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) {
      auto index = in_data_anchor->GetIdx();
      ge::GeTensorDesc desc_temp = op_desc->GetInputDesc(index);
      uint32_t parent_index = 0;
      // Inputs without the parent-index attribute do not surface on the parent node.
      if (!AttrUtils::GetInt(desc_temp, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
        GELOGI("NetOutput input tensor %d, attr %s not found.", index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
        continue;
      }
      int64_t size = 0;
      GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc_temp, size) != SUCCESS, GELOGI("Get size failed!"));
      // Write the size back onto the parent node's corresponding output desc.
      ge::GeTensorDesc parent_desc_temp = parent_op_desc->GetOutputDesc(parent_index);
      ge::TensorUtils::SetSize(parent_desc_temp, size);
      GE_CHK_STATUS_RET(parent_op_desc->UpdateOutputDesc(parent_index, parent_desc_temp));
      GELOGI("Update parent node[%s] output index[%u] to size[%ld].", parent_node_ptr->GetName().c_str(), parent_index,
             size);
    }
  }
  return SUCCESS;
}
// Top-level build entry: wraps the compute graph into a GeRootModel and
// dispatches to the dynamic-shape flow (when the graph carries the
// unknown-shape flag) or the known-shape flow. In the known-shape case the
// produced GeModel is registered under the root graph's name; the
// dynamic-shape flow registers its per-subgraph models internally.
Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                           GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
  if (comp_graph == nullptr) {
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null.");
    return GE_GRAPH_PARAM_NULLPTR;
  }
  ge_root_model_ptr = MakeShared<ge::GeRootModel>(comp_graph);
  if (ge_root_model_ptr == nullptr) {
    return MEMALLOC_FAILED;
  }
  GeModelPtr ge_model_ptr = nullptr;
  if (comp_graph->GetGraphUnknownFlag()) {
    GE_CHK_STATUS_RET(
        BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id),
        "Build for dynamic shape graph failed.");
    return SUCCESS;
  }
  GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id),
                    "Build for known shape graph failed.");
  ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr);
  return SUCCESS;
}
// Known-shape build pipeline:
//   host-cpu graphs are diverted to the host-cpu flow; otherwise:
//   second partition -> PreBuildModel -> CalcOpParam -> BuildModelForGetTask
//   -> GetTaskInfo -> SaveDataToModel, producing the output GeModel.
// Each stage is timed (GE_TIMESTAMP_*) and the graph is dumped between stages.
Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list,
                                             GeModelPtr &ge_model_ptr, uint64_t session_id) {
  if (ge::GetContext().GetHostExecFlag()) {
    GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed.");
    return SUCCESS;
  }
  GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str());
  Status ret = SecondPartition(comp_graph, subgraph_list);
  GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str());
  auto subgraph_map = graph_partitioner_.GetSubGraphMap();
  GE_TIMESTAMP_START(BuildSubgraph);
  ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_);
  GE_DUMP(comp_graph, "BeforePreBuildModel");
  GE_TIMESTAMP_START(PreBuildModel);
  GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.",
                    comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel");
  GE_DUMP(comp_graph, "AfterPreBuildModel");
  GE_TIMESTAMP_START(CalcOpParam);
  GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.",
                    comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam");
  GE_DUMP(comp_graph, "AfterCalcOpParam");
  ModelPtr model_ptr = MakeShared<ge::Model>();
  if (model_ptr == nullptr) {
    return MEMALLOC_FAILED;
  }
  GE_TIMESTAMP_START(BuildModelForGetTask);
  GE_CHK_STATUS_RET(builder.BuildModelForGetTask(*model_ptr), "Graph[%s] builder BuildModelForGetTask() return fail.",
                    comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(BuildModelForGetTask, "GraphBuilder::BuildModelForGetTask");
  GE_DUMP(comp_graph, "AfterBuildModel");
  GE_TIMESTAMP_START(GetTaskInfo);
  ret = GetTaskInfo(builder, model_ptr, comp_graph, subgraph_map, session_id);
  GE_TIMESTAMP_END(GetTaskInfo, "GraphBuilder::GetTaskInfo");
  GE_DUMP(comp_graph, "AfterGetTask");
  if (ret != SUCCESS) {
    GELOGE(ret, "Graph[%s] builder GetTaskInfo() return fail.", comp_graph->GetName().c_str());
    return ret;
  }
  ge_model_ptr = MakeShared<ge::GeModel>();
  if (ge_model_ptr == nullptr) {
    return MEMALLOC_FAILED;
  }
  GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr),
                    "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str());
  GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(BuildSubgraph, "GraphBuilder::Build");
  return SUCCESS;
}
  243. Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) {
  244. for (auto &node : comp_graph->GetDirectNode()) {
  245. GE_CHECK_NOTNULL(node);
  246. auto op_desc = node->GetOpDesc();
  247. GE_CHECK_NOTNULL(op_desc);
  248. auto num_inputs = op_desc->GetInputsSize();
  249. std::vector<int64_t> input_offsets(num_inputs, 0);
  250. int valid_input_index = -1;
  251. for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) {
  252. auto in_anchor = node->GetInDataAnchor(i);
  253. auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
  254. if (peer_out_anchor == nullptr) {
  255. continue;
  256. }
  257. ++valid_input_index;
  258. auto peer_node = peer_out_anchor->GetOwnerNode();
  259. if (peer_node == nullptr) {
  260. continue;
  261. }
  262. if (peer_node->GetType() != CONSTANT) {
  263. continue;
  264. }
  265. std::vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(peer_node);
  266. if (weights.empty()) {
  267. GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str());
  268. return FAILED;
  269. }
  270. GeTensorPtr weight = weights[0];
  271. GE_CHECK_NOTNULL(weight);
  272. int64_t input_offset = 0;
  273. (void) TensorUtils::GetDataOffset(weight->MutableTensorDesc(), input_offset);
  274. // valid_input_index must smaller than num_inputs
  275. input_offsets[valid_input_index] = input_offset;
  276. GELOGD("[%s] input[%u] is const, offset = %ld", node->GetName().c_str(), valid_input_index, input_offset);
  277. }
  278. op_desc->SetInputOffset(input_offsets);
  279. std::vector<int64_t> output_offsets(op_desc->GetOutputsSize(), 0);
  280. op_desc->SetOutputOffset(output_offsets);
  281. }
  282. return SUCCESS;
  283. }
// Unknown-shape build pipeline:
//   PreBuildModel -> CalcOpParam -> SetConstantInputOffset -> MergeWeights
//   -> BuildModelForGetDynShapeTask -> GetTaskInfo -> SaveDataToModel.
// Unlike the known-shape flow, no second partition is performed and an empty
// subgraph map is passed to the model builder.
Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
                                               uint64_t session_id) {
  GELOGI("Begin to build unknown shape graph[%s].", comp_graph->GetName().c_str());
  Graph2SubGraphInfoList subgraph_map;
  ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_);
  GE_DUMP(comp_graph, "BeforePreBuildModel");
  GE_TIMESTAMP_START(PreBuildModel);
  GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.",
                    comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel");
  GE_DUMP(comp_graph, "AfterPreBuildModel");
  GE_TIMESTAMP_START(CalcOpParam);
  GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.",
                    comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam");
  GE_DUMP(comp_graph, "AfterCalcOpParam");
  GE_TIMESTAMP_START(SetConstantInputOffset);
  GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph),
                    "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(SetConstantInputOffset, "GraphBuilder::SetConstantInputOffset");
  GE_TIMESTAMP_START(MergeWeights);
  GE_CHK_STATUS_RET(builder.MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights");
  ModelPtr model_ptr = MakeShared<ge::Model>();
  if (model_ptr == nullptr) {
    return MEMALLOC_FAILED;
  }
  GE_TIMESTAMP_START(BuildModelForGetDynShapeTask);
  GE_CHK_STATUS_RET(builder.BuildModelForGetDynShapeTask(*model_ptr),
                    "Graph[%s] builder BuildModelForGetDynShapeTask() return fail.", comp_graph->GetName().c_str());
  GE_TIMESTAMP_END(BuildModelForGetDynShapeTask, "GraphBuilder::BuildModelForGetDynShapeTask");
  GE_TIMESTAMP_START(GetTaskInfo);
  Status ret = GetTaskInfo(builder, model_ptr, comp_graph, subgraph_map, session_id);
  GE_TIMESTAMP_END(GetTaskInfo, "GraphBuilder::GetTaskInfo");
  // Dumps are issued directly here (not via GE_DUMP as in the known-shape flow).
  GraphUtils::DumpGEGraph(comp_graph, "AfterGetTask");
  GraphUtils::DumpGEGraphToOnnx(*comp_graph, "AfterGetTask");
  if (ret != SUCCESS) {
    GELOGE(ret, "Graph[%s] builder GetTaskInfo() return fail.", comp_graph->GetName().c_str());
    return ret;
  }
  ge_model_ptr = MakeShared<ge::GeModel>();
  if (ge_model_ptr == nullptr) {
    return MEMALLOC_FAILED;
  }
  GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr),
                    "Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str());
  GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str());
  return SUCCESS;
}
// Host-CPU graphs reuse the unknown-shape build flow unchanged.
Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) {
  return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
}
  336. static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
  337. const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
  338. GE_CHECK_NOTNULL(out_anchor);
  339. NodePtr in_node = out_anchor->GetOwnerNode();
  340. GE_CHECK_NOTNULL(in_node);
  341. OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
  342. OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
  343. .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
  344. .Build();
  345. (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
  346. if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
  347. GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
  348. return FAILED;
  349. }
  350. return SUCCESS;
  351. }
// CONSTANT nodes generate no task, so a NETOUTPUT fed directly by a CONSTANT
// would have nothing to copy its input: insert a memcpy node on every
// CONSTANT -> NETOUTPUT data edge in the graph.
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
  for (auto &node : graph->GetDirectNode()) {
    // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
    auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
      continue;
    }
    auto op_type = op_desc->GetType();
    if (op_type == NETOUTPUT) {
      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        NodePtr in_node = peer_out_anchor->GetOwnerNode();
        GE_CHECK_NOTNULL(in_node);
        std::string in_node_op_type = in_node->GetType();
        if (in_node_op_type == CONSTANT) {
          GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
          // Name encodes the consumer and input index so repeated insertions stay unique.
          std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
          if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
            GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
                   in_node->GetName().c_str(), node->GetName().c_str());
            return FAILED;
          }
        }
      }
    }
  }
  return SUCCESS;
}
  381. Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
  382. bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
  383. com_graph->SetGraphUnknownFlag(false);
  384. GELOGD("Start to mark profiling task attr for fp and bp.");
  385. TaskGenerator task_generator;
  386. ProfilingPoint profiling_point;
  387. std::vector<uint32_t> all_reduce_node_index;
  388. Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index);
  389. com_graph->SetGraphUnknownFlag(original_unknown_shape_flag);
  390. if (ret != SUCCESS) {
  391. GELOGW("Find profiling node index failed.");
  392. }
  393. if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) {
  394. GELOGD("No need to mark fp bp profiling task attr.");
  395. return SUCCESS;
  396. }
  397. // mark profiling task attr for node
  398. uint32_t node_index = 0;
  399. for (const auto &node : com_graph->GetAllNodes()) {
  400. OpDescPtr op_desc = node->GetOpDesc();
  401. GE_CHECK_NOTNULL(node->GetOpDesc());
  402. node_index++;
  403. if (profiling_point.fp_index == node_index) {
  404. GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
  405. (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true);
  406. }
  407. if (profiling_point.bp_index == node_index) {
  408. GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
  409. (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
  410. }
  411. for (size_t i = 0; i < all_reduce_node_index.size(); i++) {
  412. if (all_reduce_node_index[i] == node_index) {
  413. GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
  414. (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
  415. continue;
  416. }
  417. }
  418. if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {
  419. GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index);
  420. (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true);
  421. }
  422. }
  423. return SUCCESS;
  424. }
// Dynamic-shape build flow. For the root graph: resets stream ids, recalculates
// DATA node sizes, and marks FP/BP profiling attrs. Then every subgraph (or the
// root graph itself when there are none) is built via the unknown-shape or
// known-shape flow according to its own unknown flag, and each resulting model
// is registered on the root model under the subgraph's name.
Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                               std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                                               GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
                                               uint64_t session_id) {
  GELOGI("Start to build BuildForDynamicShape for dynamic shape.");
  // Update Root Graph Data size
  for (auto &node : comp_graph->GetDirectNode()) {
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    // Streams are reassigned per-subgraph later; invalidate any stale id now.
    op_desc->SetStreamId(kInvalidStream);
    if (node->GetType() == DATA) {
      GE_CHK_STATUS_RET(CalcDynShapeRootGraphDataSize(op_desc), "Calc dynamic shape root graph data[%s] size failed.",
                        op_desc->GetName().c_str());
    }
  }
  // Set fp bp profiling task attr for graph
  if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) {
    GELOGE(FAILED, "Set fp bp profiling task attr for graph.");
    return FAILED;
  }
  auto all_graphs = comp_graph->GetAllSubgraphs();
  if (all_graphs.empty()) {
    all_graphs.push_back(comp_graph);
  }
  for (auto &sub_graph : all_graphs) {
    // exclude functional subgraph in known subgraph: those are built together
    // with their (known-shape) parent, not on their own.
    if (sub_graph->GetParentGraph() != nullptr && sub_graph->GetParentGraph() != comp_graph &&
        !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
      continue;
    }
    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
    if (sub_graph->GetGraphUnknownFlag()) {
      // unknown shape build flow
      GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
                        "Build for unknown shape graph failed.");
    } else {
      // reset functional subgraph parent graph as known subgraph
      for (const auto &node : sub_graph->GetDirectNode()) {
        for (const auto &sub_graph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) {
          auto sub_sub_graph = comp_graph->GetSubgraph(sub_graph_name);
          GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), "Failed add subgraph to known graph.");
        }
      }
      // known shape build flow
      GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id),
                        "Build for known shape graph failed.");
    }
    ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr);
  }
  return SUCCESS;
}
// Generates task info for the built model:
//   1. reads memory/p2p/weight sizes from the model's attributes,
//   2. lays out synthetic logical base addresses (data, p2p, weight),
//   3. initializes a RunContext with those regions,
//   4. runs the stream-graph optimizer over the streamed subgraphs,
//   5. invokes TaskGenerator to fill in the task definitions.
// The base addresses are logical placeholders, not real device memory.
Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr &model_ptr,
                                 ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map,
                                 uint64_t session_id) {
  GE_CHECK_NOTNULL(model_ptr);
  GE_CHECK_NOTNULL(comp_graph);
  int64_t memory_size = 0;
  if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_MEMORY_SIZE, memory_size)) {
    GELOGE(INTERNAL_ERROR, "Get memory size fail.");
    return INTERNAL_ERROR;
  }
  int64_t p2p_memory_size = 0;
  if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_memory_size)) {
    GELOGE(INTERNAL_ERROR, "Get p2p memory size fail.");
    return INTERNAL_ERROR;
  }
  int64_t weight_size = 0;
  if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_WEIGHT_SIZE, weight_size)) {
    GELOGE(INTERNAL_ERROR, "Get weight memory size fail.");
    return INTERNAL_ERROR;
  }
  auto var_manager = VarManager::Instance(session_id);
  // since var_mem_logic_base_ = graph_mem_max_size_ + kGraphMemoryBuffer in graph_var_manager.cc,
  // get_mem_base should not bigger than kGraphMemoryBuffer
  auto *get_mem_base = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(kGraphMemoryBuffer>>1));
  uint8_t *get_weight_mem_base = get_mem_base;
  if (weight_size > 0) {
    // Weights live right after the data + p2p regions.
    get_weight_mem_base = get_mem_base + memory_size + p2p_memory_size;
  }
  std::map<int64_t, uint8_t *> mem_type_to_data_mem_base;
  mem_type_to_data_mem_base[RT_MEMORY_HBM] = get_mem_base;
  if (p2p_memory_size == 0) {
    mem_type_to_data_mem_base[RT_MEMORY_P2P_DDR] = nullptr;
  } else {
    mem_type_to_data_mem_base[RT_MEMORY_P2P_DDR] = get_mem_base + memory_size;
  }
  std::map<int64_t, uint64_t> mem_type_to_data_mem_size;
  mem_type_to_data_mem_size[RT_MEMORY_HBM] = memory_size;
  mem_type_to_data_mem_size[RT_MEMORY_P2P_DDR] = p2p_memory_size;
  RunContextUtil run_context;
  Status ret = run_context.InitMemInfo(get_mem_base, memory_size, mem_type_to_data_mem_base, mem_type_to_data_mem_size,
                                       get_weight_mem_base, weight_size);
  if (ret != SUCCESS) {
    GELOGE(ret, "task_generator init mem info fail.");
    return ret;
  }
  auto weight_buffer = builder.GetWeightBuffer();
  ret = run_context.CreateRunContext(*model_ptr, comp_graph, weight_buffer, session_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "runContext create run context fail.");
    return ret;
  }
  StreamGraphOptimizer stream_optimizer;
  ret = stream_optimizer.OptimizeStreamedSubGraph(comp_graph, subgraph_map, run_context.GetRunContext());
  if (ret != SUCCESS) {
    GELOGE(ret, "Optimize streamed subGraph fail.");
    return ret;
  }
  GE_DUMP(comp_graph, "AfterOptimizeStreamedSubGraph");
  auto *get_var_mem_base = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(var_manager->GetVarMemLogicBase()));
  // Variable region is only reported when HBM variable memory is in use.
  uint64_t var_size = (var_manager->GetVarMemSize(RT_MEMORY_HBM) > 0) ? var_manager->GetVarMemMaxSize() : 0;
  TaskGenerator task_generator(get_var_mem_base, var_size);
  ret = task_generator.GetTaskInfo(*model_ptr, comp_graph, session_id, run_context.GetRunContext());
  return ret;
}
  540. Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
  541. // Set the size of input_desc to 'src_node.output_desc.size'
  542. if (node_ptr->GetType() == DATA) {
  543. bool is_unknown_shape = false;
  544. GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node_ptr, is_unknown_shape),
  545. "Get data node[%s] shape status failed!", node_ptr->GetName().c_str());
  546. if (is_unknown_shape) {
  547. GELOGD("data node: %s is unknown shape, do not set input size!", node_ptr->GetName().c_str());
  548. return SUCCESS;
  549. }
  550. if (UpdateDataInputSize(node_ptr) != SUCCESS) {
  551. GELOGE(FAILED, "Update data input size failed.");
  552. return FAILED;
  553. }
  554. }
  555. for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) {
  556. const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  557. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  558. const auto &src_node = peer_out_anchor->GetOwnerNode();
  559. const auto &src_op = src_node->GetOpDesc();
  560. GE_IF_BOOL_EXEC(src_op == nullptr, continue);
  561. auto node_op_desc = node_ptr->GetOpDesc();
  562. GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
  563. // Set the input_desc of dst_node to 'src_node.output_desc'
  564. auto output_desc = src_op->GetOutputDescPtr(peer_out_anchor->GetIdx());
  565. int64_t size = 0;
  566. GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS, GELOGI("Get size failed!"));
  567. GELOGD("src node %s output desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", src_node->GetName().c_str(),
  568. output_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(output_desc->GetFormat()).c_str(),
  569. TypeUtils::DataTypeToSerialString(output_desc->GetDataType()).c_str());
  570. for (size_t i = 0; i < output_desc->GetShape().GetDimNum(); ++i) {
  571. GELOGD("dims[%zu]: %ld", i, output_desc->GetShape().GetDim(i));
  572. }
  573. auto input_desc = node_op_desc->MutableInputDesc(in_data_anchor->GetIdx());
  574. GE_CHECK_NOTNULL(input_desc);
  575. (void) ge::TensorUtils::SetSize(*input_desc, size);
  576. GELOGD("%s input desc, dim_size: %zu, mem_size: %ld, format: %s, type: %s.", node_ptr->GetName().c_str(),
  577. input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(),
  578. TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str());
  579. // inherit some attr
  580. int64_t tensor_size_attr;
  581. if (AttrUtils::GetInt(output_desc, ATTR_NAME_SPECIAL_OUTPUT_SIZE, tensor_size_attr) && (tensor_size_attr > 0)) {
  582. GE_IF_BOOL_EXEC(!AttrUtils::SetInt(*input_desc, ATTR_NAME_SPECIAL_OUTPUT_SIZE, tensor_size_attr),
  583. GELOGW("Set size attr failed!"); continue);
  584. GELOGD("node[%s] [%d]th output has sepcial size[%ld], and update to node[%s] [%d]th input",
  585. src_op->GetName().c_str(), peer_out_anchor->GetIdx(), tensor_size_attr,
  586. node_op_desc->GetName().c_str(), in_data_anchor->GetIdx());
  587. }
  588. }
  589. return SUCCESS;
  590. }
  591. Status GraphBuilder::UpdateDataInputSize(const ge::NodePtr &node_ptr) {
  592. const auto &op_desc = node_ptr->GetOpDesc();
  593. if (op_desc == nullptr) {
  594. GELOGE(FAILED, "Op desc is nullptr.");
  595. return FAILED;
  596. }
  597. // data op only has one output anchor
  598. ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0);
  599. int64_t output_size = 0;
  600. if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) {
  601. GELOGW("Get size failed!");
  602. }
  603. if (output_size > 0) {
  604. GELOGI("No need to update data input size.");
  605. return SUCCESS;
  606. } else {
  607. int64_t real_dim_size = 0;
  608. ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc, real_dim_size);
  609. if (graph_status != GRAPH_SUCCESS) {
  610. GELOGE(FAILED, "Get tensor size in bytes failed.");
  611. return FAILED;
  612. }
  613. // data op only has one input anchor
  614. ge::GeTensorDesc input_desc = op_desc->GetInputDesc(0);
  615. ge::TensorUtils::SetSize(input_desc, real_dim_size);
  616. if (op_desc->UpdateInputDesc(0, input_desc) != GRAPH_SUCCESS) {
  617. GELOGE(FAILED, "Update input desc size failed.");
  618. return FAILED;
  619. }
  620. }
  621. return SUCCESS;
  622. }
  623. Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) {
  624. GELOGI("Begin to calc dynamic shape graph data[%s] size.", op_desc->GetName().c_str());
  625. // data op only has one output anchor
  626. ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0);
  627. if (output_desc.MutableShape().IsUnknownShape()) {
  628. GELOGI("No need to update dynamic shape graph data output size for unknown shape data.");
  629. return SUCCESS;
  630. }
  631. int64_t output_size = 0;
  632. if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) {
  633. GELOGW("Get size failed!");
  634. }
  635. if (output_size > 0) {
  636. GELOGI("No need to update dynamic shape graph data output size[%ld].", output_size);
  637. return SUCCESS;
  638. } else {
  639. int64_t real_dim_size = 0;
  640. ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc, real_dim_size);
  641. if (graph_status != GRAPH_SUCCESS) {
  642. GELOGE(FAILED, "Get tensor size in bytes failed.");
  643. return FAILED;
  644. }
  645. ge::TensorUtils::SetSize(output_desc, real_dim_size);
  646. GELOGI("Update dynamic shape graph data output size to [%ld].", real_dim_size);
  647. if (op_desc->UpdateOutputDesc(0, output_desc) != GRAPH_SUCCESS) {
  648. GELOGE(FAILED, "Update dynamic shape graph data output desc size failed.");
  649. return FAILED;
  650. }
  651. }
  652. return SUCCESS;
  653. }
  654. Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) {
  655. GE_TIMESTAMP_START(GraphPartition2);
  656. auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning);
  657. if (ret != SUCCESS) {
  658. GELOGE(ret, "Graph partition Failed");
  659. return ret;
  660. }
  661. GE_CHK_STATUS_RET(ret, "Graph partition Failed.");
  662. auto graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
  663. if (graph_2_subgraphlist.find(comp_graph) != graph_2_subgraphlist.end()) {
  664. subgraph_ptr_list = graph_2_subgraphlist[comp_graph];
  665. } else {
  666. GELOGE(FAILED, "Find subgraph failed.");
  667. return FAILED;
  668. }
  669. GE_TIMESTAMP_END(GraphPartition2, "GraphPartitioner::Partition2");
  670. return ret;
  671. }
  672. Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) {
  673. auto op_desc = node->GetOpDesc();
  674. GE_CHECK_NOTNULL(op_desc);
  675. uint32_t mem_type;
  676. if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) {
  677. return SUCCESS;
  678. }
  679. GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type);
  680. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  681. const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  682. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  683. bool valid_flag = false;
  684. auto src_node = peer_out_anchor->GetOwnerNode();
  685. auto src_out_anchor = peer_out_anchor;
  686. while (true) {
  687. const auto &src_desc = src_node->GetOpDesc();
  688. GE_IF_BOOL_EXEC(src_desc == nullptr, continue);
  689. GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(),
  690. mem_type);
  691. if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE,
  692. mem_type)) {
  693. GELOGE(INTERNAL_ERROR, "Set out_memory_type attr for [%s:%d] failed.", src_desc->GetName().c_str(),
  694. src_out_anchor->GetIdx());
  695. return INTERNAL_ERROR;
  696. }
  697. switch (TransferNodeType(src_node)) {
  698. case kSubgraphNode:
  699. GE_CHK_STATUS_RET(HandleSubgraphNode(src_node, src_out_anchor), "Handle subgraph node %s failed",
  700. src_node->GetName().c_str());
  701. break;
  702. case kSubgraphData:
  703. GE_CHK_STATUS_RET(HandleSubgraphDataNode(src_node, src_out_anchor), "Handle Data node %s in subgraph failed",
  704. src_node->GetName().c_str());
  705. break;
  706. case kOthers:
  707. default:
  708. valid_flag = true;
  709. break;
  710. }
  711. if (valid_flag) {
  712. break;
  713. }
  714. }
  715. }
  716. return SUCCESS;
  717. }
  718. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示