You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

atomic_addr_clean_pass.cc 19 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/passes/atomic_addr_clean_pass.h"
  17. #include <map>
  18. #include <memory>
  19. #include <string>
  20. #include <sstream>
  21. #include <vector>
  22. #include "common/ge_inner_error_codes.h"
  23. #include "common/ge/ge_util.h"
  24. #include "graph/common/ge_call_wrapper.h"
  25. #include "graph/debug/ge_attr_define.h"
  26. #include "graph/utils/node_utils.h"
  27. #include "init/gelib.h"
  28. namespace ge {
  29. Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
  30. GE_CHECK_NOTNULL(graph);
  31. GELOGD("AtomicAddrCleanPass begin.");
  32. // 1.Recoginze atomic and loop mark
  33. vector<NodePtr> atomic_node_vec;
  34. for (NodePtr &node : graph->GetDirectNode()) {
  35. if (IsAtomicOp(node)) {
  36. atomic_node_vec.push_back(node);
  37. }
  38. if (!is_loop_graph_ && node->GetType() == LOOPCOND) {
  39. // there is loop in this graph
  40. GELOGD("There is no loop node. It will insert clean node follow atomic node.");
  41. is_loop_graph_ = true;
  42. }
  43. }
  44. if (atomic_node_vec.empty()) {
  45. GELOGD("There is no atomic node. Ignore atomicAddrClean pass.");
  46. return SUCCESS;
  47. }
  48. bool is_unknown_graph = graph->GetGraphUnknownFlag();
  49. if (is_unknown_graph) {
  50. GELOGD("Graph[%s] is unknown graph. It will call fe interface to compile op.", graph->GetName().c_str());
  51. GE_CHK_STATUS_RET(CompileUnknownGraphOp(atomic_node_vec));
  52. return SUCCESS;
  53. }
  54. // 2.Insert clean node and link to atomic node
  55. Status ret;
  56. if (is_loop_graph_) {
  57. ret = HandleLoopGraph(graph, atomic_node_vec);
  58. if (ret != SUCCESS) {
  59. return ret;
  60. }
  61. } else {
  62. ret = HandleNormalGraph(graph, atomic_node_vec);
  63. if (ret != SUCCESS) {
  64. return ret;
  65. }
  66. }
  67. GELOGD("AtomicAddrCleanPass end.");
  68. return SUCCESS;
  69. }
  70. // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input
  71. bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) {
  72. // 1.Check if isAtomic attrs exist for HCOM
  73. std::shared_ptr<GELib> instance_ptr = GELib::GetInstance();
  74. if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
  75. GELOGW("GELib not initialized, atomic from ops kernel judge false, node_name: %s", node->GetName().c_str());
  76. return false;
  77. }
  78. OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj();
  79. vector<OpInfo> op_info_vec = ops_kernel_manager.GetOpsKernelInfo(node->GetType());
  80. for (const auto &op_info : op_info_vec) {
  81. if (op_info.isAtomic) {
  82. // check peer input is DATA
  83. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  84. if (in_data_anchor->GetPeerOutAnchor() != nullptr &&
  85. in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) {
  86. auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode();
  87. if (peer_in_node->GetType() == DATA) {
  88. GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(),
  89. op_info.engine.c_str());
  90. return false;
  91. }
  92. }
  93. }
  94. GELOGI("Recognized atomic op %s from %s engine.", node->GetName().c_str(), op_info.engine.c_str());
  95. hcom_node_vec_.push_back(node);
  96. return true;
  97. }
  98. }
  99. return false;
  100. }
  101. bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int64_t output_index) {
  102. auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  103. if (out_data_anchor == nullptr) {
  104. return false;
  105. }
  106. for (auto input_anchor : out_data_anchor->GetPeerInDataAnchors()) {
  107. auto output_node = input_anchor->GetOwnerNode();
  108. // just hccl may mark atomic from ops kernel now, and hccl's atomic if for all input
  109. // hccl's attr ATOMIC_ATTR_INPUT_INDEX mark on CalcOpRunningParam, can't be get here
  110. if (CheckAtomicFromOpsKernel(output_node)) {
  111. return true;
  112. }
  113. }
  114. return false;
  115. }
  116. bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) {
  117. OpDescPtr op_desc = node->GetOpDesc();
  118. std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
  119. bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
  120. bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
  121. atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
  122. if (!has_atomic_input && has_atomic_output && atomic_workspace_index_size.empty()) {
  123. std::vector<int64_t> atomic_output_index;
  124. (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  125. bool is_all_output_peer_also_atomic = true;
  126. for (const auto &output_index : atomic_output_index) {
  127. if (!IsOutputIndexPeerInputAtomic(node, output_index)) {
  128. is_all_output_peer_also_atomic = false;
  129. break;
  130. }
  131. }
  132. if (is_all_output_peer_also_atomic) {
  133. GELOGI("all out peer node input atomic, skip this out atomic process, node name: %s", node->GetName().c_str());
  134. return true;
  135. }
  136. }
  137. return false;
  138. }
  139. Status AtomicAddrCleanPass::HandleLoopGraph(ComputeGraphPtr &graph, const vector<NodePtr> &atomic_node_vec) {
  140. // Loop graph , insert clean node follow atomic node
  141. int index = 0;
  142. for (const auto &node : atomic_node_vec) {
  143. if (CheckSkipInsertInLoopGraph(node)) {
  144. continue;
  145. }
  146. // Insert atomic clean op
  147. NodePtr clean_addr_node = InsertAtomicAddrCleanNode(graph);
  148. if (clean_addr_node == nullptr) {
  149. GELOGE(FAILED, "Insert AtomicAddrClean node failed. Ignore atomicAddrClean pass.");
  150. return FAILED;
  151. }
  152. GE_CHECK_NOTNULL(clean_addr_node->GetOpDesc());
  153. string node_name = clean_addr_node->GetOpDesc()->GetName();
  154. std::ostringstream oss;
  155. oss << node_name << index;
  156. node_name = oss.str();
  157. clean_addr_node->GetOpDesc()->SetName(node_name); // [Cascade Pointer]
  158. GELOGD("Inserted atomic clean node name is %s", node_name.c_str());
  159. auto ret = LinkToAtomicNode(node, clean_addr_node);
  160. if (ret != SUCCESS) {
  161. GELOGE(ret, "Link control anchor failed from atomic node to atomic_addr_clean node.");
  162. return ret;
  163. }
  164. index++;
  165. }
  166. return SUCCESS;
  167. }
  168. Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vector<NodePtr> &atomic_node_vec) {
  169. GELOGD("Not loop graph and unknown graph. It will insert atomic clean nodes.");
  170. vector<NodePtr> common_atomic_nodes;
  171. vector<NodePtr> dispersed_atomic_nodes;
  172. auto ret = HandleDispersedAtomicNodes(graph, atomic_node_vec, common_atomic_nodes, dispersed_atomic_nodes);
  173. if (ret != SUCCESS) {
  174. GELOGE(ret, "Handle dispersed atomic nodes failed, graph name is %s.", graph->GetName().c_str());
  175. return ret;
  176. }
  177. if (common_atomic_nodes.empty()) {
  178. GELOGI("common_atomic_nodes is empty");
  179. return SUCCESS;
  180. }
  181. // not loop graph , insert only one clean node in graph
  182. NodePtr clean_addr_node = InsertAtomicAddrCleanNode(graph);
  183. if (clean_addr_node == nullptr) {
  184. GELOGE(FAILED, "Insert AtomicAddrClean node failed. Ignore atomicAddrClean pass.");
  185. return FAILED;
  186. }
  187. for (const auto &node : common_atomic_nodes) {
  188. ret = LinkToAtomicNode(node, clean_addr_node);
  189. if (ret != SUCCESS) {
  190. GELOGE(ret, "Link control anchor failed from atomic node to atomic_addr_clean node.");
  191. return ret;
  192. }
  193. }
  194. // for HCOM atomic node, add one more control link to peer-in node
  195. for (auto &node : hcom_node_vec_) {
  196. for (auto &in_anchor : node->GetAllInDataAnchors()) {
  197. GE_CHECK_NOTNULL(in_anchor->GetPeerOutAnchor());
  198. NodePtr peer_in_node = in_anchor->GetPeerOutAnchor()->GetOwnerNode();
  199. ret = LinkToAtomicNode(peer_in_node, clean_addr_node);
  200. if (ret != SUCCESS) {
  201. GELOGE(ret, "Link failed, %s : %s", peer_in_node->GetName().c_str(), clean_addr_node->GetName().c_str());
  202. return ret;
  203. }
  204. }
  205. }
  206. return LinkToPotentialPrecedenceNode(graph, clean_addr_node, dispersed_atomic_nodes);
  207. }
  208. // Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean
  209. // node. We hope that atomic clean node can execute with the highest priority in the entire graph. Because of stream
  210. // concurrency mechanism, only placing it at the head can not ensure that priority. Therefore, we need to add control
  211. // edges from atomic clean node to the nodes that may be the first node on each stream. Generally, the first nodes on
  212. // each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the
  213. // successors of Data/Variable.
  214. Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node,
  215. const vector<NodePtr> &dispersed_atomic_nodes) {
  216. GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.",
  217. atomic_clean_node->GetName().c_str());
  218. auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor();
  219. GE_CHECK_NOTNULL(out_ctrl_anchor);
  220. for (const auto &node : graph->GetDirectNode()) {
  221. GE_CHECK_NOTNULL(node);
  222. bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty();
  223. if (!need_handle) {
  224. continue;
  225. }
  226. auto second_nodes = node->GetOutAllNodes();
  227. for (const auto &second_node : second_nodes) {
  228. GE_CHECK_NOTNULL(second_node);
  229. if ((std::find(dispersed_atomic_nodes.begin(), dispersed_atomic_nodes.end(), second_node) !=
  230. dispersed_atomic_nodes.end()) || (second_node->GetType() == NETOUTPUT)) {
  231. continue;
  232. }
  233. auto in_ctrl_anchor = second_node->GetInControlAnchor();
  234. GE_CHECK_NOTNULL(in_ctrl_anchor);
  235. if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) {
  236. GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor));
  237. GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str());
  238. }
  239. }
  240. }
  241. return SUCCESS;
  242. }
  243. Status AtomicAddrCleanPass::HandleDispersedAtomicNodes(ComputeGraphPtr &graph,
  244. const std::vector<NodePtr> &atomic_node_vec,
  245. std::vector<NodePtr> &common_atomic_nodes,
  246. vector<NodePtr> &dispersed_atomic_nodes) {
  247. int index = 0;
  248. for (const auto &node : atomic_node_vec) {
  249. vector<int> node_anchors_connect_netoutput;
  250. // If GetBool fail, attr is_connect_netoutput is an empty vector.
  251. (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_NODE_CONNECT_OUTPUT, node_anchors_connect_netoutput);
  252. if (!node_anchors_connect_netoutput.empty()) {
  253. NodePtr dispersed_clean_addr_node = InsertAtomicAddrCleanNode(graph);
  254. if (dispersed_clean_addr_node == nullptr) {
  255. GELOGE(FAILED, "Insert AtomicAddrClean node failed. Ignore atomicAddrClean pass.");
  256. return FAILED;
  257. }
  258. auto dispersed_node_op_desc = dispersed_clean_addr_node->GetOpDesc();
  259. GE_CHECK_NOTNULL(dispersed_node_op_desc);
  260. string node_name = dispersed_node_op_desc->GetName();
  261. std::ostringstream oss;
  262. oss << node_name << "_" << index;
  263. node_name = oss.str();
  264. dispersed_node_op_desc->SetName(node_name);
  265. GELOGD("Inserted dispersed atomic clean node [%s] before [%s]", node_name.c_str(), node->GetName().c_str());
  266. ++index;
  267. Status ret = LinkToAtomicNode(node, dispersed_clean_addr_node);
  268. if (ret != SUCCESS) {
  269. GELOGE(ret, "Link control anchor failed from atomic node: %s to atomic_addr_clean node: %s.",
  270. node->GetName().c_str(), dispersed_clean_addr_node->GetName().c_str());
  271. return ret;
  272. }
  273. dispersed_atomic_nodes.emplace_back(node);
  274. } else {
  275. common_atomic_nodes.emplace_back(node);
  276. }
  277. }
  278. return SUCCESS;
  279. }
  280. NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) {
  281. OpDescPtr op_desc = MakeShared<OpDesc>(NODE_NAME_ATOMIC_ADDR_CLEAN, ATOMICADDRCLEAN);
  282. if (op_desc == nullptr) {
  283. REPORT_CALL_ERROR("E19999", "New OpDesc failed");
  284. GELOGE(INTERNAL_ERROR, "Make shared atomic addr clean op failed.");
  285. return nullptr;
  286. }
  287. string session_graph_id;
  288. if (!AttrUtils::GetStr(*graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id)) {
  289. GELOGW("Get graph session_graph_id attr failed.");
  290. }
  291. if (!session_graph_id.empty()) {
  292. (void) AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
  293. }
  294. string node_name = op_desc->GetName();
  295. // Only flush subgraph name
  296. if (graph->GetParentGraph() != nullptr) {
  297. node_name = graph->GetName() + "_" + node_name;
  298. }
  299. string name = node_name + session_graph_id;
  300. op_desc->SetName(name);
  301. GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str());
  302. // To avoid same name between graphs, set session graph id to this node
  303. NodePtr clean_addr_node = graph->AddNodeFront(op_desc);
  304. return clean_addr_node;
  305. }
  306. Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node) {
  307. GE_IF_BOOL_EXEC(atomic_node == nullptr || atomic_clean_node == nullptr,
  308. REPORT_INNER_ERROR("E19999", "Param atomic_node or atomic_clean_node is nullptr, "
  309. "check invalid");
  310. DOMI_LOGE("param [atomic_node][atomic_clean_node] must not be null.");
  311. return PARAM_INVALID);
  312. InControlAnchorPtr in_ctrl_anchor = atomic_node->GetInControlAnchor();
  313. OutControlAnchorPtr out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor();
  314. if (in_ctrl_anchor == nullptr || out_ctrl_anchor == nullptr) {
  315. REPORT_INNER_ERROR("E19999", "in_ctrl_anchor of op:%s(%s) or out_ctrl_anchor of op:%s(%s) is nullptr, "
  316. "check invalid",
  317. atomic_node->GetName().c_str(), atomic_node->GetType().c_str(),
  318. atomic_clean_node->GetName().c_str(), atomic_clean_node->GetType().c_str());
  319. GELOGE(INTERNAL_ERROR,
  320. "Get control anchor faild, dst node: %s.",
  321. atomic_node->GetName().c_str());
  322. return INTERNAL_ERROR;
  323. }
  324. graphStatus status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor);
  325. if (status != GRAPH_SUCCESS) {
  326. REPORT_CALL_ERROR("E19999", "Add control edge between op:%s(%s) and op:%s(%s) failed",
  327. out_ctrl_anchor->GetOwnerNode()->GetName().c_str(),
  328. out_ctrl_anchor->GetOwnerNode()->GetType().c_str(),
  329. in_ctrl_anchor->GetOwnerNode()->GetName().c_str(),
  330. in_ctrl_anchor->GetOwnerNode()->GetType().c_str());
  331. GELOGE(INTERNAL_ERROR,
  332. "Graph add cleanAddrNode op out ctrl edge fail, dst node: %s.",
  333. atomic_node->GetName().c_str());
  334. return INTERNAL_ERROR;
  335. }
  336. GELOGD("Graph add cleanAddrNode op out ctrl edge, dst node: %s.", atomic_node->GetName().c_str());
  337. std::string stream_label;
  338. if (is_loop_graph_ && AttrUtils::GetStr(atomic_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) {
  339. if (!AttrUtils::SetStr(atomic_clean_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) {
  340. GELOGW("LinkToAtomicNode: SetStr failed");
  341. return INTERNAL_ERROR;
  342. }
  343. }
  344. return SUCCESS;
  345. }
  346. bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
  347. GE_IF_BOOL_EXEC(node == nullptr, GELOGE(FAILED, "node is null."); return false);
  348. OpDescPtr op_desc = node->GetOpDesc();
  349. if (op_desc == nullptr) {
  350. return false;
  351. }
  352. // 1.Check if isAtomic attrs exist for HCOM
  353. if (CheckAtomicFromOpsKernel(node)) {
  354. return true;
  355. }
  356. // 2.Check atomic attr in node
  357. std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
  358. bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
  359. bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
  360. atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
  361. if (!has_atomic_input && !has_atomic_output && atomic_workspace_index_size.empty()) {
  362. return false;
  363. }
  364. graphStatus ret = op_desc->SetAttr(ATOMIC_ATTR_IS_ATOMIC_NODE, GeAttrValue::CreateFrom<GeAttrValue::BOOL>(true));
  365. if (ret != GRAPH_SUCCESS) {
  366. GELOGW("set attr ATOMIC_ATTR_IS_ATOMIC_NODE fail.");
  367. }
  368. GELOGD("Recognized atomic op %s from FE engine.", op_desc->GetName().c_str());
  369. return true;
  370. }
  371. ///
  372. /// @brief Clear Status, used for subgraph pass
  373. /// @return SUCCESS
  374. ///
  375. Status AtomicAddrCleanPass::ClearStatus() {
  376. hcom_node_vec_.clear();
  377. return SUCCESS;
  378. }
  379. Status AtomicAddrCleanPass::CompileUnknownGraphOp(const vector<NodePtr> &atomic_node_vec) {
  380. GE_TIMESTAMP_CALLNUM_START(UnknownGraphCompileOp);
  381. std::unordered_map<string, vector<ge::NodePtr>> node_vector_map;
  382. std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
  383. if ((instance == nullptr) || !instance->InitFlag()) {
  384. REPORT_INNER_ERROR("E19999", "GeLib is not init before, check invalid");
  385. GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "CompileSingleOp failed.");
  386. return ge::GE_CLI_GE_NOT_INITIALIZED;
  387. }
  388. for (auto &atomic_node: atomic_node_vec) {
  389. auto op_desc = atomic_node->GetOpDesc();
  390. if (op_desc == nullptr) {
  391. GELOGW("op desc is nullptr.");
  392. continue;
  393. }
  394. string kernel_lib_name = op_desc->GetOpKernelLibName();
  395. if (kernel_lib_name.empty()) {
  396. REPORT_INNER_ERROR("E19999", "Find ops kernel by name:%s failed",
  397. kernel_lib_name.c_str());
  398. GELOGE(ge::INTERNAL_ERROR, "Get atomic node:%s(%s) kernel lib failed.", atomic_node->GetName().c_str(),
  399. atomic_node->GetType().c_str());
  400. return ge::INTERNAL_ERROR;
  401. }
  402. OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name);
  403. GE_CHECK_NOTNULL(kernel_info);
  404. node_vector_map[kernel_lib_name].emplace_back(atomic_node);
  405. }
  406. for (auto &it : node_vector_map) {
  407. auto &kernel_lib_name = it.first;
  408. auto &node_vector = it.second;
  409. OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name);
  410. GE_CHECK_NOTNULL(kernel_info);
  411. GE_TIMESTAMP_RESTART(UnknownGraphCompileOp);
  412. auto ret = kernel_info->CompileOp(node_vector);
  413. GELOGI("The atomic node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size());
  414. GE_TIMESTAMP_ADD(UnknownGraphCompileOp);
  415. if (ret != ge::SUCCESS) {
  416. REPORT_CALL_ERROR("E19999", "Call CompileOp failed, kernel_lib_name:%s, ret:%d",
  417. kernel_lib_name.c_str(), ret);
  418. GELOGE(ret, "Compile atomic op failed, kernel lib name is %s", kernel_lib_name.c_str());
  419. return ret;
  420. }
  421. }
  422. GE_TIMESTAMP_CALLNUM_END(UnknownGraphCompileOp, "AtomicAddrCleanPass::CompileUnknownGraphOp");
  423. return SUCCESS;
  424. }
  425. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。