You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

tuning_utils.h 5.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. #ifndef MAIN_TUNING_UTILS_H
  2. #define MAIN_TUNING_UTILS_H
  3. #include <fcntl.h>
  4. #include <sys/stat.h>
  5. #include <sys/types.h>
  6. #include <algorithm>
  7. #include <cstring>
  8. #include <fstream>
  9. #include <iomanip>
  10. #include <queue>
  11. #include <mutex>
  12. #include <graph/anchor.h>
  13. #include <graph/detail/attributes_holder.h>
  14. #include <graph/ge_tensor.h>
  15. #include <graph/graph.h>
  16. #include <graph/model.h>
  17. #include <graph/node.h>
  18. #include <graph/utils/graph_utils.h>
  19. #include <graph/utils/type_utils.h>
  20. #include "framework/common/debug/ge_log.h"
  21. #include "utils/attr_utils.h"
  22. #include "utils/node_utils.h"
  23. #include "external/ge/ge_api_types.h"
  24. #include "graph/debug/ge_attr_define.h"
  25. #include "graph/utils/op_desc_utils.h"
  26. #include "graph/utils/tensor_utils.h"
  27. namespace ge {
  // ---------------------------------------------------------------------------
  // Option keys
  // ---------------------------------------------------------------------------
  // Key selecting the build mode; the default value is "normal".
  const char *const BUILD_MODE = "ge.buildMode";
  // Key selecting the build step.
  const char *const BUILD_STEP = "ge.buildStep";
  // Key configuring the tuning path.
  const char *const TUNING_PATH = "ge.tuningPath";

  // Option keys listed for the interface aclgrphBuildModel.
  const std::set<std::string> ir_builder_supported_options_for_lx_fusion = {BUILD_MODE, BUILD_STEP, TUNING_PATH};

  // ---------------------------------------------------------------------------
  // Build mode values
  // ---------------------------------------------------------------------------
  const char *const BUILD_MODE_NORMAL = "normal";
  const char *const BUILD_MODE_TUNING = "tuning";
  const char *const BUILD_MODE_BASELINE = "baseline";

  // Every build mode string declared above, gathered for membership checks.
  const std::set<std::string> build_mode_options = {BUILD_MODE_NORMAL, BUILD_MODE_TUNING, BUILD_MODE_BASELINE};

  // ---------------------------------------------------------------------------
  // Build step values
  // ---------------------------------------------------------------------------
  const char *const BUILD_STEP_BEFORE_UB_MATCH = "before_ub_match";
  const char *const BUILD_STEP_AFTER_UB_MATCH = "after_ub_match";
  const char *const BUILD_STEP_AFTER_BUILDER = "after_builder";
  const char *const BUILD_STEP_AFTER_BUILDER_SUB = "after_builder_sub";
  const char *const BUILD_STEP_AFTER_MERGE = "after_merge";

  // Every build step string declared above, gathered for membership checks.
  const std::set<std::string> build_step_options = {
      BUILD_STEP_BEFORE_UB_MATCH,
      BUILD_STEP_AFTER_UB_MATCH,
      BUILD_STEP_AFTER_BUILDER,
      BUILD_STEP_AFTER_BUILDER_SUB,
      BUILD_STEP_AFTER_MERGE,
  };
  61. using SubgraphCreateOutNode = std::unordered_map<ComputeGraphPtr, NodePtr>;
  62. using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>;
  63. using NodeVec = std::vector<NodePtr>;
  64. using NodeNametoNodeNameMap = std::unordered_map<std::string, std::string>;
  65. using NodetoNodeNameMap = std::unordered_map<NodePtr, std::string>;
  66. class TuningUtils {
  67. public:
  68. TuningUtils() = default;
  69. ~TuningUtils() = default;
  70. // Dump all the subgraphs and modify
  71. // the subgraphs in them to be executable subgraphs if exe_flag is true
  72. // `tuning_path` means path to save the graphs
  73. static graphStatus ConvertGraphToFile(std::vector<ComputeGraphPtr> tuning_subgraphs,
  74. std::vector<ComputeGraphPtr> non_tuning_subgraphs = {},
  75. bool exe_flag = false,
  76. const std::string &path = "",
  77. const std::string &user_path = "");
  78. // Recovery `graph` from graph dump files configured in options
  79. static graphStatus ConvertFileToGraph(const map<int64_t, string> &options, ge::Graph &graph);
  80. private:
  81. // part 1
  82. struct HelpInfo {
  83. int64_t index;
  84. bool exe_flag;
  85. bool is_tuning_graph;
  86. const std::string &path;
  87. const std::string &user_path;
  88. };
  89. static graphStatus MakeExeGraph(ComputeGraphPtr &exe_graph,
  90. const HelpInfo& help_info);
  91. static graphStatus HandlePld(NodePtr &node);
  92. static graphStatus HandleEnd(NodePtr &node);
  93. static graphStatus ChangePld2Data(NodePtr &node, NodePtr &data_node);
  94. static graphStatus ChangeEnd2NetOutput(NodePtr &node, NodePtr &out_node);
  95. static graphStatus LinkEnd2NetOutput(NodePtr &node, NodePtr &out_node);
  96. static graphStatus CreateDataNode(NodePtr &node, NodePtr &data_node);
  97. static graphStatus CreateNetOutput(NodePtr &node, NodePtr &out_node);
  98. static graphStatus AddAttrToDataNodeForMergeGraph(const NodePtr &pld, NodePtr &data_node);
  99. static graphStatus AddAttrToNetOutputForMergeGraph(const NodePtr &end, NodePtr &out_node);
  100. static void DumpGraphToPath(ComputeGraphPtr &exe_graph, int64_t index,
  101. bool is_tuning_graph, std::string path);
  102. static SubgraphCreateOutNode create_output_;
  103. // part 2
  104. static graphStatus MergeAllSubGraph(std::vector<ComputeGraphPtr> &graphs,
  105. ComputeGraphPtr &graph);
  106. static graphStatus MergeSubGraph(ComputeGraphPtr &graph);
  107. // Deletes new data and output nodes added by call `MakeExeGraph()` func in part 1
  108. static graphStatus RemoveDataNetoutputEdge(ComputeGraphPtr &graph);
  109. static graphStatus GetInAndOutAnchorPair(NodePtr &data_node,
  110. NodePtr &out_node,
  111. AnchorPtr &dest_in_anchor,
  112. AnchorPtr &src_out_anchor);
  113. static graphStatus HandleContinuousInputNodeNextData(NodePtr &node);
  114. static NodeNametoNodeNameMap data_2_netoutput_;
  115. static NodetoNodeNameMap data_node_2_netoutput_;
  116. static NodetoNodeMap data_node_2_netoutput_node_;
  117. static NodeVec netoutput_nodes_;
  118. static NodeVec merged_graph_nodes_;
  119. static std::mutex mutex_;
  120. // for debug
  121. static std::string PrintCheckLog();
  122. static std::string GetNodeNameByAnchor(const Anchor *anchor);
  123. };
  124. }
  125. #endif //MAIN_TUNING_UTILS_H

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示：