You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

block_mem_assigner.h 13 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_
  17. #define GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_
  18. #include <map>
  19. #include <string>
  20. #include <unordered_map>
  21. #include <unordered_set>
  22. #include <utility>
  23. #include <vector>
  24. #include <list>
  25. #include "common/ge_inner_error_codes.h"
  26. #include "common/types.h"
  27. #include "common/util.h"
  28. #include "graph/build/memory/mem_assigner.h"
  29. #include "graph/compute_graph.h"
  30. #include "graph/utils/graph_utils.h"
  31. namespace ge {
  /// Default value of NodeTypeIndex::life_time_end (0xffffffff).
  /// constexpr guarantees the value is a compile-time constant; linkage is
  /// still internal, exactly as with the previous namespace-scope const.
  constexpr size_t kMaxLifeTime = 0xffffffff;

  /// Two-level map: int64_t key -> (int64_t key -> size_t value).
  /// NOTE(review): presumably keyed by stream id on both levels with a life
  /// time as the value -- confirm against the .cc file.
  using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>;

  /// Kind of memory a NodeTypeIndex refers to.
  enum MemoryType {
    kOutput,     // reported as "output" by NodeTypeIndex::GetMemType()
    kWorkspace   // reported as "workspace" by NodeTypeIndex::GetMemType()
  };
  35. struct NodeTypeIndex {
  36. NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false)
  37. : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {}
  38. ge::NodePtr node = nullptr;
  39. MemoryType mem_type = kOutput;
  40. uint32_t index = 0;
  41. size_t life_time_end = kMaxLifeTime;
  42. bool ref_input = false;
  43. const string GetMemType() const {
  44. if (mem_type == kOutput) {
  45. return "output";
  46. } else if (mem_type == kWorkspace) {
  47. return "workspace";
  48. }
  49. return "unknown";
  50. }
  51. };
  52. class MemoryBlock {
  53. public:
  54. explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true)
  55. : ref_count_(0),
  56. stream_id_(stream_id),
  57. deleted_block_(false),
  58. reuse_mem_(reuse_mem),
  59. input_index_(0),
  60. continuous_block_(false),
  61. first_continuous_block_(false),
  62. last_continuous_block_(false),
  63. is_zero_copy_(false),
  64. block_size_(block_size),
  65. head_offset_(0),
  66. tail_offset_(0),
  67. child_offset_(0) {}
  68. MemoryBlock(const MemoryBlock &) = delete;
  69. MemoryBlock &operator=(const MemoryBlock &) = delete;
  70. ~MemoryBlock() {
  71. node_type_index_list_.clear();
  72. symbol_list_.clear();
  73. }
  74. void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) {
  75. real_size_list_.emplace_back(real_size);
  76. no_align_size_list_.emplace_back(no_align_size);
  77. node_type_index_list_.emplace_back(node, type, out_index, false);
  78. }
  79. size_t Size() const { return block_size_; }
  80. size_t AlignSize() const;
  81. void SetHeadOffset(size_t offset);
  82. void SetTailOffset(size_t offset);
  83. size_t HeadOffset() const { return head_offset_; }
  84. size_t TailOffset() const { return tail_offset_; }
  85. void AddNodeTypeIndex(const NodeTypeIndex &node_type_index, size_t real_size, size_t no_align_size) {
  86. node_type_index_list_.emplace_back(node_type_index);
  87. real_size_list_.emplace_back(real_size);
  88. no_align_size_list_.emplace_back(no_align_size);
  89. }
  90. void AddSymbol(const std::string &symbol) {
  91. symbol_list_.emplace_back(symbol);
  92. }
  93. const std::vector<NodeTypeIndex> &NodeTypeIndexList() const { return node_type_index_list_; }
  94. const std::vector<std::string> &SymbolList() const { return symbol_list_; }
  95. const std::vector<size_t> &RealSizeList() const { return real_size_list_; }
  96. const std::vector<MemoryBlock *> &ChildBlockList() const { return child_blocks_; }
  97. const std::vector<size_t> &NoAlignSizeList() const { return no_align_size_list_; }
  98. void Resize();
  99. std::string String();
  100. bool IsSameLabel(std::string &first_batch_label);
  101. void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life);
  102. void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life);
  103. void SetLifeTimeEnd(size_t time);
  104. size_t GetLifeBegin();
  105. size_t GetLifeEnd();
  106. void AddDependLifeBegin(DependStreamLife &node_depend_stream_life);
  107. size_t GetDependLifeBegin(int64_t stream_id, DependStreamLife &node_depend_stream_life);
  108. int ref_count_;
  109. int64_t stream_id_;
  110. bool deleted_block_;
  111. bool reuse_mem_;
  112. uint32_t input_index_;
  113. bool continuous_block_;
  114. bool first_continuous_block_;
  115. bool last_continuous_block_;
  116. bool is_zero_copy_;
  117. std::map<int64_t, size_t> depend_stream_life_;
  118. private:
  119. size_t block_size_;
  120. std::vector<size_t> real_size_list_;
  121. std::vector<size_t> no_align_size_list_;
  122. size_t head_offset_;
  123. size_t tail_offset_;
  124. size_t child_offset_;
  125. std::vector<NodeTypeIndex> node_type_index_list_;
  126. std::vector<std::string> symbol_list_;
  127. std::vector<MemoryBlock *> child_blocks_;
  128. };
  129. class BlockMemAssigner : public MemAssigner {
  130. public:
  131. BlockMemAssigner(ComputeGraphPtr compute_graph, const std::map<std::string, std::string> &anchor_to_symbol,
  132. const std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors);
  133. BlockMemAssigner(const BlockMemAssigner &) = delete;
  134. BlockMemAssigner &operator=(const BlockMemAssigner &) = delete;
  135. ~BlockMemAssigner() override;
  136. Status Assign() override;
  137. size_t GetMemOffset() const { return mem_offset_; }
  138. int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; }
  139. std::vector<MemoryBlock *> GetMemoryBlocks() const { return memory_blocks_; }
  140. ///
  141. /// @ingroup domi
  142. /// @brief memory size fixed for reuse. get memory range
  143. /// @param [out] ranges return memory range
  144. /// @return Status result
  145. ///
  146. virtual Status GetMemoryRanges(std::vector<int64_t> &ranges) = 0;
  147. ///
  148. /// @ingroup domi
  149. /// @brief traverse all nodes' outputs and needed workspace mem, apply memory, consider reuse memory
  150. /// @param [in] ranges memory range provided
  151. /// @author
  152. ///
  153. void AssignMemoryWithReuse(std::vector<int64_t> &ranges);
  154. void SetOpMemOffset(bool is_zero_copy);
  155. protected:
  156. ///
  157. /// @ingroup domi
  158. /// @brief traverse all memory size, resize, and calculate offset
  159. /// @param [in&out] memory_blocks memory size, resize and calculate memory address after offset
  160. ///
  161. void ResizeMemoryBlocks();
  162. void GetOutAndWorkSpaceMem(std::vector<int64_t> &all_memory_size);
  163. void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory);
  164. ///
  165. /// @ingroup GE
  166. /// @brief Determine whether it is the type of zero memory node.
  167. /// @param [in] node type.
  168. /// @return bool true: is zero memory node; false: is not zero memory node
  169. /// @author
  170. ///
  171. bool CheckIsZeroMemNodeType(const std::string &node_type) const;
  172. ///
  173. /// @ingroup GE
  174. /// @brief Check pre_reuse flag & post_reuse glag for each symbol
  175. /// @return void
  176. ///
  177. void InitReuseFlag();
  178. ///
  179. /// @ingroup GE
  180. /// @brief get pre_reuse flag
  181. /// @param [in] node
  182. /// @param [in] out_index
  183. /// @return bool
  184. ///
  185. bool IsPreReuse(const NodePtr &node, uint32_t out_index) const;
  186. ///
  187. /// @ingroup GE
  188. /// @brief get post_reuse flag
  189. /// @param [in] mem_block
  190. /// @return bool
  191. ///
  192. bool IsPostReuse(const MemoryBlock *mem_block) const;
  193. ///
  194. /// @ingroup GE
  195. /// @brief check if symbol of cur node_index_io has block
  196. /// @param [in] node_index_io
  197. /// @param [out] symbol
  198. /// @return bool
  199. ///
  200. bool IsSymbolExist(const NodeIndexIO &node_index_io, std::string &symbol);
  201. ///
  202. /// @ingroup GE
  203. /// @brief Print symbol
  204. /// @return void
  205. ///
  206. void PrintSymbolMap();
  207. size_t mem_offset_;
  208. ge::ComputeGraphPtr compute_graph_;
  209. std::vector<MemoryBlock *> memory_blocks_;
  210. std::vector<MemoryBlock *> blocks_store_;
  211. std::vector<NodeTypeIndex> zero_memory_list_;
  212. // ref mapping
  213. const std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors_;
  214. const std::map<std::string, std::string> &anchor_to_symbol_;
  215. std::map<std::string, bool> pre_reuse_flag_;
  216. std::map<std::string, bool> post_reuse_flag_;
  217. std::map<std::string, size_t> symbol_size_;
  218. private:
  219. ///
  220. /// @ingroup GE
  221. /// @brief Traversing the compute_graph_ to apply for output memory while considering reuse
  222. /// @param [in] n node in compute_graph_
  223. /// @param [in] index output node index
  224. /// @param [in] ranges available memory specifications
  225. /// @return MemoryBlock*
  226. /// @author
  227. ///
  228. MemoryBlock *ApplyOutMemory(const ge::NodePtr &n, uint32_t index, const std::vector<int64_t> &ranges,
  229. const bool is_op_reuse_mem, const bool continuous);
  230. Status AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges);
  231. ///
  232. /// @ingroup GE
  233. /// @brief Traversing the compute_graph_ to apply for memory while considering reuse
  234. /// @param [in] block_size applied memory block size
  235. /// @param [in] real_size actual memory size required
  236. /// @param [in] type output or workspace
  237. /// @param [in] n node in compute_graph_
  238. /// @param [in] out_index output node index
  239. /// @param [in] workspace_reuse_flag reuse flag for workspace
  240. /// @return MemoryBlock*
  241. /// @author
  242. ///
  243. MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, MemoryType mem_type,
  244. const ge::NodePtr &n, uint32_t out_index, const std::vector<bool> &workspace_reuse_flag,
  245. const bool is_op_reuse_mem, const bool continuous);
  246. ///
  247. /// @ingroup GE
  248. /// @brief check workspace_reuse_flag to judge if add workspace block wait reuse
  249. /// @param [in] workspace_reuse_flag mark out index if support resue
  250. /// @param [in] index out index
  251. /// @param [in] stream_id which stream op in
  252. /// @param [in] mem_block node workspace mem_block
  253. /// @return void
  254. /// @author
  255. ///
  256. void CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index,
  257. int64_t stream_id, MemoryBlock *mem_block);
  258. ///
  259. /// @ingroup GE
  260. /// @brief Release memory block to reusable list
  261. /// @param [in] to_release memory block to be released
  262. /// @param [in] reusable_memory reusable list
  263. /// @return void
  264. /// @author
  265. ///
  266. void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
  267. ///
  268. /// @ingroup GE
  269. /// @brief Release memory blocks to reusable list
  270. /// @param [in] to_releases memory blocks to be released
  271. /// @param [in] reusable_memory reusable list
  272. /// @return void
  273. /// @author
  274. ///
  275. void ReleaseMemorys(const vector<MemoryBlock *> &to_releases, vector<MemoryBlock *> &reusable_memory);
  276. ///
  277. /// @ingroup GE
  278. /// @brief Release memory block to reusable list
  279. /// @param [in] n node in compute_graph_
  280. /// @param [in] node_out_blocks output memory blocks for ops
  281. /// @param [in] reusable_memory reusable list
  282. /// @return void
  283. /// @author
  284. ///
  285. void ReleaseInputNodeOutMemory(const std::unordered_map<string, vector<MemoryBlock *>> &node_out_blocks,
  286. vector<MemoryBlock *> &reusable_memory, ge::NodePtr &n);
  287. ///
  288. /// @ingroup GE
  289. /// @brief Merge memory blocks between different batchs
  290. /// @return merge or not
  291. /// @author
  292. ///
  293. bool MergeDynamicBatchBlocks();
  294. void AssignContinuousBlocks();
  295. bool IsZeroCopyBlock(const NodePtr &node, bool continuous);
  296. bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name,
  297. uint32_t &peer_input_index, bool &no_need_assign_memory);
  298. ///
  299. /// @ingroup GE
  300. /// @|+++++++++block1++++++++| |+++++++++block1++++++++|
  301. /// @|+++++++++block1++++++++||++block2++| |+++++++++block1++++++++||++block2++|
  302. /// @ |++block2++||++block3++| ==> |++block3++| |++block2++|
  303. /// @ |++block3++| |++block3++|
  304. /// @return void
  305. /// @author
  306. ///
  307. void ReuseBlocksByLifeTime(size_t range_size);
  308. bool IsContinuousOutput(const NodePtr &n);
  309. MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);
  310. std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_;
  311. std::map<std::string, uint64_t> reusable_block_counts_;
  312. std::unordered_map<int64_t, std::vector<MemoryBlock *>> stream_workspace_blocks_;
  313. std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_;
  314. std::unordered_map<std::string, MemoryBlock *> symbol_blocks_;
  315. std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_;
  316. std::unordered_map<std::string, uint32_t> node_continuous_input_counts_;
  317. // reuse memory
  318. vector<string> op_no_reuse_mem_vec_;
  319. bool op_reuse_env_valid_ = false;
  320. std::string ge_disable_reuse_mem_env_ = "0";
  321. bool is_op_reuse_mem_ = true;
  322. size_t life_time_;
  323. int64_t atomic_addr_clean_id_ = 0;
  324. DependStreamLife total_node_depend_stream_life_;
  325. };
  326. } // namespace ge
  327. #endif // GE_GRAPH_BUILD_MEMORY_BLOCK_MEM_ASSIGNER_H_

图引擎模块(GE)是 MindSpore 的一个子模块,其代码由 C++ 实现,位于前端模块 ME 和底层硬件之间,起到承接作用。图引擎模块以 ME 下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE 针对昇腾 AI 处理器的硬件结构特点做了特定的优化工作,以此来充分发挥昇腾 AI 处理器的强大算力。在进行模型训练/推理时,GE 会被自动调用,用户对此并无感知。GE 主要由 GE API 和 GE Core 两部分组成,详细的架构图如下所示。