You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

hcom.h 11 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /**
  17. * @file hcom.h
  18. * @brief HCOM API
  19. */
  20. #ifndef HCOM_H_
  21. #define HCOM_H_
  22. #include <hccl/base.h>
  23. #include <hccl/hccl_types.h>
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif // __cplusplus
  27. /**
  28. * @brief Initialize HCOM.
  29. *
  30. * @param rank_table A string identifying the rank table file path, including the file name.
  31. * @param identify A string identifying the identity of the rank.
  32. * @return HcclResult
  33. * @see hcom_destroy()
  34. */
  35. extern HcclResult hcom_init(const char *rank_table, const char *identify);
  36. /**
  37. * @brief Destroy HCOM
  38. *
  39. * @return HcclResult
  40. * @see hcom_init()
  41. */
  42. extern HcclResult hcom_destroy(void);
  43. /**
  44. * @brief Bind the model.
  45. *
  46. * @param model A pointer identifying the model information.
  47. * @param stream A pointer identifying the stream information.
  48. * @return HcclResult
  49. * @see hcom_unbind_model()
  50. */
  51. extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream);
  52. /**
  53. * @brief Unbind the model.
  54. *
  55. * @param model A pointer identifying the model information.
  56. * @return HcclResult
  57. * @see hcom_bind_model()
  58. */
  59. extern HcclResult hcom_unbind_model(rtModel_t model);
  60. /**
  61. * @brief All-gather operator.
  62. *
  63. * @param tag A string identifying the tag of the operator.
  64. * @param inputPtr A pointer identifying the input data address of the operator.
  65. * @param outputPtr A pointer identifying the output data address of the operator.
  66. * @param inputCount An integer(u64) identifying the number of the input data.
  67. * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
  68. * @param group A string identifying the group name of ranks participating in the operator.
  69. * @param stream A pointer identifying the stream information.
  70. * @return HcclResult
  71. */
  72. extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount,
  73. HcclDataType dataType, const char *group, rtStream_t stream);
  74. /**
  75. * @brief All-reduce operator.
  76. *
  77. * @param tag A string identifying the tag of the operator.
  78. * @param inputPtr A pointer identifying the input data address of the operator.
  79. * @param outputPtr A pointer identifying the output data address of the operator.
  80. * @param count An integer(u64) identifying the number of the output data.
  81. * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
  82. * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
  83. * @param group A string identifying the group name of ranks participating in the operator.
  84. * @param stream A pointer identifying the stream information.
  85. * @return HcclResult
  86. */
  87. extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count,
  88. HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
  89. /**
  90. * @brief Broadcast operator.
  91. *
  92. * @param tag A string identifying the tag of the operator.
  93. * @param ptr A pointer identifying the data address of the operator.
  94. * @param count An integer(u64) identifying the number of the data.
  95. * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
  96. * @param root An integer(u32) identifying the root rank in the operator.
  97. * @param group A string identifying the group name of ranks participating in the operator.
  98. * @param stream A pointer identifying the stream information.
  99. * @return HcclResult
  100. */
  101. extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root,
  102. const char *group, rtStream_t stream);
  103. /**
  104. * @brief Reduce-scatter operator.
  105. *
  106. * @param tag A string identifying the tag of the operator.
  107. * @param inputPtr A pointer identifying the input data address of the operator.
  108. * @param outputPtr A pointer identifying the output data address of the operator.
  109. * @param count An integer(u64) identifying the number of the data.
  110. * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
  111. * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
  112. * @param group A string identifying the group name of ranks participating in the operator.
  113. * @param stream A pointer identifying the stream information.
  114. * @return HcclResult
  115. */
  116. extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count,
  117. HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
  118. /**
  119. * @brief Get the number of ranks in the group.
  120. *
  121. * @param group A string identifying the group name.
  122. * @param rankSize A pointer identifying the rank number.
  123. * @return HcclResult
  124. */
  125. HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
  126. /**
  127. * @brief Get the number of ranks on this rank's server within the group.
  128. *
  129. * @param group A string identifying the group name.
  130. * @param localRankSize A pointer identifying the rank number.
  131. * @return HcclResult
  132. */
  133. HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
  134. /**
  135. * @brief Get the rank id of this rank.
  136. *
  137. * @param group A string identifying the group name.
  138. * @param rankId A pointer identifying the rank id.
  139. * @return HcclResult
  140. */
  141. HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
  142. /**
  143. * @brief Get the local rank id of this rank's server within the group.
  144. *
  145. * @param group A string identifying the group name.
  146. * @param localRankId A pointer identifying the local rank id.
  147. * @return HcclResult
  148. */
  149. HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
  150. /**
  151. * @brief Get the world rank id according to the group rank id.
  152. *
  153. * @param group A string identifying the group name.
  154. * @param groupRank An integer(u32) identifying the group rank id.
  155. * @param worldRank A pointer identifying the world rank id.
  156. * @return HcclResult
  157. */
  158. HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
  159. /**
  160. * @brief Get the group rank id according to the world rank id.
  161. *
  162. * @param worldRank An integer(u32) identifying the world rank id.
  163. * @param group A string identifying the group name.
  164. * @param groupRank A pointer identifying the group rank id.
  165. * @return HcclResult
  166. */
  167. HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank);
  168. /**
  169. * @brief Create group.
  170. *
  171. * @param group A string identifying the group name.
  172. * @param rankNum An integer(u32) identifying the number of ranks in the group.
  173. * @param rankIds A list identifying the ranks in the group.
  174. * @return HcclResult
  175. */
  176. HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);
  177. /**
  178. * @brief Destroy group
  179. *
  180. * @param group A string identifying the group name.
  181. * @return HcclResult
  182. */
  183. HcclResult hcom_destroy_group(const char *group);
  184. /**
  185. * @brief Send operator.
  186. *
  187. * @param tag A string identifying the tag of the operator.
  188. * @param inputPtr A pointer identifying the input data address of the operator.
  189. * @param count An integer(u64) identifying the number of the data.
  190. * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
  191. * @param destRank An integer identifying the destination rank.
  192. * @param srTag An integer identifying the send/recv message tag.
  193. * The message will be received by the receive operator with the same "srTag".
  194. * @param group A string identifying the group name of ranks participating in the operator.
  195. * @param stream A pointer identifying the stream information.
  196. * @return HcclResult
  197. */
  198. HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType,
  199. u32 destRank, u32 srTag, const char *group, rtStream_t stream);
  200. /**
  201. * @brief Receive operator.
  202. *
  203. * @param tag A string identifying the tag of the operator.
  204. * @param outputPtr A pointer identifying the output data address of the operator.
  205. * @param count An integer(u64) identifying the number of the data.
  206. * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
  207. * @param srcRank An integer identifying the source rank.
  208. * @param srTag An integer identifying the send/recv message tag.
  209. * The message is sent by the send operator with the same "srTag".
  210. * @param group A string identifying the group name of ranks participating in the operator.
  211. * @param stream A pointer identifying the stream information.
  212. * @return HcclResult
  213. */
  214. HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType,
  215. u32 srcRank, u32 srTag, const char *group, rtStream_t stream);
  216. /**
  217. * @brief Get the gradient split strategy within the group.
  218. *
  219. * @param group A string identifying the group name.
  220. * @param feature A pointer identifying the feature of the model.
  221. * @param maxSegmentNum An integer(u32) identifying the max segments of gradients.
  222. * @param segmentNum A pointer identifying the segments number of gradients.
  223. * @param segmentIdx A list identifying the index of end gradient in each segment.
       * @param force The gradient split force mode; defaults to FORCE_NONE when omitted.
       * @param shapeType The original graph shape type; defaults to KNOWN_SHAPE when omitted.
  224. * @return HcclResult
       *
       * NOTE(review): default arguments are C++-only syntax, yet this declaration sits
       * inside the extern "C" region of the header; presumably the header is only ever
       * compiled as C++ -- confirm before including it from a C translation unit.
  225. */
  226. HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum,
  227. u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE,
  228. OriginalGraphShapeType shapeType = KNOWN_SHAPE);
  229. /**
  230. * @brief Set the gradient split strategy within the group, according to gradient index.
  231. *
  232. * @param group A string identifying the group name.
  233. * @param segmentNum An integer(u32) identifying the segments number of gradients.
  234. * @param IdxList A list identifying the index of end gradient in each segment.
  235. * @return HcclResult
  236. */
  237. extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
  238. /**
  239. * @brief Set the gradient split strategy within the group, according to gradient data size.
  240. *
  241. * @param group A string identifying the group name.
  242. * @param segmentNum An integer(u32) identifying the segments number of gradients.
  243. * @param sizeList A list identifying the percent of each segment.
  244. * @return HcclResult
  245. */
  246. extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList);
  247. /**
  248. * @brief Register memories and init resources for remote access.
  249. *
  250. * @param addrList memory addresses for remote access.
  251. * @param count number of remote memory addresses.
  252. * @return HcclResult
  253. */
  254. extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count);
  255. #ifdef __cplusplus
  256. }
  257. #endif // __cplusplus
  258. #endif // HCOM_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示