You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. /*lint -e7*/
  19. #include <stddef.h>
  20. /*lint +e7*/
  21. #include "base.h"
  22. #include "config.h"
  23. #include "stream.h"
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif
  27. /**
  28. * @ingroup dvrt_mem
  29. * @brief memory type flags, passed in an rtMemType_t value
  30. */
  31. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  32. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  33. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  34. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  35. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  36. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 is not a single bit (0x10 | 0x1) - confirm intended
  37. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // non-cached DDR memory
  38. #define RT_MEMORY_TS_4G ((uint32_t)0x40)
  39. #define RT_MEMORY_TS ((uint32_t)0x80)
  40. #define RT_MEMORY_RESERVED ((uint32_t)0x100)
  41. #define RT_MEMORY_L1 ((uint32_t)0x1<<16) // bit 16
  42. #define RT_MEMORY_L2 ((uint32_t)0x1<<17) // bit 17
  43. /**
  44. * @ingroup dvrt_mem
  45. * @brief memory info type
  46. */
  47. #define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1)
  48. #define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2)
  49. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3)
  50. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4)
  51. /**
  52. * @ingroup dvrt_mem
  53. * @brief memory policy flags (each non-zero policy is one bit in the range 10-15)
  54. */
  55. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set
  56. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge page, then default page
  57. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge page
  58. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default page
  59. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prefers huge page, then default page, used for p2p
  60. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only uses huge page, used for p2p
  61. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only uses default page, used for p2p
  62. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask covering the mem type bits <0, 9>
  63. /**
  64. * @ingroup dvrt_mem
  65. * @brief 32-bit value holding memory type | memory policy (RT_MEMORY_* combined with RT_MEMORY_POLICY_*)
  66. */
  67. typedef uint32_t rtMemType_t;
  68. /**
  69. * @ingroup dvrt_mem
  70. * @brief memory advise type (no consumer of these values is declared in this header)
  71. */
  72. #define RT_MEMORY_ADVISE_EXE (0x02)
  73. #define RT_MEMORY_ADVISE_THP (0x04)
  74. #define RT_MEMORY_ADVISE_PLE (0x08)
  75. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 breaks the single-bit 0x02/0x04/0x08 pattern (0x10 expected?) and overlaps EXE and THP - confirm before changing, value may be ABI
  76. /**
  77. * @ingroup dvrt_mem
  78. * @brief memory copy kind (direction of a copy); enumerators are numbered consecutively from 0
  79. */
  80. typedef enum tagRtMemcpyKind {
  81. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  82. RT_MEMCPY_HOST_TO_DEVICE, // 1: host to device
  83. RT_MEMCPY_DEVICE_TO_HOST, // 2: device to host
  84. RT_MEMCPY_DEVICE_TO_DEVICE, // 3: device to device, 1P && P2P
  85. RT_MEMCPY_MANAGED, // 4: managed memory
  86. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, // 5
  87. RT_MEMCPY_HOST_TO_DEVICE_EX, // 6: host to device ex (only used for 8 bytes)
  88. RT_MEMCPY_DEVICE_TO_HOST_EX, // 7: device to host ex
  89. RT_MEMCPY_RESERVED, // 8
  90. } rtMemcpyKind_t;
  91. typedef enum tagRtMemInfoType { // memory category selector, taken by rtMemGetInfoEx
  92. RT_MEMORYINFO_DDR,
  93. RT_MEMORYINFO_HBM,
  94. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  95. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  96. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  97. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  98. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of DDR, P2P variant (per enumerator name)
  99. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of DDR, P2P variant (per enumerator name)
  100. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of HBM, P2P variant (per enumerator name)
  101. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of HBM, P2P variant (per enumerator name)
  102. } rtMemInfoType_t;
  103. typedef enum tagRtRecudeKind { // reduce kind taken by rtReduceAsync; "Recude" is the public spelling and is kept as-is
  104. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, include 1P, and P2P
  105. RT_RECUDE_KIND_END // 11: end marker
  106. } rtRecudeKind_t;
  107. typedef enum tagRtDataType { // element data type taken by rtReduceAsync
  108. RT_DATA_TYPE_FP32 = 0, // fp32
  109. RT_DATA_TYPE_FP16 = 1, // fp16
  110. RT_DATA_TYPE_INT16 = 2, // int16
  111. RT_DATA_TYPE_END // 3: end marker
  112. } rtDataType_t;
  113. /**
  114. * @ingroup dvrt_mem
  115. * @brief memory copy channel type; enumerators are numbered consecutively from 0
  116. */
  117. typedef enum tagRtMemcpyChannelType {
  118. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  119. RT_MEMCPY_CHANNEL_TYPE_PCIe, // 1
  120. RT_MEMCPY_CHANNEL_TYPE_HCCs, // 2: not supported now
  121. RT_MEMCPY_CHANNEL_TYPE_RESERVED, // 3
  122. } rtMemcpyChannelType_t;
  123. /**
  124. * @ingroup rt_kernel
  125. * @brief AI core memory sizes; the record passed to rtAiCoreMemorySizes and rtSetAiCoreMemorySizes
  126. */
  127. typedef struct rtAiCoreMemorySize {
  128. uint32_t l0ASize;
  129. uint32_t l0BSize;
  130. uint32_t l0CSize;
  131. uint32_t l1Size;
  132. uint32_t ubSize;
  133. uint32_t l2Size;
  134. uint32_t l2PageNum;
  135. uint32_t blockSize;
  136. uint64_t bankSize; // the remaining fields are 64-bit, unlike the 32-bit fields above
  137. uint64_t bankNum;
  138. uint64_t burstInOneBlock;
  139. uint64_t bankGroupNum;
  140. } rtAiCoreMemorySize_t;
  141. /**
  142. * @ingroup dvrt_mem
  143. * @brief memory location type (host = 1, device = 2), reported in rtPointerAttributes_t.memoryType
  144. */
  145. typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t;
  146. /**
  147. * @ingroup dvrt_mem
  148. * @brief memory attributes of a pointer, filled in by rtPointerGetAttributes
  149. */
  150. typedef struct tagRtPointerAttributes {
  151. rtMemoryType_t memoryType; // host memory or device memory
  152. uint32_t deviceID; // device ID
  153. uint32_t isManaged; // presumably non-zero for managed memory - TODO confirm
  154. uint32_t pageSize; // page size; unit not stated here - TODO confirm
  155. } rtPointerAttributes_t;
  156. /**
  157. * @ingroup dvrt_mem
  158. * @brief allocate device memory
  159. * @param [in|out] devPtr address of the pointer variable for the allocated memory
  160. * @param [in] size memory size
  161. * @param [in] type memory type, optionally combined with a memory policy (see rtMemType_t)
  162. * @return RT_ERROR_NONE for ok
  163. * @return RT_ERROR_INVALID_VALUE for error input
  164. */
  165. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  166. /**
  167. * @ingroup dvrt_mem
  168. * @brief free device memory
  169. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer variable is not modified)
  170. * @return RT_ERROR_NONE for ok
  171. * @return RT_ERROR_INVALID_VALUE for error input
  172. */
  173. RTS_API rtError_t rtFree(void *devPtr);
  174. /**
  175. * @ingroup dvrt_mem
  176. * @brief allocate device memory for dvpp
  177. * @param [in|out] devPtr address of the pointer variable for the allocated memory
  178. * @param [in] size memory size
  179. * @return RT_ERROR_NONE for ok
  180. * @return RT_ERROR_INVALID_VALUE for error input
  181. */
  182. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  183. /**
  184. * @ingroup dvrt_mem
  185. * @brief free device memory for dvpp
  186. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer variable is not modified)
  187. * @return RT_ERROR_NONE for ok
  188. * @return RT_ERROR_INVALID_VALUE for error input
  189. */
  190. RTS_API rtError_t rtDvppFree(void *devPtr);
  191. /**
  192. * @ingroup dvrt_mem
  193. * @brief allocate host memory
  194. * @param [in|out] hostPtr address of the pointer variable for the allocated memory
  195. * @param [in] size memory size
  196. * @return RT_ERROR_NONE for ok
  197. * @return RT_ERROR_INVALID_VALUE for error input
  198. */
  199. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  200. /**
  201. * @ingroup dvrt_mem
  202. * @brief free host memory
  203. * @param [in] hostPtr host memory pointer to free
  204. * @return RT_ERROR_NONE for ok
  205. * @return RT_ERROR_INVALID_VALUE for error input
  206. */
  207. RTS_API rtError_t rtFreeHost(void *hostPtr);
  208. /**
  209. * @ingroup dvrt_mem
  210. * @brief allocate managed memory
  211. * @param [in|out] ptr address of the pointer variable for the allocated memory
  212. * @param [in] size memory size
  213. * @param [in] flag reserved, set to 0.
  214. * @return RT_ERROR_NONE for ok
  215. * @return RT_ERROR_INVALID_VALUE for error input
  216. */
  217. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  218. /**
  219. * @ingroup dvrt_mem
  220. * @brief free managed memory
  221. * @param [in] ptr managed memory pointer to free
  222. * @return RT_ERROR_NONE for ok
  223. * @return RT_ERROR_INVALID_VALUE for error input
  224. */
  225. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  226. /**
  227. * @ingroup dvrt_mem
  228. * @brief allocate cached device memory
  229. * @param [in|out] devPtr address of the pointer variable for the allocated memory
  230. * @param [in] size memory size
  231. * @param [in] type memory type, optionally combined with a memory policy (see rtMemType_t)
  232. * @return RT_ERROR_NONE for ok
  233. */
  234. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  235. /**
  236. * @ingroup dvrt_mem
  237. * @brief flush device memory
  238. * @param [in] base virtual base address
  239. * @param [in] len memory size
  240. * @return RT_ERROR_NONE for ok, errno for failed
  241. */
  242. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  243. /**
  244. * @ingroup dvrt_mem
  245. * @brief invalidate device memory
  246. * @param [in] base virtual base address
  247. * @param [in] len memory size
  248. * @return RT_ERROR_NONE for ok, errno for failed
  249. */
  250. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  251. /**
  252. * @ingroup dvrt_mem
  253. * @brief synchronous memcpy
  254. * @param [in] dst destination address pointer
  255. * @param [in] destMax max length of destination address memory
  256. * @param [in] src source address pointer
  257. * @param [in] count the number of bytes to copy
  258. * @param [in] kind memcpy type
  259. * @return RT_ERROR_NONE for ok
  260. * @return RT_ERROR_INVALID_VALUE for error input
  261. */
  262. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  263. /**
  264. * @ingroup dvrt_mem
  265. * @brief asynchronous memcpy
  266. * @param [in] dst destination address pointer
  267. * @param [in] destMax max length of destination address memory
  268. * @param [in] src source address pointer
  269. * @param [in] count the number of bytes to copy
  270. * @param [in] kind memcpy type
  271. * @param [in] stream asynchronous task stream
  272. * @return RT_ERROR_NONE for ok
  273. * @return RT_ERROR_INVALID_VALUE for error input
  274. */
  275. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  276. rtStream_t stream);
  277. /**
  278. * @ingroup dvrt_mem
  279. * @brief asynchronous reduce memcpy
  280. * @param [in] dst destination address pointer
  281. * @param [in] destMax max length of destination address memory
  282. * @param [in] src source address pointer
  283. * @param [in] count the number of bytes to copy
  284. * @param [in] kind reduce kind (rtRecudeKind_t)
  285. * @param [in] type data type
  286. * @param [in] stream asynchronous task stream
  287. * @return RT_ERROR_NONE for ok
  288. * @return RT_ERROR_INVALID_VALUE for error input
  289. */
  290. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  291. rtDataType_t type, rtStream_t stream);
  292. /**
  293. * @ingroup dvrt_mem
  294. * @brief query AI core memory sizes
  295. * @param [in] aiCoreMemorySize NOTE(review): non-const pointer on a query call, so presumably an output - confirm and mark [out]
  296. * @return RT_ERROR_NONE for ok, errno for failed
  297. * @return RT_ERROR_INVALID_VALUE for error input
  298. */
  299. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  300. /**
  301. * @ingroup dvrt_mem
  302. * @brief set AI core memory sizes. Set before model inference ("reasoning"); original wording: "Bright screen to prevent
  303. model can not be fully integrated network due to memory limitations" (meaning unclear - TODO confirm). Requirement comes from JiaMinHu. Only used for Tiny.
  304. * @param [in] aiCoreMemorySize
  305. * @return RT_ERROR_NONE for ok, errno for failed
  306. * @return RT_ERROR_INVALID_VALUE for error input
  307. */
  308. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  309. /**
  310. * @ingroup dvrt_mem
  311. * @brief set memory with uint32_t value
  312. * @param [in] devPtr memory pointer
  313. * @param [in] destMax max length of destination address memory
  314. * @param [in] value value to set
  315. * @param [in] count byte num
  316. * @return RT_ERROR_NONE for ok, errno for failed
  317. * @return RT_ERROR_INVALID_VALUE for error input
  318. */
  319. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  320. /**
  321. * @ingroup dvrt_mem
  322. * @brief set memory with uint32_t value async
  323. * @param [in] ptr memory pointer
  324. * @param [in] destMax max length of destination address memory
  325. * @param [in] value value to set
  326. * @param [in] count byte num
  327. * @param [in] stream asynchronous task stream
  328. * @return RT_ERROR_NONE for ok, errno for failed
  329. * @return RT_ERROR_INVALID_VALUE for error input
  330. */
  331. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  332. /**
  333. * @ingroup dvrt_mem
  334. * @brief get current device memory total and free
  335. * @param [out] free free memory amount
  336. * @param [out] total total memory amount
  337. * @return RT_ERROR_NONE for ok, errno for failed
  338. * @return RT_ERROR_INVALID_VALUE for error input
  339. */
  340. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  341. /**
  342. * @ingroup dvrt_mem
  343. * @brief get current device memory total and free for one memory info type
  344. * @param [in] memInfoType memory category to query (rtMemInfoType_t)
  345. * @param [out] free free memory amount
  346. * @param [out] total total memory amount
  347. * @return RT_ERROR_NONE for ok, errno for failed
  348. */
  349. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total);
  350. /**
  351. * @ingroup dvrt_mem
  352. * @brief prefetch memory to a device (per function name; the original brief was copied from rtMemset - TODO confirm)
  353. * @param [in] devPtr memory pointer
  354. * @param [in] len memory length
  355. * @param [in] device device to prefetch to, presumably a device ID - TODO confirm
  356. * @return RT_ERROR_NONE for ok, errno for failed
  357. * @return RT_ERROR_INVALID_VALUE for error input
  358. */
  359. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  360. /**
  361. * @ingroup dvrt_mem
  362. * @brief get memory attribute: Host or Device
  363. * @param [out] attributes attribute record to fill (rtPointerAttributes_t)
  364. * @param [in] ptr pointer to query
  365. * @return RT_ERROR_NONE for ok, errno for failed
  366. * @return RT_ERROR_INVALID_VALUE for error input
  367. */
  368. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  369. /**
  370. * @ingroup dvrt_mem
  371. * @brief make memory shared interprocess and assign it a name
  372. * @param [in] ptr device memory address pointer
  373. * @param [in] byteCount identification byteCount
  374. * @param [in] name identification name; NOTE(review): non-const char* paired with len, so this may be an output buffer - confirm
       * @param [in] len presumably the length of the name buffer - TODO confirm
  375. * @return RT_ERROR_NONE for ok
  376. * @return RT_ERROR_INVALID_VALUE for error input
  377. * @return RT_ERROR_DRV_ERR for driver error
  378. */
  379. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  380. /**
  381. * @ingroup dvrt_mem
  382. * @brief destroy an interprocess shared memory
  383. * @param [in] name identification name
  384. * @return RT_ERROR_NONE for ok
  385. * @return RT_ERROR_INVALID_VALUE for error input
  386. * @return RT_ERROR_DRV_ERR for driver error
  387. */
  388. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  389. /**
  390. * @ingroup dvrt_mem
  391. * @brief open an interprocess shared memory
  392. * @param [in|out] ptr address of the pointer variable for the device memory address
  393. * @param [in] name identification name
  394. * @return RT_ERROR_NONE for ok
  395. * @return RT_ERROR_INVALID_VALUE for error input
  396. * @return RT_ERROR_DRV_ERR for driver error
  397. */
  398. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  399. /**
  400. * @ingroup dvrt_mem
  401. * @brief close an interprocess shared memory
  402. * @param [in] ptr device memory address pointer
  403. * (this function takes no name argument; ptr is its only parameter)
  404. * @return RT_ERROR_NONE for ok
  405. * @return RT_ERROR_INVALID_VALUE for error input
  406. * @return RT_ERROR_DRV_ERR for driver error
  407. */
  408. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  409. /**
  410. * @ingroup dvrt_mem
  411. * @brief HCCL async memory copy
  412. * @param [in] index sq index
  413. * @param [in] wqe_index module index
  414. * @param [in] stream asynchronous task stream
  415. * @return RT_ERROR_NONE for ok
  416. * @return RT_ERROR_INVALID_VALUE for error input
  417. * @return RT_ERROR_DRV_ERR for driver error
  418. */
  419. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream);
  420. /**
  421. * @ingroup dvrt_mem
  422. * @brief Ipc set mem pid
  423. * @param [in] name identification name to set the pids for
  424. * @param [in] pid array of process ids
  425. * @param [in] num length of pid[]
  426. * @return RT_ERROR_NONE for ok
  427. * @return RT_ERROR_INVALID_VALUE for error input
  428. * @return RT_ERROR_DRV_ERR for driver error
  429. */
  430. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  431. /**
  432. * @ingroup dvrt_mem
  433. * @brief HCCL async memory copy (doorbell send, per function name)
  434. * @param [in] dbIndex single device 0
  435. * @param [in] dbInfo doorbell info
  436. * @param [in] stream asynchronous task stream
  437. * @return RT_ERROR_NONE for ok
  438. * @return RT_ERROR_INVALID_VALUE for error input
  439. * @return RT_ERROR_DRV_ERR for driver error
  440. */
  441. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
  442. #ifdef __cplusplus
  443. }
  444. #endif
  445. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。