You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 14 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. /*lint -e7*/
  19. #include <stddef.h>
  20. /*lint +e7*/
  21. #include "base.h"
  22. #include "config.h"
  23. #include "stream.h"
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif
  27. /**
  28. * @ingroup dvrt_mem
  29. * @brief memory type flags (values up to 0x100 fall inside the MEM_ALLOC_TYPE_BIT mask 0x3FF)
  30. */
  31. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  32. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  33. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  34. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  35. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  36. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 is not a single bit (0x10 | 0x1) - confirm intended
  37. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // non-cached DDR memory
  38. #define RT_MEMORY_TS_4G ((uint32_t)0x40)
  39. #define RT_MEMORY_TS ((uint32_t)0x80)
  40. #define RT_MEMORY_RESERVED ((uint32_t)0x100)
  41. #define RT_MEMORY_L1 ((uint32_t)0x1<<16) // bit 16, outside the MEM_ALLOC_TYPE_BIT mask
  42. #define RT_MEMORY_L2 ((uint32_t)0x1<<17) // bit 17, outside the MEM_ALLOC_TYPE_BIT mask
  43. /**
  44. * @ingroup dvrt_mem
  45. * @brief memory policy flags (bits 10-12), combined with a memory type in rtMemType_t
  46. */
  47. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set
  48. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge page, then default page
  49. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge page
  50. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default page
  51. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the mem type bits <0, 9>
  52. /**
  53. * @ingroup dvrt_mem
  54. * @brief memory type | memory policy (an RT_MEMORY_* type flag combined with an RT_MEMORY_POLICY_* flag)
  55. */
  56. typedef uint32_t rtMemType_t;
  57. /**
  58. * @ingroup dvrt_mem
  59. * @brief memory advise type
  60. */
  61. #define RT_MEMORY_ADVISE_EXE (0x02)
  62. #define RT_MEMORY_ADVISE_THP (0x04)
  63. #define RT_MEMORY_ADVISE_PLE (0x08)
  64. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 overlaps EXE (0x02) and THP (0x04); possibly meant 0x10 - confirm before changing
  65. /**
  66. * @ingroup dvrt_mem
  67. * @brief memory copy type, passed as the kind argument of rtMemcpy / rtMemcpyAsync
  68. */
  69. typedef enum tagRtMemcpyKind {
  70. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  71. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  72. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  73. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  74. RT_MEMCPY_MANAGED, // managed memory
  75. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
  76. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  77. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  78. RT_MEMCPY_RESERVED, // reserved, last enumerator (value 8)
  79. } rtMemcpyKind_t;
  80. typedef enum tagRtRecudeKind { // reduce kind taken by rtReduceAsync; the "Recude" spelling is part of the public name
  81. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  82. RT_RECUDE_KIND_END // end marker (value 11)
  83. } rtRecudeKind_t;
  84. typedef enum tagRtDataType { // element data type taken by rtReduceAsync
  85. RT_DATA_TYPE_FP32 = 0, // fp32
  86. RT_DATA_TYPE_FP16 = 1, // fp16
  87. RT_DATA_TYPE_INT16 = 2, // int16
  88. RT_DATA_TYPE_END // end marker (value 3)
  89. } rtDataType_t;
  90. /**
  91. * @ingroup dvrt_mem
  92. * @brief memory copy channel type
  93. */
  94. typedef enum tagRtMemcpyChannelType {
  95. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  96. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  97. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  98. RT_MEMCPY_CHANNEL_TYPE_RESERVED, // reserved, last enumerator (value 3)
  99. } rtMemcpyChannelType_t;
  100. /**
  101. * @ingroup rt_kernel
  102. * @brief ai core memory sizes, passed to rtAiCoreMemorySizes / rtSetAiCoreMemorySizes (field units are not stated here)
  103. */
  104. typedef struct rtAiCoreMemorySize {
  105. uint32_t l0ASize;
  106. uint32_t l0BSize;
  107. uint32_t l0CSize;
  108. uint32_t l1Size;
  109. uint32_t ubSize;
  110. uint32_t l2Size;
  111. uint32_t l2PageNum;
  112. uint32_t blockSize;
  113. uint64_t bankSize;
  114. uint64_t bankNum;
  115. uint64_t burstInOneBlock;
  116. uint64_t bankGroupNum;
  117. } rtAiCoreMemorySize_t;
  118. /**
  119. * @ingroup dvrt_mem
  120. * @brief memory location type (host or device), stored in rtPointerAttributes_t.memoryType
  121. */
  122. typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t;
  123. /**
  124. * @ingroup dvrt_mem
  125. * @brief memory attributes of a pointer, the output type of rtPointerGetAttributes
  126. */
  127. typedef struct tagRtPointerAttributes {
  128. rtMemoryType_t memoryType; // host memory or device memory
  129. uint32_t deviceID; // device ID
  130. uint32_t isManaged; // presumably non-zero for managed memory - TODO confirm
  131. uint32_t pageSize; // page size; units not stated here - TODO confirm
  132. } rtPointerAttributes_t;
  133. /**
  134. * @ingroup dvrt_mem
  135. * @brief allocate device memory
  136. * @param [in|out] devPtr memory pointer
  137. * @param [in] size memory size
  138. * @param [in] type memory type | memory policy (see rtMemType_t)
  139. * @return RT_ERROR_NONE for ok
  140. * @return RT_ERROR_INVALID_VALUE for error input
  141. */
  142. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  143. /**
  144. * @ingroup dvrt_mem
  145. * @brief free device memory
  146. * @param [in] devPtr device memory pointer to free (passed by value, so it is input only)
  147. * @return RT_ERROR_NONE for ok
  148. * @return RT_ERROR_INVALID_VALUE for error input
  149. */
  150. RTS_API rtError_t rtFree(void *devPtr);
  151. /**
  152. * @ingroup dvrt_mem
  153. * @brief allocate device memory for dvpp
  154. * @param [in|out] devPtr memory pointer
  155. * @param [in] size memory size
  156. * @return RT_ERROR_NONE for ok
  157. * @return RT_ERROR_INVALID_VALUE for error input
  158. */
  159. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  160. /**
  161. * @ingroup dvrt_mem
  162. * @brief free device memory for dvpp
  163. * @param [in] devPtr dvpp device memory pointer to free (passed by value, so it is input only)
  164. * @return RT_ERROR_NONE for ok
  165. * @return RT_ERROR_INVALID_VALUE for error input
  166. */
  167. RTS_API rtError_t rtDvppFree(void *devPtr);
  168. /**
  169. * @ingroup dvrt_mem
  170. * @brief allocate host memory
  171. * @param [in|out] hostPtr memory pointer
  172. * @param [in] size memory size
  173. * @return RT_ERROR_NONE for ok
  174. * @return RT_ERROR_INVALID_VALUE for error input
  175. */
  176. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  177. /**
  178. * @ingroup dvrt_mem
  179. * @brief free host memory
  180. * @param [in] hostPtr host memory pointer to free
  181. * @return RT_ERROR_NONE for ok
  182. * @return RT_ERROR_INVALID_VALUE for error input
  183. */
  184. RTS_API rtError_t rtFreeHost(void *hostPtr);
  185. /**
  186. * @ingroup dvrt_mem
  187. * @brief allocate managed memory
  188. * @param [in|out] ptr memory pointer
  189. * @param [in] size memory size
  190. * @param [in] flag reserved, must be set to 0.
  191. * @return RT_ERROR_NONE for ok
  192. * @return RT_ERROR_INVALID_VALUE for error input
  193. */
  194. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  195. /**
  196. * @ingroup dvrt_mem
  197. * @brief free managed memory
  198. * @param [in] ptr managed memory pointer to free
  199. * @return RT_ERROR_NONE for ok
  200. * @return RT_ERROR_INVALID_VALUE for error input
  201. */
  202. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  203. /**
  204. * @ingroup dvrt_mem
  205. * @brief allocate cached device memory
  206. * @param [in|out] devPtr memory pointer
  207. * @param [in] size memory size
  208. * @param [in] type memory type | memory policy (see rtMemType_t)
  209. * @return RT_ERROR_NONE for ok
  210. */
  211. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  212. /**
  213. * @ingroup dvrt_mem
  214. * @brief flush device memory
  215. * @param [in] base virtual base address
  216. * @param [in] len memory size
  217. * @return RT_ERROR_NONE for ok, errno for failed
  218. */
  219. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  220. /**
  221. * @ingroup dvrt_mem
  222. * @brief invalidate device memory
  223. * @param [in] base virtual base address
  224. * @param [in] len memory size
  225. * @return RT_ERROR_NONE for ok, errno for failed
  226. */
  227. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  228. /**
  229. * @ingroup dvrt_mem
  230. * @brief synchronized memcpy
  231. * @param [in] dst destination address pointer
  232. * @param [in] destMax max length of destination address memory
  233. * @param [in] src source address pointer
  234. * @param [in] count the number of bytes to copy
  235. * @param [in] kind memcpy type
  236. * @return RT_ERROR_NONE for ok
  237. * @return RT_ERROR_INVALID_VALUE for error input
  238. */
  239. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  240. /**
  241. * @ingroup dvrt_mem
  242. * @brief asynchronized memcpy
  243. * @param [in] dst destination address pointer
  244. * @param [in] destMax max length of destination address memory
  245. * @param [in] src source address pointer
  246. * @param [in] count the number of bytes to copy
  247. * @param [in] kind memcpy type
  248. * @param [in] stream asynchronized task stream
  249. * @return RT_ERROR_NONE for ok
  250. * @return RT_ERROR_INVALID_VALUE for error input
  251. */
  252. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  253. rtStream_t stream);
  254. /**
  255. * @ingroup dvrt_mem
  256. * @brief asynchronized reduce memcpy
  257. * @param [in] dst destination address pointer
  258. * @param [in] destMax max length of destination address memory
  259. * @param [in] src source address pointer
  260. * @param [in] count the number of bytes to copy
  261. * @param [in] kind reduce kind (rtRecudeKind_t)
  262. * @param [in] type data type
  263. * @param [in] stream asynchronized task stream
  264. * @return RT_ERROR_NONE for ok
  265. * @return RT_ERROR_INVALID_VALUE for error input
  266. */
  267. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  268. rtDataType_t type, rtStream_t stream);
  269. /**
  270. * @ingroup dvrt_mem
  271. * @brief query memory size
  272. * @param [in] aiCoreMemorySize ai core memory size struct; NOTE(review): a query presumably fills it (output) - confirm
  273. * @return RT_ERROR_NONE for ok, errno for failed
  274. * @return RT_ERROR_INVALID_VALUE for error input
  275. */
  276. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  277. /**
  278. * @ingroup dvrt_mem
  279. * @brief set memory size. Set it before model inference ("Bright screen" scenario) to prevent the model from not
  280. being fully integrated into the network due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  281. * @param [in] aiCoreMemorySize ai core memory sizes to set
  282. * @return RT_ERROR_NONE for ok, errno for failed
  283. * @return RT_ERROR_INVALID_VALUE for error input
  284. */
  285. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  286. /**
  287. * @ingroup dvrt_mem
  288. * @brief set memory with uint32_t value
  289. * @param [in] devPtr memory pointer to set
  290. * @param [in] destMax max length of destination address memory
  291. * @param [in] value value to set
  292. * @param [in] count byte num
  293. * @return RT_ERROR_NONE for ok, errno for failed
  294. * @return RT_ERROR_INVALID_VALUE for error input
  295. */
  296. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  297. /**
  298. * @ingroup dvrt_mem
  299. * @brief set memory with uint32_t value async
  300. * @param [in] ptr memory pointer to set
  301. * @param [in] destMax max length of destination address memory
  302. * @param [in] value value to set
  303. * @param [in] count byte num
  304. * @param [in] stream asynchronized task stream
  305. * @return RT_ERROR_NONE for ok, errno for failed
  306. * @return RT_ERROR_INVALID_VALUE for error input
  307. */
  308. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  309. /**
  310. * @ingroup dvrt_mem
  311. * @brief get current device memory total and free
  312. * @param [out] free receives the free memory size
  313. * @param [out] total receives the total memory size
  314. * @return RT_ERROR_NONE for ok, errno for failed
  315. * @return RT_ERROR_INVALID_VALUE for error input
  316. */
  317. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  318. /**
  319. * @ingroup dvrt_mem
  320. * @brief prefetch memory to a device; NOTE(review): the old brief was copied from rtMemset, this one is inferred from the name - confirm
  321. * @param [in] devPtr memory pointer
  322. * @param [in] len memory length
  323. * @param [in] device device id (presumably the prefetch target - confirm)
  324. * @return RT_ERROR_NONE for ok, errno for failed
  325. * @return RT_ERROR_INVALID_VALUE for error input
  326. */
  327. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  328. /**
  329. * @ingroup dvrt_mem
  330. * @brief get memory attribute: Host or Device
  331. * @param [out] attributes attributes of the memory ptr points to
  332. * @param [in] ptr memory pointer to query
  333. * @return RT_ERROR_NONE for ok, errno for failed
  334. * @return RT_ERROR_INVALID_VALUE for error input
  335. */
  336. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  337. /**
  338. * @ingroup dvrt_mem
  339. * @brief make memory shared interprocess and assign it a name
  340. * @param [in] ptr device memory address pointer
  341. * @param [in] byteCount identification byteCount
  342. * @param [in] name identification name; len (4th argument) is undocumented, presumably the size of the name buffer - TODO confirm
  343. * @return RT_ERROR_NONE for ok
  344. * @return RT_ERROR_INVALID_VALUE for error input
  345. * @return RT_ERROR_DRV_ERR for driver error
  346. */
  347. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  348. /**
  349. * @ingroup dvrt_mem
  350. * @brief destroy an interprocess shared memory
  351. * @param [in] name identification name
  352. * @return RT_ERROR_NONE for ok
  353. * @return RT_ERROR_INVALID_VALUE for error input
  354. * @return RT_ERROR_DRV_ERR for driver error
  355. */
  356. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  357. /**
  358. * @ingroup dvrt_mem
  359. * @brief open an interprocess shared memory
  360. * @param [in|out] ptr device memory address pointer
  361. * @param [in] name identification name
  362. * @return RT_ERROR_NONE for ok
  363. * @return RT_ERROR_INVALID_VALUE for error input
  364. * @return RT_ERROR_DRV_ERR for driver error
  365. */
  366. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  367. /**
  368. * @ingroup dvrt_mem
  369. * @brief close an interprocess shared memory
  370. * @param [in] ptr device memory address pointer
  371. * @note this function takes no name parameter; only ptr is passed
  372. * @return RT_ERROR_NONE for ok
  373. * @return RT_ERROR_INVALID_VALUE for error input
  374. * @return RT_ERROR_DRV_ERR for driver error
  375. */
  376. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  377. /**
  378. * @ingroup dvrt_mem
  379. * @brief HCCL async memory copy
  380. * @param [in] index sq index
  381. * @param [in] wqe_index module index
  382. * @param [in] stream asynchronized task stream
  383. * @return RT_ERROR_NONE for ok
  384. * @return RT_ERROR_INVALID_VALUE for error input
  385. * @return RT_ERROR_DRV_ERR for driver error
  386. */
  387. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream);
  388. /**
  389. * @ingroup dvrt_mem
  390. * @brief Ipc set mem pid
  391. * @param [in] name name to be queried
  392. * @param [in] pid array of process ids
  393. * @param [in] num length of pid[]
  394. * @return RT_ERROR_NONE for ok
  395. * @return RT_ERROR_INVALID_VALUE for error input
  396. * @return RT_ERROR_DRV_ERR for driver error
  397. */
  398. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  399. /**
  400. * @ingroup dvrt_mem
  401. * @brief HCCL async memory copy
  402. * @param [in] dbIndex single device 0
  403. * @param [in] dbInfo doorbell info
  404. * @param [in] stream asynchronized task stream
  405. * @return RT_ERROR_NONE for ok
  406. * @return RT_ERROR_INVALID_VALUE for error input
  407. * @return RT_ERROR_DRV_ERR for driver error
  408. */
  409. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
  410. #ifdef __cplusplus
  411. }
  412. #endif
  413. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。