You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

mem.h 15 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. #include <stddef.h>
  19. #include "base.h"
  20. #include "config.h"
  21. #include "stream.h"
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif
  25. /**
  26. * @ingroup dvrt_mem
  27. * @brief memory type flags
  28. */
  29. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  30. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  31. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  32. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  33. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  34. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 == 0x10 | 0x1, shares bit 4 with RT_MEMORY_P2P_HBM - confirm this is intended
  35. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache
  36. #define RT_MEMORY_TS_4G ((uint32_t)0x40)
  37. #define RT_MEMORY_TS ((uint32_t)0x80)
  38. #define RT_MEMORY_RESERVED ((uint32_t)0x100)
  39. #define RT_MEMORY_L1 ((uint32_t)0x1<<16) // bit 16; NOTE(review): outside MEM_ALLOC_TYPE_BIT (0x3FF) - confirm how it is decoded
  40. #define RT_MEMORY_L2 ((uint32_t)0x1<<17) // bit 17; NOTE(review): outside MEM_ALLOC_TYPE_BIT (0x3FF) - confirm how it is decoded
  41. /**
  42. * @ingroup dvrt_mem
  43. * @brief memory policy flags
  44. */
  45. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set; documented as: malloc mem prefers huge page, then default page - TODO confirm
  46. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prefers huge page, then default page
  47. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only uses huge page
  48. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only uses default page
  49. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the mem type bits <0, 9>
  50. /**
  51. * @ingroup dvrt_mem
  52. * @brief memory type | memory policy, combined with bitwise OR in one 32-bit value
  53. */
  54. typedef uint32_t rtMemType_t;
  55. /**
  56. * @ingroup dvrt_mem
  57. * @brief memory advise type
  58. */
  59. #define RT_MEMORY_ADVISE_EXE (0x02)
  60. #define RT_MEMORY_ADVISE_THP (0x04)
  61. #define RT_MEMORY_ADVISE_PLE (0x08)
  62. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 == 0x10 | 0x04 | 0x02, overlaps EXE and THP; 0x10 may have been intended - confirm against the runtime before changing
  63. /**
  64. * @ingroup dvrt_mem
  65. * @brief memory copy kind, the `kind` argument of rtMemcpy / rtMemcpyAsync
  66. */
  67. typedef enum tagRtMemcpyKind {
  68. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  69. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  70. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  71. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  72. RT_MEMCPY_MANAGED, // managed memory
  73. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE,
  74. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  75. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  76. RT_MEMCPY_RESERVED,
  77. } rtMemcpyKind_t;
  78. typedef enum tagRtRecudeKind { // reduce kind, the `kind` argument of rtReduceAsync ("Recude" spelling is part of the public name)
  79. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  80. RT_RECUDE_KIND_END
  81. } rtRecudeKind_t;
  82. typedef enum tagRtDataType { // data type, the `type` argument of rtReduceAsync
  83. RT_DATA_TYPE_FP32 = 0, // fp32
  84. RT_DATA_TYPE_FP16 = 1, // fp16
  85. RT_DATA_TYPE_INT16 = 2, // int16
  86. RT_DATA_TYPE_END
  87. } rtDataType_t;
  88. /**
  89. * @ingroup dvrt_mem
  90. * @brief memory copy channel type
  91. */
  92. typedef enum tagRtMemcpyChannelType {
  93. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  94. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  95. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  96. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  97. } rtMemcpyChannelType_t;
  98. /**
  99. * @ingroup rt_kernel
  100. * @brief AI core memory sizes, passed to rtAiCoreMemorySizes / rtSetAiCoreMemorySizes
  101. */
  102. typedef struct rtAiCoreMemorySize {
  103. uint32_t l0ASize;
  104. uint32_t l0BSize;
  105. uint32_t l0CSize;
  106. uint32_t l1Size;
  107. uint32_t ubSize;
  108. uint32_t l2Size;
  109. uint32_t l2PageNum;
  110. uint32_t blockSize;
  111. uint64_t bankSize;
  112. uint64_t bankNum;
  113. uint64_t burstInOneBlock;
  114. uint64_t bankGroupNum;
  115. } rtAiCoreMemorySize_t;
  116. /**
  117. * @ingroup dvrt_mem
  118. * @brief memory location type (host or device), stored in rtPointerAttributes_t
  119. */
  120. typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t;
  121. /**
  122. * @ingroup dvrt_mem
  123. * @brief memory attributes of a pointer, the output of rtPointerGetAttributes
  124. */
  125. typedef struct tagRtPointerAttributes {
  126. rtMemoryType_t memoryType; // host memory or device memory
  127. uint32_t deviceID; // device ID
  128. uint32_t isManaged;
  129. uint32_t pageSize;
  130. } rtPointerAttributes_t;
  131. /**
  132. * @ingroup dvrt_mem
  133. * @brief allocate device memory
  134. * @param [in|out] devPtr memory pointer
  135. * @param [in] size memory size
  136. * @param [in] type memory type | memory policy (rtMemType_t)
  137. * @return RT_ERROR_NONE for ok
  138. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  139. */
  140. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  141. /**
  142. * @ingroup dvrt_mem
  143. * @brief free device memory
  144. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  145. * @return RT_ERROR_NONE for ok
  146. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  147. */
  148. RTS_API rtError_t rtFree(void *devPtr);
  149. /**
  150. * @ingroup dvrt_mem
  151. * @brief allocate device memory for DVPP
  152. * @param [in|out] devPtr memory pointer
  153. * @param [in] size memory size
  154. * @return RT_ERROR_NONE for ok
  155. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  156. */
  157. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  158. /**
  159. * @ingroup dvrt_mem
  160. * @brief free device memory for DVPP
  161. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  162. * @return RT_ERROR_NONE for ok
  163. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  164. */
  165. RTS_API rtError_t rtDvppFree(void *devPtr);
  166. /**
  167. * @ingroup dvrt_mem
  168. * @brief allocate host memory
  169. * @param [in|out] hostPtr memory pointer
  170. * @param [in] size memory size
  171. * @return RT_ERROR_NONE for ok
  172. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  173. */
  174. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  175. /**
  176. * @ingroup dvrt_mem
  177. * @brief free host memory
  178. * @param [in] hostPtr memory pointer
  179. * @return RT_ERROR_NONE for ok
  180. * @return RT_ERROR_INVALID_DEVICE_POINTER for an invalid memory pointer (NOTE(review): code name says DEVICE although this frees host memory - confirm)
  181. */
  182. RTS_API rtError_t rtFreeHost(void *hostPtr);
  183. /**
  184. * @ingroup dvrt_mem
  185. * @brief allocate managed memory
  186. * @param [in|out] ptr memory pointer
  187. * @param [in] size memory size
  188. * @param [in] flag reserved, set to 0.
  189. * @return RT_ERROR_NONE for ok
  190. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  191. */
  192. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  193. /**
  194. * @ingroup dvrt_mem
  195. * @brief free managed memory
  196. * @param [in] ptr memory pointer
  197. * @return RT_ERROR_NONE for ok
  198. * @return RT_ERROR_INVALID_DEVICE_POINTER for an invalid memory pointer
  199. */
  200. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  201. /**
  202. * @ingroup dvrt_mem
  203. * @brief allocate cached device memory
  204. * @param [in|out] devPtr memory pointer
  205. * @param [in] size memory size
  206. * @param [in] type memory type
  207. * @return RT_ERROR_NONE for ok
  208. */
  209. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  210. /**
  211. * @ingroup dvrt_mem
  212. * @brief flush device memory
  213. * @param [in] base virtual base address
  214. * @param [in] len memory size
  215. * @return RT_ERROR_NONE for ok, errno for failed
  216. */
  217. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  218. /**
  219. * @ingroup dvrt_mem
  220. * @brief invalidate device memory
  221. * @param [in] base virtual base address
  222. * @param [in] len memory size
  223. * @return RT_ERROR_NONE for ok, errno for failed
  224. */
  225. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  226. /**
  227. * @ingroup dvrt_mem
  228. * @brief synchronous memcpy
  229. * @param [in] dst destination address pointer
  230. * @param [in] destMax Max length of destination address memory
  231. * @param [in] src source address pointer
  232. * @param [in] count the number of bytes to copy
  233. * @param [in] kind memcpy type
  234. * @return RT_ERROR_NONE for ok
  235. * @return RT_ERROR_INVALID_VALUE for error input of count
  236. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  237. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  238. */
  239. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  240. /**
  241. * @ingroup dvrt_mem
  242. * @brief asynchronous memcpy
  243. * @param [in] dst destination address pointer
  244. * @param [in] destMax Max length of destination address memory
  245. * @param [in] src source address pointer
  246. * @param [in] count the number of bytes to copy
  247. * @param [in] kind memcpy type
  248. * @param [in] stream asynchronous task stream
  249. * @return RT_ERROR_NONE for ok
  250. * @return RT_ERROR_INVALID_VALUE for error input of count,stream
  251. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  252. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  253. */
  254. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  255. rtStream_t stream);
  256. /**
  257. * @ingroup dvrt_mem
  258. * @brief asynchronous reduce memcpy
  259. * @param [in] dst destination address pointer
  260. * @param [in] destMax Max length of destination address memory
  261. * @param [in] src source address pointer
  262. * @param [in] count the number of bytes to copy
  263. * @param [in] kind reduce kind (rtRecudeKind_t)
  264. * @param [in] type data type
  265. * @param [in] stream asynchronous task stream
  266. * @return RT_ERROR_NONE for ok
  267. * @return RT_ERROR_INVALID_VALUE for error input of count,stream
  268. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  269. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  270. */
  271. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  272. rtDataType_t type, rtStream_t stream);
  273. /**
  274. * @ingroup dvrt_mem
  275. * @brief query AI core memory sizes
  276. * @param [in] aiCoreMemorySize NOTE(review): non-const pointer on a query - presumably filled as an output; confirm
  277. * @return RT_ERROR_NONE for ok, errno for failed
  278. */
  279. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  280. /**
  281. * @ingroup dvrt_mem
  282. * @brief set AI core memory sizes. Set before model inference ("Bright screen") to prevent a model from failing to be fully
  283. integrated into the network due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  284. * @param [in] aiCoreMemorySize memory sizes to set
  285. * @return RT_ERROR_NONE for ok, errno for failed
  286. */
  287. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  288. /**
  289. * @ingroup dvrt_mem
  290. * @brief set memory with uint32_t value
  291. * @param [in] devPtr memory pointer to set
  292. * @param [in] destMax Max length of destination address memory
  293. * @param [in] value value to set
  294. * @param [in] count byte num
  295. * @return RT_ERROR_NONE for ok, errno for failed
  296. */
  297. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  298. /**
  299. * @ingroup dvrt_mem
  300. * @brief set memory with uint32_t value async
  301. * @param [in] ptr memory pointer to set
  302. * @param [in] destMax Max length of destination address memory
  303. * @param [in] value value to set
  304. * @param [in] count byte num
  305. * @param [in] stream asynchronous task stream
  306. * @return RT_ERROR_NONE for ok, errno for failed
  307. */
  308. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  309. /**
  310. * @ingroup dvrt_mem
  311. * @brief get current device memory total and free
  312. * @param [out] free free memory of the current device
  313. * @param [out] total total memory of the current device
  314. * @return RT_ERROR_NONE for ok, errno for failed
  315. */
  316. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  317. /**
  318. * @ingroup dvrt_mem
  319. * @brief prefetch memory to a device (NOTE(review): original brief was a copy of rtMemset's; this wording is inferred from the name - confirm)
  320. * @param [in] devPtr memory pointer
  321. * @param [in] len memory length
  322. * @param [in] device device id
  323. * @return RT_ERROR_NONE for ok, errno for failed
  324. */
  325. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  326. /**
  327. * @ingroup dvrt_mem
  328. * @brief get memory attribute:Host or Device
  329. * @param [out] attributes attributes of the memory that ptr refers to
  330. * @param [in] ptr memory pointer to query
  331. * @return RT_ERROR_NONE for ok, errno for failed
  332. */
  333. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  334. /**
  335. * @ingroup dvrt_mem
  336. * @brief make memory shared interprocess and assign it a name
  337. * @param [in] ptr device memory address pointer
  338. * @param [in] byteCount identification byteCount
  339. * @param [in] name identification name; the trailing `len` parameter is undocumented - NOTE(review): name is a non-const char* with a length, so it may be an output buffer of len bytes; confirm
  340. * @return RT_ERROR_NONE for ok
  341. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name, byteCount
  342. * @return RT_ERROR_DRV_ERR for driver error
  343. */
  344. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  345. /**
  346. * @ingroup dvrt_mem
  347. * @brief destroy an interprocess shared memory
  348. * @param [in] name identification name
  349. * @return RT_ERROR_NONE for ok
  350. * @return RT_ERROR_INVALID_VALUE for error input of name
  351. * @return RT_ERROR_DRV_ERR for driver error
  352. */
  353. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  354. /**
  355. * @ingroup dvrt_mem
  356. * @brief open an interprocess shared memory
  357. * @param [in|out] ptr device memory address pointer
  358. * @param [in] name identification name
  359. * @return RT_ERROR_NONE for ok
  360. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name
  361. * @return RT_ERROR_DRV_ERR for driver error
  362. */
  363. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  364. /**
  365. * @ingroup dvrt_mem
  366. * @brief close an interprocess shared memory
  367. * @param [in] ptr device memory address pointer
  368. * NOTE(review): a `name` parameter was documented here, but the signature takes only ptr
  369. * @return RT_ERROR_NONE for ok
  370. * @return RT_ERROR_INVALID_VALUE for error input of ptr
  371. * @return RT_ERROR_DRV_ERR for driver error
  372. */
  373. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  374. /**
  375. * @ingroup dvrt_mem
  376. * @brief HCCL Async memory cpy
  377. * @param [in] index sq index
  378. * @param [in] wqe_index module index
  379. * @param [in] stream asynchronous task stream
  380. * @return RT_ERROR_NONE for ok
  381. * @return RT_ERROR_INVALID_VALUE for error input
  382. * @return RT_ERROR_DRV_ERR for driver error
  383. */
  384. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream);
  385. /**
  386. * @ingroup dvrt_mem
  387. * @brief Ipc set mem pid
  388. * @param [in] name name to be queried
  389. * @param [in] pid array of process ids
  390. * @param [in] num length of pid[]
  391. * @return RT_ERROR_NONE for ok
  392. * @return RT_ERROR_INVALID_VALUE for error input
  393. * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle
  394. * @return RT_ERROR_DRV_ERR for driver error
  395. */
  396. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  397. /**
  398. * @ingroup dvrt_mem
  399. * @brief HCCL Async memory cpy
  400. * @param [in] dbIndex single device 0
  401. * @param [in] dbInfo doorbell info
  402. * @param [in] stream asynchronous task stream
  403. * @return RT_ERROR_NONE for ok
  404. * @return RT_ERROR_INVALID_VALUE for error input
  405. * @return RT_ERROR_DRV_ERR for driver error
  406. */
  407. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
  408. #ifdef __cplusplus
  409. }
  410. #endif
  411. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示