You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

mem.h 14 kB

5 years ago
5 years ago
5 years ago
5 years ago

  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. /*lint -e7*/
  19. #include <stddef.h>
  20. /*lint +e7*/
  21. #include "base.h"
  22. #include "config.h"
  23. #include "stream.h"
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif // __cplusplus
  27. /**
  28. * @ingroup dvrt_mem
  29. * @brief memory type
  30. */
  31. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  32. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  33. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  34. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  35. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  36. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device; NOTE(review): 0x11 includes the 0x10 bit of RT_MEMORY_P2P_HBM, confirm intended
  37. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // non-cached DDR memory
  38. #define RT_MEMORY_RESERVED ((uint32_t)0x40)
  39. /**
  40. * @ingroup dvrt_mem
  41. * @brief memory Policy
  42. */
  43. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // try huge pages first, then default pages; NOTE(review): same wording as HUGE_PAGE_FIRST, confirm this is the behavior when no policy bit is set
  44. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // try huge pages first, then fall back to default pages
  45. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // allocate from huge pages only
  46. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // allocate from default pages only
  47. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the mem type bits <0, 9>; the policy flags above start at bit 10
  48. /**
  49. * @ingroup dvrt_mem
  50. * @brief bitwise OR of a memory type (RT_MEMORY_*) and a memory policy (RT_MEMORY_POLICY_*)
  51. */
  52. typedef uint32_t rtMemType_t;
  53. /**
  54. * @ingroup dvrt_mem
  55. * @brief memory advise type
  56. */
  57. #define RT_MEMORY_ADVISE_EXE (0x02) // NOTE(review): presumably values for the advise argument of rtMemAdvise, confirm
  58. #define RT_MEMORY_ADVISE_THP (0x04)
  59. #define RT_MEMORY_ADVISE_PLE (0x08)
  60. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 is not a single bit (it overlaps 0x02 and 0x04); 0x10 may have been intended, confirm against the runtime before changing
  61. /**
  62. * @ingroup dvrt_mem
  63. * @brief memory copy type
  64. */
  65. typedef enum tagRtMemcpyKind {
  66. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  67. RT_MEMCPY_HOST_TO_DEVICE, // host to device (= 1)
  68. RT_MEMCPY_DEVICE_TO_HOST, // device to host (= 2)
  69. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P (= 3)
  70. RT_MEMCPY_MANAGED, // managed memory (= 4)
  71. RT_MEMCPY_RESERVED, // = 5; NOTE(review): presumably an end-of-range sentinel, confirm
  72. } rtMemcpyKind_t;
  73. typedef enum tagRtRecudeKind { // NOTE(review): "Recude" looks like a typo of "Reduce"; the name is kept because it is part of the public API
  74. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  75. RT_RECUDE_KIND_END // = 11, one past the last reduce kind
  76. } rtRecudeKind_t;
  77. typedef enum tagRtDataType { // data type argument of rtReduceAsync
  78. RT_DATA_TYPE_FP32 = 0, // fp32
  79. RT_DATA_TYPE_END // = 1, one past the last data type
  80. } rtDataType_t;
  81. /**
  82. * @ingroup dvrt_mem
  83. * @brief memory copy channel type
  84. */
  85. typedef enum tagRtMemcpyChannelType {
  86. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  87. RT_MEMCPY_CHANNEL_TYPE_PCIe, // = 1; NOTE(review): presumably copy over PCIe, confirm
  88. RT_MEMCPY_CHANNEL_TYPE_HCCs, // = 2, not supported yet
  89. RT_MEMCPY_CHANNEL_TYPE_RESERVED, // = 3
  90. } rtMemcpyChannelType_t;
  91. /**
  92. * @ingroup rt_kernel
  93. * @brief ai core memory size
  94. */
  95. typedef struct rtAiCoreMemorySize { // argument type of rtAiCoreMemorySizes and rtSetAiCoreMemorySizes
  96. uint32_t l0ASize; // NOTE(review): the *Size fields are presumably byte counts, confirm units
  97. uint32_t l0BSize;
  98. uint32_t l0CSize;
  99. uint32_t l1Size;
  100. uint32_t ubSize;
  101. uint32_t l2Size;
  102. uint32_t l2PageNum;
  103. uint32_t blockSize;
  104. uint64_t bankSize; // the remaining fields are 64-bit, unlike the 32-bit fields above
  105. uint64_t bankNum;
  106. uint64_t burstInOneBlock;
  107. uint64_t bankGroupNum;
  108. } rtAiCoreMemorySize_t;
  109. /**
  110. * @ingroup dvrt_mem
  111. * @brief memory type
  112. */
  113. typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t; // host vs. device memory; type of rtPointerAttributes_t.memoryType (no enumerator has value 0)
  114. /**
  115. * @ingroup dvrt_mem
  116. * @brief memory attribute
  117. */
  118. typedef struct tagRtPointerAttributes { // output of rtPointerGetAttributes
  119. rtMemoryType_t memoryType; // host memory or device memory
  120. uint32_t deviceID; // device ID
  121. uint32_t isManaged; // NOTE(review): presumably non-zero for managed memory, confirm
  122. uint32_t pageSize; // NOTE(review): presumably the page size backing the pointer, units unconfirmed
  123. } rtPointerAttributes_t;
  124. /**
  125. * @ingroup dvrt_mem
  126. * @brief alloc device memory
  127. * @param [in|out] devPtr memory pointer
  128. * @param [in] size memory size
  129. * @param [in] type memory type
  130. * @return RT_ERROR_NONE for ok
  131. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  132. */
  133. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  134. /**
  135. * @ingroup dvrt_mem
  136. * @brief free device memory
  137. * @param [in|out] devPtr memory pointer
  138. * @return RT_ERROR_NONE for ok
  139. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  140. */
  141. RTS_API rtError_t rtFree(void *devPtr);
  142. /**
  143. * @ingroup dvrt_mem
  144. * @brief alloc device memory for dvpp
  145. * @param [in|out] devPtr memory pointer
  146. * @param [in] size memory size
  147. * @return RT_ERROR_NONE for ok
  148. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  149. */
  150. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  151. /**
  152. * @ingroup dvrt_mem
  153. * @brief free device memory for dvpp
  154. * @param [in|out] devPtr memory pointer
  155. * @return RT_ERROR_NONE for ok
  156. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  157. */
  158. RTS_API rtError_t rtDvppFree(void *devPtr);
  159. /**
  160. * @ingroup dvrt_mem
  161. * @brief alloc host memory
  162. * @param [in|out] hostPtr memory pointer
  163. * @param [in] size memory size
  164. * @return RT_ERROR_NONE for ok
  165. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  166. */
  167. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  168. /**
  169. * @ingroup dvrt_mem
  170. * @brief free host memory
  171. * @param [in] hostPtr memory pointer
  172. * @return RT_ERROR_NONE for ok
  173. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  174. */
  175. RTS_API rtError_t rtFreeHost(void *hostPtr);
  176. /**
  177. * @ingroup dvrt_mem
  178. * @brief alloc managed memory
  179. * @param [in|out] ptr memory pointer
  180. * @param [in] size memory size
  181. * @param [in] flag reserved, set to 0.
  182. * @return RT_ERROR_NONE for ok
  183. * @return RT_ERROR_MEMORY_ALLOCATION for memory allocation failed
  184. */
  185. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  186. /**
  187. * @ingroup dvrt_mem
  188. * @brief free managed memory
  189. * @param [in] ptr memory pointer
  190. * @return RT_ERROR_NONE for ok
  191. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  192. */
  193. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  194. /**
  195. * @ingroup dvrt_mem
  196. * @brief advise memory
  197. * @param [in] ptr memory pointer
  198. * @param [in] size memory size
  199. * @param [in] advise memory advise
  200. * @return RT_ERROR_NONE for ok
  201. * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer
  202. */
  203. RTS_API rtError_t rtMemAdvise(void *ptr, uint64_t size, uint32_t advise);
  204. /**
  205. * @ingroup dvrt_mem
  206. * @brief flush device memory
  207. * @param [in] base virtual base address
  208. * @param [in] len memory size
  209. * @return RT_ERROR_NONE for ok, errno for failed
  210. */
  211. RTS_API rtError_t rtFlushCache(uint64_t base, uint32_t len);
  212. /**
  213. * @ingroup dvrt_mem
  214. * @brief invalidate device memory
  215. * @param [in] base virtual base address
  216. * @param [in] len memory size
  217. * @return RT_ERROR_NONE for ok, errno for failed
  218. */
  219. RTS_API rtError_t rtInvalidCache(uint64_t base, uint32_t len);
  220. /**
  221. * @ingroup dvrt_mem
  222. * @brief synchronized memcpy
  223. * @param [in] dst destination address pointer
  224. * @param [in] destMax Max length of destination address memory
  225. * @param [in] src source address pointer
  226. * @param [in] count the number of byte to copy
  227. * @param [in] kind memcpy type
  228. * @return RT_ERROR_NONE for ok
  229. * @return RT_ERROR_INVALID_VALUE for error input of count
  230. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  231. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  232. */
  233. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  234. /**
  235. * @ingroup dvrt_mem
  236. * @brief asynchronized memcpy
  237. * @param [in] dst destination address pointer
  238. * @param [in] destMax Max length of destination address memory
  239. * @param [in] src source address pointer
  240. * @param [in] count the number of byte to copy
  241. * @param [in] kind memcpy type
  242. * @param [in] stream asynchronized task stream
  243. * @return RT_ERROR_NONE for ok
  244. * @return RT_ERROR_INVALID_VALUE for error input of count,stream
  245. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  246. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  247. */
  248. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  249. rtStream_t stream);
  250. /**
  251. * @ingroup dvrt_mem
  252. * @brief asynchronized reduce memcpy
  253. * @param [in] dst destination address pointer
  254. * @param [in] destMax Max length of destination address memory
  255. * @param [in] src source address pointer
  256. * @param [in] count the number of byte to copy
  257. * @param [in] kind memcpy type
  258. * @param [in] type data type
  259. * @param [in] stream asynchronized task stream
  260. * @return RT_ERROR_NONE for ok
  261. * @return RT_ERROR_INVALID_VALUE for error input of count,stream
  262. * @return RT_ERROR_INVALID_DEVICE_POINTER for error input memory pointer of dst,src
  263. * @return RT_ERROR_INVALID_MEMCPY_DIRECTION for error copy direction of kind
  264. */
  265. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  266. rtDataType_t type, rtStream_t stream);
  267. /**
  268. * @ingroup dvrt_mem
  269. * @brief query memory size
  270. * @param [out] aiCoreMemorySize queried AI core memory sizes
  271. * @return RT_ERROR_NONE for ok, errno for failed
  272. */
  273. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  274. /**
  275. * @ingroup dvrt_mem
  276. * @brief set memory size. Set it before model inference (bright-screen scenario) to prevent the model
  277. network from not being fully integrated due to memory limitations. Requirement comes from JiaMinHu. Only used for Tiny.
  278. * @param [in] aiCoreMemorySize
  279. * @return RT_ERROR_NONE for ok, errno for failed
  280. */
  281. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  282. /**
  283. * @ingroup dvrt_mem
  284. * @brief set memory with uint32_t value
  285. * @param [in] devPtr
  286. * @param [in] destMax Max length of destination address memory
  287. * @param [in] value
  288. * @param [in] count byte num
  289. * @return RT_ERROR_NONE for ok, errno for failed
  290. */
  291. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  292. /**
  293. * @ingroup dvrt_mem
  294. * @brief set memory with uint32_t value async
  295. * @param [in] ptr
  296. * @param [in] destMax Max length of destination address memory
  297. * @param [in] value
  298. * @param [in] count byte num
  299. * @param [in] stream
  300. * @return RT_ERROR_NONE for ok, errno for failed
  301. */
  302. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  303. /**
  304. * @ingroup dvrt_mem
  305. * @brief get current device memory total and free
  306. * @param [out] free
  307. * @param [out] total
  308. * @return RT_ERROR_NONE for ok, errno for failed
  309. */
  310. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  311. /**
  312. * @ingroup dvrt_mem
  313. * @brief prefetch memory to device
  314. * @param [in] devPtr
  315. * @param [in] len
  316. * @param [in] device
  317. * @return RT_ERROR_NONE for ok, errno for failed
  318. */
  319. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  320. /**
  321. * @ingroup dvrt_mem
  322. * @brief get memory attribute:Host or Device
  323. * @param [in] ptr
  324. * @param [out] attributes
  325. * @return RT_ERROR_NONE for ok, errno for failed
  326. */
  327. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  328. /**
  329. * @ingroup dvrt_mem
  330. * @brief make memory shared interprocess and assigned a name
  331. * @param [in] ptr device memory address pointer
  332. * @param [in] name identification name
  333. * @param [in] byteCount identification byteCount
  334. * @return RT_ERROR_NONE for ok
  335. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name, byteCount
  336. * @return RT_ERROR_DRV_ERR for driver error
  337. */
  338. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  339. /**
  340. * @ingroup dvrt_mem
  341. * @brief destroy an interprocess shared memory
  342. * @param [in] name identification name
  343. * @return RT_ERROR_NONE for ok
  344. * @return RT_ERROR_INVALID_VALUE for error input of name
  345. * @return RT_ERROR_DRV_ERR for driver error
  346. */
  347. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  348. /**
  349. * @ingroup dvrt_mem
  350. * @brief open an interprocess shared memory
  351. * @param [in|out] ptr device memory address pointer
  352. * @param [in] name identification name
  353. * @return RT_ERROR_NONE for ok
  354. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name
  355. * @return RT_ERROR_DRV_ERR for driver error
  356. */
  357. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  358. /**
  359. * @ingroup dvrt_mem
  360. * @brief close an interprocess shared memory
  361. * @param [in] ptr device memory address pointer
  362. * @param [in] name identification name
  363. * @return RT_ERROR_NONE for ok
  364. * @return RT_ERROR_INVALID_VALUE for error input of ptr, name
  365. * @return RT_ERROR_DRV_ERR for driver error
  366. */
  367. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  368. /**
  369. * @ingroup dvrt_mem
  370. * @brief HCCL Async memory cpy
  371. * @param [in] index sq index
  372. * @param [in] wqe_index module index
  373. * @param [in] stream asynchronized task stream
  374. * @return RT_ERROR_NONE for ok
  375. * @return RT_ERROR_INVALID_VALUE for error input
  376. * @return RT_ERROR_DRV_ERR for driver error
  377. */
  378. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream);
  379. /**
  380. * @ingroup dvrt_mem
  381. * @brief Set the memory readCount value
  382. * @param [in] devPtr memory pointer
  383. * @param [in] size memory size
  384. * @param [in] readCount readCount value
  385. * @return RT_ERROR_NONE for ok
  386. * @return RT_ERROR_INVALID_VALUE for error input
  387. * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle
  388. * @return RT_ERROR_DRV_ERR for driver error
  389. */
  390. RTS_API rtError_t rtMemSetRC(const void *devPtr, uint64_t size, uint32_t readCount);
  391. /**
  392. * @ingroup dvrt_mem
  393. * @brief Ipc set mem pid
  394. * @param [in] name name to be queried
  395. * @param [in] pid process id
  396. * @param [in] num length of pid[]
  397. * @return RT_ERROR_NONE for ok
  398. * @return RT_ERROR_INVALID_VALUE for error input
  399. * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle
  400. * @return RT_ERROR_DRV_ERR for driver error
  401. */
  402. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  403. #ifdef __cplusplus
  404. }
  405. #endif
  406. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示