You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

mem.h 18 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __CCE_RUNTIME_MEM_H__
  17. #define __CCE_RUNTIME_MEM_H__
  18. /*lint -e7*/
  19. #include <stddef.h>
  20. /*lint +e7*/
  21. #include "base.h"
  22. #include "config.h"
  23. #include "stream.h"
  24. #if defined(__cplusplus)
  25. extern "C" {
  26. #endif
  27. /**
  28. * @ingroup dvrt_mem
  29. * @brief memory type; not all values are single bits (e.g. 0x3, 0x11), and all except L1/L2 fit in MEM_ALLOC_TYPE_BIT
  30. */
  31. #define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
  32. #define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
  33. #define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device
  34. #define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
  35. #define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
  36. #define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
  37. #define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device
  38. #define RT_MEMORY_DDR_NC ((uint32_t)0x20) // non-cached DDR memory
  39. #define RT_MEMORY_TS_4G ((uint32_t)0x40)
  40. #define RT_MEMORY_TS ((uint32_t)0x80)
  41. #define RT_MEMORY_RESERVED ((uint32_t)0x100)
  42. #define RT_MEMORY_L1 ((uint32_t)0x1<<16) // bit 16; the cast applies to 0x1 before the shift
  43. #define RT_MEMORY_L2 ((uint32_t)0x1<<17) // bit 17; the cast applies to 0x1 before the shift
  44. /**
  45. * @ingroup dvrt_mem
  46. * @brief memory info type (macro form; separate from the rtMemInfoType_t enum, whose enumerators use different values)
  47. */
  48. #define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1)
  49. #define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2)
  50. #define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3)
  51. #define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4)
  52. /**
  53. * @ingroup dvrt_mem
  54. * @brief memory policy; each non-zero policy is one bit in bits 10..15, above the memory-type bits
  55. */
  56. #define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // no policy bit set (was documented as: prefer huge page, then default page - TODO confirm)
  57. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem: prefer huge page, then default page
  58. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem: only use huge page
  59. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem: only use default page
  60. #define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem: prefer huge page, then default page, used for p2p
  61. #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem: only use huge page, used for p2p
  62. #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem: only use default page, used for p2p
  63. #define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mask of the memory-type bits <0, 9>
  64. /**
  65. * @ingroup dvrt_mem
  66. * @brief memory type | memory policy (an RT_MEMORY_* value combined with an RT_MEMORY_POLICY_* value)
  67. */
  68. typedef uint32_t rtMemType_t;
  69. /**
  70. * @ingroup dvrt_mem
  71. * @brief memory advise type
  72. */
  73. #define RT_MEMORY_ADVISE_EXE (0x02)
  74. #define RT_MEMORY_ADVISE_THP (0x04)
  75. #define RT_MEMORY_ADVISE_PLE (0x08)
  76. #define RT_MEMORY_ADVISE_PIN (0x16) // NOTE(review): 0x16 == 0x10|0x04|0x02, overlapping EXE and THP; 0x10 may have been intended - confirm before changing
  77. /**
  78. * @ingroup dvrt_mem
  79. * @brief memory copy kind, the `kind` argument of rtMemcpy and rtMemcpyAsync
  80. */
  81. typedef enum tagRtMemcpyKind {
  82. RT_MEMCPY_HOST_TO_HOST = 0, // host to host
  83. RT_MEMCPY_HOST_TO_DEVICE, // host to device
  84. RT_MEMCPY_DEVICE_TO_HOST, // device to host
  85. RT_MEMCPY_DEVICE_TO_DEVICE, // device to device, 1P && P2P
  86. RT_MEMCPY_MANAGED, // managed memory
  87. RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, // implicit value 5
  88. RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes)
  89. RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex
  90. RT_MEMCPY_RESERVED, // implicit value 8
  91. } rtMemcpyKind_t;
  92. typedef enum tagRtMemInfoType { // memory info selector, the memInfoType argument of rtMemGetInfoEx
  93. RT_MEMORYINFO_DDR,
  94. RT_MEMORYINFO_HBM,
  95. RT_MEMORYINFO_DDR_HUGE, // Hugepage memory of DDR
  96. RT_MEMORYINFO_DDR_NORMAL, // Normal memory of DDR
  97. RT_MEMORYINFO_HBM_HUGE, // Hugepage memory of HBM
  98. RT_MEMORYINFO_HBM_NORMAL, // Normal memory of HBM
  99. RT_MEMORYINFO_DDR_P2P_HUGE, // Hugepage memory of P2P DDR
  100. RT_MEMORYINFO_DDR_P2P_NORMAL, // Normal memory of P2P DDR
  101. RT_MEMORYINFO_HBM_P2P_HUGE, // Hugepage memory of P2P HBM
  102. RT_MEMORYINFO_HBM_P2P_NORMAL, // Normal memory of P2P HBM
  103. } rtMemInfoType_t;
  104. typedef enum tagRtRecudeKind { // reduce kind for rtReduceAsync; "Recude" (sic) is part of the public name
  105. RT_MEMCPY_SDMA_AUTOMATIC_ADD = 10, // D2D, SDMA inline reduce, includes 1P and P2P
  106. RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11,
  107. RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12,
  108. RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13,
  109. RT_RECUDE_KIND_END // implicit value 14
  110. } rtRecudeKind_t;
  111. typedef enum tagRtDataType { // data type, the `type` argument of rtReduceAsync
  112. RT_DATA_TYPE_FP32 = 0, // fp32
  113. RT_DATA_TYPE_FP16 = 1, // fp16
  114. RT_DATA_TYPE_INT16 = 2, // int16
  115. RT_DATA_TYPE_INT4 = 3, // int4
  116. RT_DATA_TYPE_INT8 = 4, // int8
  117. RT_DATA_TYPE_INT32 = 5, // int32
  118. RT_DATA_TYPE_BFP16 = 6, // bfp16
  119. RT_DATA_TYPE_BFP32 = 7, // bfp32
  120. RT_DATA_TYPE_UINT8 = 8, // uint8
  121. RT_DATA_TYPE_UINT16= 9, // uint16
  122. RT_DATA_TYPE_UINT32= 10,// uint32
  123. RT_DATA_TYPE_END // implicit value 11, one past RT_DATA_TYPE_UINT32
  124. } rtDataType_t;
  125. /**
  126. * @ingroup dvrt_mem
  127. * @brief memory copy channel type
  128. */
  129. typedef enum tagRtMemcpyChannelType {
  130. RT_MEMCPY_CHANNEL_TYPE_INNER = 0, // 1P
  131. RT_MEMCPY_CHANNEL_TYPE_PCIe,
  132. RT_MEMCPY_CHANNEL_TYPE_HCCs, // not supported now
  133. RT_MEMCPY_CHANNEL_TYPE_RESERVED,
  134. } rtMemcpyChannelType_t;
  135. /**
  136. * @ingroup rt_kernel
  137. * @brief AI Core memory sizes, the argument of rtAiCoreMemorySizes and rtSetAiCoreMemorySizes
  138. */
  139. typedef struct rtAiCoreMemorySize {
  140. uint32_t l0ASize;
  141. uint32_t l0BSize;
  142. uint32_t l0CSize;
  143. uint32_t l1Size;
  144. uint32_t ubSize;
  145. uint32_t l2Size;
  146. uint32_t l2PageNum;
  147. uint32_t blockSize;
  148. uint64_t bankSize;
  149. uint64_t bankNum;
  150. uint64_t burstInOneBlock;
  151. uint64_t bankGroupNum;
  152. } rtAiCoreMemorySize_t;
  153. /**
  154. * @ingroup dvrt_mem
  155. * @brief memory type reported in rtPointerAttributes_t (host, device, SVM or DVPP)
  156. */
  157. typedef enum tagRtMemoryType {
  158. RT_MEMORY_TYPE_HOST = 1,
  159. RT_MEMORY_TYPE_DEVICE = 2,
  160. RT_MEMORY_TYPE_SVM = 3,
  161. RT_MEMORY_TYPE_DVPP = 4
  162. } rtMemoryType_t;
  163. /**
  164. * @ingroup dvrt_mem
  165. * @brief pointer attributes, the output of rtPointerGetAttributes
  166. */
  167. typedef struct tagRtPointerAttributes {
  168. rtMemoryType_t memoryType; // host memory or device memory
  169. rtMemoryType_t locationType;
  170. uint32_t deviceID; // device ID
  171. uint32_t pageSize;
  172. } rtPointerAttributes_t;
  173. typedef struct rtMallocHostSharedMemoryIn { // input parameters of rtMallocHostSharedMemory
  174. const char *name;
  175. const uint64_t size; // const member: must be set when the struct is initialized
  176. uint32_t flag;
  177. } rtMallocHostSharedMemoryIn;
  178. typedef struct rtMallocHostSharedMemoryOut { // output info of rtMallocHostSharedMemory
  179. int fd;
  180. void *ptr;
  181. void *devPtr;
  182. } rtMallocHostSharedMemoryOut;
  183. typedef struct rtFreeHostSharedMemoryIn { // input parameters of rtFreeHostSharedMemory
  184. const char *name;
  185. const uint64_t size; // const member: must be set when the struct is initialized
  186. int fd;
  187. void *ptr;
  188. void *devPtr;
  189. } rtFreeHostSharedMemoryIn;
  190. /**
  191. * @ingroup dvrt_mem
  192. * @brief allocate device memory
  193. * @param [in|out] devPtr memory pointer
  194. * @param [in] size memory size
  195. * @param [in] type memory type | memory policy (see rtMemType_t)
  196. * @return RT_ERROR_NONE on success
  197. * @return RT_ERROR_INVALID_VALUE on invalid input
  198. */
  199. RTS_API rtError_t rtMalloc(void **devPtr, uint64_t size, rtMemType_t type);
  200. /**
  201. * @ingroup dvrt_mem
  202. * @brief free device memory
  203. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  204. * @return RT_ERROR_NONE on success
  205. * @return RT_ERROR_INVALID_VALUE on invalid input
  206. */
  207. RTS_API rtError_t rtFree(void *devPtr);
  208. /**
  209. * @ingroup dvrt_mem
  210. * @brief allocate device memory for dvpp
  211. * @param [in|out] devPtr memory pointer
  212. * @param [in] size memory size
  213. * @return RT_ERROR_NONE on success
  214. * @return RT_ERROR_INVALID_VALUE on invalid input
  215. */
  216. RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size);
  217. /**
  218. * @ingroup dvrt_mem
  219. * @brief free device memory for dvpp
  220. * @param [in] devPtr memory pointer (passed by value, so the caller's pointer is not modified)
  221. * @return RT_ERROR_NONE on success
  222. * @return RT_ERROR_INVALID_VALUE on invalid input
  223. */
  224. RTS_API rtError_t rtDvppFree(void *devPtr);
  225. /**
  226. * @ingroup dvrt_mem
  227. * @brief allocate host memory
  228. * @param [in|out] hostPtr memory pointer
  229. * @param [in] size memory size
  230. * @return RT_ERROR_NONE on success
  231. * @return RT_ERROR_INVALID_VALUE on invalid input
  232. */
  233. RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size);
  234. /**
  235. * @ingroup dvrt_mem
  236. * @brief free host memory
  237. * @param [in] hostPtr memory pointer
  238. * @return RT_ERROR_NONE on success
  239. * @return RT_ERROR_INVALID_VALUE on invalid input
  240. */
  241. RTS_API rtError_t rtFreeHost(void *hostPtr);
  242. /**
  243. * @ingroup dvrt_mem
  244. * @brief allocate host shared memory
  245. * @param [in] in pointer to the input parameters (rtMallocHostSharedMemoryIn)
  246. * @param [out] out pointer to the output info (rtMallocHostSharedMemoryOut)
  247. * @return RT_ERROR_NONE on success
  248. * @return RT_ERROR_INVALID_VALUE on invalid input
  249. */
  250. RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in,
  251. rtMallocHostSharedMemoryOut *out);
  252. /**
  253. * @ingroup dvrt_mem
  254. * @brief free host shared memory
  255. * @param [in] in pointer to the input parameters (rtFreeHostSharedMemoryIn)
  256. * @return RT_ERROR_NONE on success
  257. * @return RT_ERROR_INVALID_VALUE on invalid input
  258. */
  259. RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in);
  260. /**
  261. * @ingroup dvrt_mem
  262. * @brief allocate managed memory
  263. * @param [in|out] ptr memory pointer
  264. * @param [in] size memory size
  265. * @param [in] flag reserved, set to 0.
  266. * @return RT_ERROR_NONE on success
  267. * @return RT_ERROR_INVALID_VALUE on invalid input
  268. */
  269. RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag);
  270. /**
  271. * @ingroup dvrt_mem
  272. * @brief free managed memory
  273. * @param [in] ptr memory pointer
  274. * @return RT_ERROR_NONE on success
  275. * @return RT_ERROR_INVALID_VALUE on invalid input
  276. */
  277. RTS_API rtError_t rtMemFreeManaged(void *ptr);
  278. /**
  279. * @ingroup dvrt_mem
  280. * @brief allocate cached device memory
  281. * @param [in|out] devPtr memory pointer
  282. * @param [in] size memory size
  283. * @param [in] type memory type | memory policy (see rtMemType_t)
  284. * @return RT_ERROR_NONE on success
  285. */
  286. RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type);
  287. /**
  288. * @ingroup dvrt_mem
  289. * @brief flush device memory
  290. * @param [in] base virtual base address
  291. * @param [in] len memory size
  292. * @return RT_ERROR_NONE on success, error code on failure
  293. */
  294. RTS_API rtError_t rtFlushCache(void *base, size_t len);
  295. /**
  296. * @ingroup dvrt_mem
  297. * @brief invalidate device memory
  298. * @param [in] base virtual base address
  299. * @param [in] len memory size
  300. * @return RT_ERROR_NONE on success, error code on failure
  301. */
  302. RTS_API rtError_t rtInvalidCache(void *base, size_t len);
  303. /**
  304. * @ingroup dvrt_mem
  305. * @brief synchronous memcpy
  306. * @param [in] dst destination address pointer
  307. * @param [in] destMax max length of destination address memory
  308. * @param [in] src source address pointer
  309. * @param [in] count the number of bytes to copy
  310. * @param [in] kind memcpy type
  311. * @return RT_ERROR_NONE on success
  312. * @return RT_ERROR_INVALID_VALUE on invalid input
  313. */
  314. RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind);
  315. /**
  316. * @ingroup dvrt_mem
  317. * @brief asynchronous memcpy
  318. * @param [in] dst destination address pointer
  319. * @param [in] destMax max length of destination address memory
  320. * @param [in] src source address pointer
  321. * @param [in] count the number of bytes to copy
  322. * @param [in] kind memcpy type
  323. * @param [in] stream asynchronous task stream
  324. * @return RT_ERROR_NONE on success
  325. * @return RT_ERROR_INVALID_VALUE on invalid input
  326. */
  327. RTS_API rtError_t rtMemcpyAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtMemcpyKind_t kind,
  328. rtStream_t stream);
  329. /**
  330. * @ingroup dvrt_mem
  331. * @brief asynchronous reduce memcpy
  332. * @param [in] dst destination address pointer
  333. * @param [in] destMax max length of destination address memory
  334. * @param [in] src source address pointer
  335. * @param [in] count the number of bytes to copy
  336. * @param [in] kind reduce kind (rtRecudeKind_t)
  337. * @param [in] type data type
  338. * @param [in] stream asynchronous task stream
  339. * @return RT_ERROR_NONE on success
  340. * @return RT_ERROR_INVALID_VALUE on invalid input
  341. */
  342. RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, uint64_t count, rtRecudeKind_t kind,
  343. rtDataType_t type, rtStream_t stream);
  344. /**
  345. * @ingroup dvrt_mem
  346. * @brief query AI Core memory sizes
  347. * @param [out] aiCoreMemorySize queried sizes (presumably filled in by this call; was tagged [in] - TODO confirm)
  348. * @return RT_ERROR_NONE on success, error code on failure
  349. * @return RT_ERROR_INVALID_VALUE on invalid input
  350. */
  351. RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  352. /**
  353. * @ingroup dvrt_mem
  354. * @brief set memory size. Set it before model inference ("bright screen" case) so that memory limitations do not
  355. prevent the model from being fully integrated into one network. Requirement comes from JiaMinHu. Only used for Tiny.
  356. * @param [in] aiCoreMemorySize
  357. * @return RT_ERROR_NONE on success, error code on failure
  358. * @return RT_ERROR_INVALID_VALUE on invalid input
  359. */
  360. RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize);
  361. /**
  362. * @ingroup dvrt_mem
  363. * @brief set memory with uint32_t value
  364. * @param [in] devPtr
  365. * @param [in] destMax max length of destination address memory
  366. * @param [in] value
  367. * @param [in] count byte num
  368. * @return RT_ERROR_NONE on success, error code on failure
  369. * @return RT_ERROR_INVALID_VALUE on invalid input
  370. */
  371. RTS_API rtError_t rtMemset(void *devPtr, uint64_t destMax, uint32_t value, uint64_t count);
  372. /**
  373. * @ingroup dvrt_mem
  374. * @brief set memory with uint32_t value async
  375. * @param [in] ptr
  376. * @param [in] destMax max length of destination address memory
  377. * @param [in] value
  378. * @param [in] count byte num
  379. * @param [in] stream
  380. * @return RT_ERROR_NONE on success, error code on failure
  381. * @return RT_ERROR_INVALID_VALUE on invalid input
  382. */
  383. RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uint64_t count, rtStream_t stream);
  384. /**
  385. * @ingroup dvrt_mem
  386. * @brief get the total and free memory of the current device
  387. * @param [out] free
  388. * @param [out] total
  389. * @return RT_ERROR_NONE on success, error code on failure
  390. * @return RT_ERROR_INVALID_VALUE on invalid input
  391. */
  392. RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
  393. /**
  394. * @ingroup dvrt_mem
  395. * @brief get the total and free memory of the current device for a given memory info type
  396. * @param [in] memInfoType
  397. * @param [out] free
  398. * @param [out] total
  399. * @return RT_ERROR_NONE on success, error code on failure
  400. */
  401. RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total);
  402. /**
  403. * @ingroup dvrt_mem
  404. * @brief prefetch memory to a device (NOTE(review): old brief was copied from rtMemset; wording inferred from the name - confirm)
  405. * @param [in] devPtr
  406. * @param [in] len
  407. * @param [in] device
  408. * @return RT_ERROR_NONE on success, error code on failure
  409. * @return RT_ERROR_INVALID_VALUE on invalid input
  410. */
  411. RTS_API rtError_t rtMemPrefetchToDevice(void *devPtr, uint64_t len, int32_t device);
  412. /**
  413. * @ingroup dvrt_mem
  414. * @brief get memory attribute: Host or Device
  415. * @param [out] attributes
  416. * @param [in] ptr
  417. * @return RT_ERROR_NONE on success, error code on failure
  418. * @return RT_ERROR_INVALID_VALUE on invalid input
  419. */
  420. RTS_API rtError_t rtPointerGetAttributes(rtPointerAttributes_t *attributes, const void *ptr);
  421. /**
  422. * @ingroup dvrt_mem
  423. * @brief make memory shared between processes and assign it a name
  424. * @param [in] ptr device memory address pointer
  425. * @param [in] byteCount identification byteCount
  426. * @param [in] name identification name; len is presumably the size of the name buffer (name is non-const - TODO confirm direction)
  427. * @return RT_ERROR_NONE on success
  428. * @return RT_ERROR_INVALID_VALUE on invalid input
  429. * @return RT_ERROR_DRV_ERR for driver error
  430. */
  431. RTS_API rtError_t rtIpcSetMemoryName(const void *ptr, uint64_t byteCount, char *name, uint32_t len);
  432. /**
  433. * @ingroup dvrt_mem
  434. * @brief destroy an interprocess shared memory
  435. * @param [in] name identification name
  436. * @return RT_ERROR_NONE on success
  437. * @return RT_ERROR_INVALID_VALUE on invalid input
  438. * @return RT_ERROR_DRV_ERR for driver error
  439. */
  440. RTS_API rtError_t rtIpcDestroyMemoryName(const char *name);
  441. /**
  442. * @ingroup dvrt_mem
  443. * @brief open an interprocess shared memory
  444. * @param [in|out] ptr device memory address pointer
  445. * @param [in] name identification name
  446. * @return RT_ERROR_NONE on success
  447. * @return RT_ERROR_INVALID_VALUE on invalid input
  448. * @return RT_ERROR_DRV_ERR for driver error
  449. */
  450. RTS_API rtError_t rtIpcOpenMemory(void **ptr, const char *name);
  451. /**
  452. * @ingroup dvrt_mem
  453. * @brief close an interprocess shared memory
  454. * @param [in] ptr device memory address pointer
  455. * @note this function takes no name parameter; ptr is its only argument
  456. * @return RT_ERROR_NONE on success
  457. * @return RT_ERROR_INVALID_VALUE on invalid input
  458. * @return RT_ERROR_DRV_ERR for driver error
  459. */
  460. RTS_API rtError_t rtIpcCloseMemory(const void *ptr);
  461. /**
  462. * @ingroup dvrt_mem
  463. * @brief HCCL async memory copy
  464. * @param [in] index sq index
  465. * @param [in] wqeIndex module index
  466. * @param [in] stream asynchronous task stream
  467. * @return RT_ERROR_NONE on success
  468. * @return RT_ERROR_INVALID_VALUE on invalid input
  469. * @return RT_ERROR_DRV_ERR for driver error
  470. */
  471. RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t stream);
  472. /**
  473. * @ingroup dvrt_mem
  474. * @brief IPC set mem pid
  475. * @param [in] name name to be queried
  476. * @param [in] pid array of process ids
  477. * @param [in] num length of pid[]
  478. * @return RT_ERROR_NONE on success
  479. * @return RT_ERROR_INVALID_VALUE on invalid input
  480. * @return RT_ERROR_DRV_ERR for driver error
  481. */
  482. RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num);
  483. /**
  484. * @ingroup dvrt_mem
  485. * @brief HCCL async memory copy
  486. * @param [in] dbIndex single device 0
  487. * @param [in] dbInfo doorbell info
  488. * @param [in] stream asynchronous task stream
  489. * @return RT_ERROR_NONE on success
  490. * @return RT_ERROR_INVALID_VALUE on invalid input
  491. * @return RT_ERROR_DRV_ERR for driver error
  492. */
  493. RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream);
  494. #if defined(__cplusplus)
  495. }
  496. #endif
  497. #endif // __CCE_RUNTIME_MEM_H__

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。