You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

rt_ffts_plus_define.h 17 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of ffts plus
  4. */
  5. #ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  6. #define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  // Hardware context type.
  // Assigned values run from 0 to 13; the value 2 is not assigned.
  // RT_HW_CTX_TYPE_MAX is one greater than the largest assigned value.
  typedef enum tagFftsPlusHwType {
      RT_HW_CTX_TYPE_AIC = 0,
      RT_HW_CTX_TYPE_AIV = 1,
      RT_HW_CTX_TYPE_NOTIFY_WAIT = 3,
      RT_HW_CTX_TYPE_NOTIFY_RECORD = 4,
      RT_HW_CTX_TYPE_WRITE_VALUE = 5,
      RT_HW_CTX_TYPE_MIX_AIC = 6,
      RT_HW_CTX_TYPE_MIX_AIV = 7,
      RT_HW_CTX_TYPE_SDMA = 8,
      RT_HW_CTX_TYPE_FLUSH_DATA = 9,
      RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
      RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
      RT_HW_CTX_TYPE_AICPU = 12,
      RT_HW_CTX_TYPE_LOAD = 13,
      RT_HW_CTX_TYPE_MAX = 14,
  } rtFftsPlusHwType_t;
  // Software context type.
  // Assigned values run from 1 to 6; RT_SOFT_CTX_TYPE_MAX is one greater
  // than the largest assigned value.
  typedef enum tagFftsPlusSoftType {
      RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
      RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
      RT_SOFT_CTX_TYPE_AT_START = 3,
      RT_SOFT_CTX_TYPE_AT_END = 4,
      RT_SOFT_CTX_TYPE_LABEL = 5,
      RT_SOFT_CTX_PERSISTENT_CACHE = 6,
      RT_SOFT_CTX_TYPE_MAX = 7,
  } rtFftsPlusSoftType_t;
  // Combined 16-bit context type codes.
  // NOTE(review): the numeric values line up with
  // (software type << 8) | hardware type, e.g. 0x010D pairs software type 1
  // (COND_SWITCH) with hardware type 13 (LOAD), and the entries 0x0000-0x000C
  // equal the hardware type values. Presumably intentional; confirm before
  // relying on it.
  typedef enum tagFftsPlusContextType {
      RT_CTX_TYPE_AICORE = 0x0000,
      RT_CTX_TYPE_AIV = 0x0001,
      RT_CTX_TYPE_NOTIFY_WAIT = 0x0003,
      RT_CTX_TYPE_NOTIFY_RECORD = 0x0004,
      RT_CTX_TYPE_WRITE_VALUE = 0x0005,
      RT_CTX_TYPE_MIX_AIC = 0x0006,
      RT_CTX_TYPE_MIX_AIV = 0x0007,
      RT_CTX_TYPE_SDMA = 0x0008,
      RT_CTX_TYPE_FLUSH_DATA = 0x0009,
      RT_CTX_TYPE_INVALIDATE_DATA = 0x000A,
      RT_CTX_TYPE_WRITEBACK_DATA = 0x000B,
      RT_CTX_TYPE_AICPU = 0x000C,
      RT_CTX_TYPE_COND_SWITCH = 0x010D,
      RT_CTX_TYPE_CASE_SWITCH = 0x020D,
      RT_CTX_TYPE_AT_START = 0x0300,
      RT_CTX_TYPE_AT_END = 0x0400,
      RT_CTX_TYPE_LABEL = 0x0500,
      RT_CTX_TYPE_PERSISTENT_CACHE = 0x0600,
  } rtFftsPlusContextType_t;
  // Condition type: the six comparison kinds, numbered 0 to 5.
  // RT_COND_TYPE_MAX is one greater than the largest assigned value.
  typedef enum tagFftsPlusCondType {
      RT_COND_TYPE_EQUAL = 0,
      RT_COND_TYPE_NOTEQUAL = 1,
      RT_COND_TYPE_GREATER = 2,
      RT_COND_TYPE_GREATER_OR_EQUAL = 3,
      RT_COND_TYPE_LESS = 4,
      RT_COND_TYPE_LESS_OR_EQUAL = 5,
      RT_COND_TYPE_MAX = 6,
  } rtFftsPlusCondType_t;
  // The definition of ffts plus context.
  // Number of uint16_t entries in a context's successor list (26 * 2 = 52 bytes).
  #define RT_CTX_SUCCESSOR_NUM 26

  // FFTS+ common context.
  // The field sizes add up to 128 bytes; the "bytes a-b" comments give each
  // group's byte offsets from the start of the struct.
  typedef struct tagFftsPlusComCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t successorNum;
      uint8_t rsv1 : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t rsv2;
      uint8_t rsv3;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t rsv4;
      // bytes 12-63
      uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
      // bytes 64-71
      uint32_t rsv5[2];
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-127
      uint32_t res6[13];
  } rtFftsPlusComCtx_t;
  96. // aic/aiv context
  97. typedef struct tagFftsPlusAicAivCtx {
  98. // 0-3 bytes
  99. uint16_t contextType;
  100. uint8_t successorNum;
  101. uint8_t resv : 7;
  102. uint8_t aten : 1;
  103. // 4-7
  104. uint8_t prefetchConfig;
  105. uint8_t resv1;
  106. uint8_t predCntInit;
  107. uint8_t predCnt;
  108. // 8-11
  109. uint32_t resv2;
  110. // 12-63
  111. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  112. // 64-67
  113. uint16_t resv3 : 1;
  114. uint16_t schem : 2;
  115. uint16_t icachePrefetchCnt : 5;
  116. uint16_t resv4 : 7;
  117. uint16_t atm : 1;
  118. uint16_t prefetchEnableBitmap : 4;
  119. uint16_t res6 : 4;
  120. uint16_t prefetchOnceBitmap : 4;
  121. uint16_t res7 : 4;
  122. // 68-71
  123. uint16_t pmg : 2;
  124. uint16_t ns : 1;
  125. uint16_t partId : 8;
  126. uint16_t res8 : 1;
  127. uint16_t qos : 4;
  128. uint16_t res9;
  129. // 72-75
  130. uint16_t threadId;
  131. uint16_t threadDim;
  132. // 76-79
  133. uint16_t nonTailBlockdim;
  134. uint16_t tailBlockdim;
  135. // 80-83
  136. uint32_t taskParamPtrBaseL;
  137. // 84-87
  138. uint16_t taskParamPtrBaseH;
  139. uint16_t taskParamPtrOffset;
  140. // 88-95
  141. uint32_t res10;
  142. uint32_t res11;
  143. // 96-103
  144. uint32_t nonTailTaskStartPcL;
  145. uint16_t nonTailTaskStartPcH;
  146. uint16_t res12;
  147. // 104-111
  148. uint32_t tailTaskStartPcL;
  149. uint16_t tailTaskStartPcH;
  150. uint16_t res13;
  151. // 112-119
  152. uint32_t res14;
  153. uint32_t res15;
  154. // 120-127
  155. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  156. } rtFftsPlusAicAivCtx_t;
  157. // mix aic/aiv context
  158. typedef struct tagFftsPlusMixAicAivCtx {
  159. // 0-3 bytes
  160. uint16_t contextType;
  161. uint8_t successorNum;
  162. uint8_t reserved1 : 7;
  163. uint8_t aten : 1;
  164. // 4-7
  165. uint8_t prefetchConfig;
  166. uint8_t reserved2;
  167. uint8_t predCntInit;
  168. uint8_t predCnt;
  169. // 8-11
  170. uint32_t reserved3;
  171. // 12-63
  172. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  173. // 64-67
  174. uint16_t reserved4 : 1;
  175. uint16_t schem : 2;
  176. uint16_t aicIcachePrefetchCnt : 5;
  177. uint16_t aivIcachePrefetchCnt : 5;
  178. uint16_t reserved5 : 2;
  179. uint16_t atm : 1;
  180. uint16_t prefetchEnableBitmap : 4;
  181. uint16_t reserved6 : 4;
  182. uint16_t prefetchOnceBitmap : 4;
  183. uint16_t reserved7 : 4;
  184. // 68-71
  185. uint16_t pmg : 2;
  186. uint16_t ns : 1;
  187. uint16_t partId : 8;
  188. uint16_t reserved8 : 1;
  189. uint16_t qos : 4;
  190. uint8_t nonTailBlockRatioN;
  191. uint8_t tailBlockRatioN;
  192. // 72-75
  193. uint16_t threadId;
  194. uint16_t threadDim;
  195. // 76-79
  196. uint16_t nonTailBlockdim;
  197. uint16_t tailBlockdim;
  198. // 80-87
  199. uint32_t aicTaskParamPtrL;
  200. uint16_t aicTaskParamPtrH;
  201. uint16_t aicTaskParamPtrOffset;
  202. // 88-95
  203. uint32_t aivTaskParamPtrL;
  204. uint16_t aivTaskParamPtrH;
  205. uint16_t aivTaskParamPtrOffset;
  206. // 96-103
  207. uint32_t nonTailAicTaskStartPcL;
  208. uint16_t nonTailAicTaskStartPcH;
  209. uint16_t tailAicTaskStartPcH;
  210. // 104-111
  211. uint32_t tailAicTaskStartPcL;
  212. uint32_t nonTailAivTaskStartPcL;
  213. // 112-119
  214. uint16_t nonTailAivTaskStartPcH;
  215. uint16_t tailAivTaskStartPcH;
  216. uint32_t tailAivTaskStartPcL;
  217. // 120-127
  218. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  219. } rtFftsPlusMixAicAivCtx_t;
  220. // sdma context
  221. typedef struct tagFftsPlusSdmaCtx {
  222. // 0-3 bytes
  223. uint16_t contextType;
  224. uint8_t successorNum;
  225. uint8_t res1 : 7;
  226. uint8_t aten : 1;
  227. // 4-7
  228. uint8_t res2;
  229. uint8_t res3;
  230. uint8_t predCntInit;
  231. uint8_t predCnt;
  232. // 8-11
  233. uint32_t res4;
  234. // 12-63
  235. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  236. // 64-67
  237. uint8_t res5;
  238. uint8_t res6 : 7;
  239. uint8_t atm : 1;
  240. uint16_t res7;
  241. // 68-71
  242. uint16_t pmg : 2;
  243. uint16_t ns : 1;
  244. uint16_t partId : 8;
  245. uint16_t res8 : 1;
  246. uint16_t qos : 4;
  247. uint16_t res9;
  248. // 72-75
  249. uint16_t threadId;
  250. uint16_t threadDim;
  251. // 76-79
  252. uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  253. // 80-83
  254. uint16_t sourceStreamId;
  255. uint16_t sourceSubstreamId;
  256. // 84-87
  257. uint16_t destinationStreamId;
  258. uint16_t destinationSubstreamId;
  259. // 88-127
  260. uint32_t sourceAddressBaseL;
  261. uint32_t sourceAddressBaseH;
  262. uint32_t sourceAddressOffset;
  263. uint32_t destinationAddressBaseL;
  264. uint32_t destinationAddressBaseH;
  265. uint32_t destinationAddressOffset;
  266. uint32_t nonTailDataLength;
  267. uint32_t tailDataLength;
  268. uint32_t res10[2];
  269. } rtFftsPlusSdmaCtx_t;
  270. // ffts plus notify record/wait context
  271. typedef struct tagFftsPlusNotifyCtx {
  272. // 0-3 bytes
  273. uint16_t contextType;
  274. uint8_t successorNum;
  275. uint8_t res : 7;
  276. uint8_t aten : 1;
  277. // 4-7
  278. uint8_t res1;
  279. uint8_t res2;
  280. uint8_t predCntInit;
  281. uint8_t predCnt;
  282. // 8-11
  283. uint32_t res3;
  284. // 12-63
  285. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  286. // 64-67
  287. uint16_t res4 : 14;
  288. uint16_t satm : 1;
  289. uint16_t atm : 1;
  290. uint16_t res6;
  291. // 68-71
  292. uint32_t res7;
  293. // 72-75
  294. uint16_t threadId;
  295. uint16_t threadDim;
  296. // 76-79
  297. uint16_t notifyIdBase;
  298. uint8_t autoWindow;
  299. uint8_t res8;
  300. // 80-127
  301. uint32_t res9[4];
  302. uint16_t notifyId[16];
  303. } rtFftsPlusNotifyCtx_t;
  304. // write Value context
  305. typedef struct tagFftsPlusWriteValueCtx {
  306. // 0-3 bytes
  307. uint16_t contextType;
  308. uint8_t successorNum;
  309. uint8_t resv1 : 7;
  310. uint8_t aten : 1;
  311. // 4-7
  312. uint8_t resv2;
  313. uint8_t resv3;
  314. uint8_t predCntInit;
  315. uint8_t predCnt;
  316. // 8-11
  317. uint32_t resv4;
  318. // 12-63
  319. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  320. // 64-67
  321. uint16_t resv5 : 15;
  322. uint16_t atm : 1;
  323. uint16_t resv6;
  324. // 68-71
  325. uint32_t resv7;
  326. // 72-75
  327. uint16_t threadId;
  328. uint16_t threadDim;
  329. // 76-79
  330. uint8_t awSize : 3;
  331. uint8_t awSnoop : 1;
  332. uint8_t resv8 : 4;
  333. uint8_t awCache : 4;
  334. uint8_t awProt : 3;
  335. uint8_t awVa : 1;
  336. uint8_t arSize : 3;
  337. uint8_t arSnoop : 1;
  338. uint8_t resv9 : 4;
  339. uint8_t arCache : 4;
  340. uint8_t arProt : 3;
  341. uint8_t arVa : 1;
  342. // 80-83
  343. uint32_t writeAddressBaseL;
  344. // 84-87
  345. uint32_t writeAddressBaseH : 17;
  346. uint32_t res10 : 15;
  347. // 88-91
  348. uint32_t writeAddressOffset;
  349. // 92-95
  350. uint32_t res11;
  351. // 96-111
  352. uint32_t writeValue[4]; // write_value_00 -> write_value_03
  353. // 112-127
  354. uint32_t res12[4];
  355. } rtFftsPlusWriteValueCtx_t;
  356. // ai cpu context
  357. typedef struct tagFftsPlusAiCpuCtx {
  358. // 0-3 bytes
  359. uint16_t contextType;
  360. uint8_t successorNum;
  361. uint8_t res1 : 7;
  362. uint8_t aten : 1;
  363. // 4-7
  364. uint8_t res2;
  365. uint8_t res3;
  366. uint8_t predCntInit;
  367. uint8_t predCnt;
  368. // 8-11
  369. uint32_t res4;
  370. // 12-63
  371. uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  372. // 64-67
  373. uint16_t res5 : 15;
  374. uint16_t atm : 1;
  375. uint16_t res6;
  376. // 68-71
  377. uint16_t sqeIndex;
  378. uint8_t kernelType : 7;
  379. uint8_t bm : 1;
  380. uint8_t topicType : 4;
  381. uint8_t qos : 3;
  382. uint8_t res7 : 1;
  383. // 72-75
  384. uint16_t threadId;
  385. uint16_t threadDim;
  386. // 76-79
  387. uint16_t nonTailBlockdim;
  388. uint16_t tailBlockdim;
  389. // 80-115
  390. uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  391. // 116--119
  392. uint32_t res8;
  393. // 120-123
  394. uint32_t subtopicId : 12;
  395. uint32_t topicId : 6;
  396. uint32_t groupId : 6;
  397. uint32_t usrDataLength : 8;
  398. // 124-127
  399. uint32_t taskParamOffset;
  400. } rtFftsPlusAiCpuCtx_t;
  401. // data context
  402. typedef struct tagFftsPlusDataCtx {
  403. // 0-3 bytes
  404. uint16_t contextType;
  405. uint8_t successorNum;
  406. uint8_t res1 : 7;
  407. uint8_t aten : 1;
  408. // 4-7
  409. uint8_t res2;
  410. uint8_t res3;
  411. uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  412. uint8_t cnt; // cons_cnt / prod_cnt
  413. // 8-11
  414. uint32_t res4;
  415. // 12-63
  416. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  417. // 64-67
  418. uint16_t res5 : 15;
  419. uint16_t atm : 1;
  420. uint16_t res6;
  421. // 68-71
  422. uint16_t pmg : 2;
  423. uint16_t ns : 1;
  424. uint16_t partId : 8;
  425. uint16_t res7 : 1;
  426. uint16_t qos : 4;
  427. uint16_t res8;
  428. // 72-75
  429. uint16_t threadId;
  430. uint16_t threadDim;
  431. // 76-79
  432. uint16_t origConsumerCounter;
  433. uint16_t runConsumerCounter;
  434. // 80-83
  435. uint32_t addressBaseL;
  436. // 84-87
  437. uint32_t addressBaseH;
  438. // 88-91
  439. uint32_t addressOffset;
  440. // 92-95
  441. uint32_t res9;
  442. // 96-99
  443. uint16_t nonTailNumOutter;
  444. uint16_t nonTailNumInner;
  445. // 100-103
  446. uint32_t nonTailLengthInner;
  447. // 104-107
  448. uint32_t nonTailStrideOutter;
  449. // 108-111
  450. uint32_t nonTailStrideInner;
  451. // 112-115
  452. uint16_t tailNumOutter;
  453. uint16_t tailNumInner;
  454. // 116-119
  455. uint32_t tailLengthInner;
  456. // 120-123
  457. uint32_t tailStrideOutter;
  458. // 124-127
  459. uint32_t tailStrideInner;
  460. } rtFftsPlusDataCtx_t;
  461. // at start context
  462. typedef struct tagFftsPlusAtStartCtx {
  463. // 0-3 bytes
  464. uint16_t contextType;
  465. uint8_t successorNum;
  466. uint8_t rs1 : 7;
  467. uint8_t aten : 1;
  468. // 4-7
  469. uint8_t rs2;
  470. uint8_t rs3;
  471. uint8_t predCntInit;
  472. uint8_t predCnt;
  473. // 8-11
  474. uint32_t rs4;
  475. // 12-63
  476. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  477. // 64-67
  478. uint16_t rs5;
  479. uint16_t rs6;
  480. // 68-71
  481. uint16_t rs7;
  482. uint16_t rs8;
  483. // 72-75
  484. uint16_t threadId;
  485. uint16_t threadDim;
  486. // 76-79
  487. uint16_t threadIdInit;
  488. uint16_t threadWindowSize;
  489. // 80-127
  490. uint32_t res9[12];
  491. } rtFftsPlusAtStartCtx_t;
  // At-end context.
  // Entry counts of the two uint16_t slot arrays in bytes 12-59 (12 + 12
  // entries = 48 bytes).
  #define RT_CTX_SUCC_AT_START_SLOT_NUM 12
  #define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12

  // The field sizes add up to 128 bytes; the "bytes a-b" comments give each
  // group's byte offsets from the start of the struct.
  // Bytes 2-3 hold atStartSlotNumber/outLabelSlotNumber instead of a
  // successorNum, and the slot after threadId is reserved (no threadDim).
  typedef struct tagFftsPlusAtEndCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t atStartSlotNumber;
      uint8_t outLabelSlotNumber : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t res1;
      uint8_t res2;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t res3;
      // bytes 12-35
      uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
      // bytes 36-59
      uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
      // bytes 60-63
      uint16_t res4;
      uint16_t res5;
      // bytes 64-67
      uint16_t res6;
      uint16_t res7;
      // bytes 68-71
      uint16_t res8;
      uint16_t res9;
      // bytes 72-75
      uint16_t threadId;
      uint16_t res10;
      // bytes 76-79
      uint16_t res11;
      uint16_t res12;
      // bytes 80-127
      uint32_t res13[12];
  } rtFftsPlusAtEndCtx_t;
  529. // label context
  530. typedef struct tagFftsPlusLabelCtx {
  531. // 0-3 bytes
  532. uint16_t contextType;
  533. uint8_t successorNum;
  534. uint8_t res1;
  535. // 4-7
  536. uint8_t res2;
  537. uint8_t res3;
  538. uint8_t predCntInit;
  539. uint8_t predCnt;
  540. // 8-11
  541. uint32_t res4;
  542. // 12-63
  543. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  544. // 64-79
  545. uint16_t res5[8];
  546. // 80-127
  547. uint32_t res6[12];
  548. } rtFftsPlusLabelCtx_t;
  549. // case switch context
  550. typedef struct tagFftsPlusCaseSwitchCtx {
  551. // 0-3 bytes
  552. uint16_t contextType;
  553. uint8_t successorNum;
  554. uint8_t resv0 : 7;
  555. uint8_t aten : 1;
  556. // 4-7
  557. uint8_t startLabelId;
  558. uint8_t labelListLen;
  559. uint8_t predCntInit;
  560. uint8_t predCnt;
  561. // 8-11
  562. uint32_t resv1;
  563. // 12-63
  564. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  565. // 64-67
  566. uint16_t resv2 : 15;
  567. uint16_t atm : 1;
  568. uint16_t resv3;
  569. // 68-71
  570. uint32_t resv4;
  571. // 72-75
  572. uint16_t threadId;
  573. uint16_t threadDim;
  574. // 76-79
  575. uint8_t arSize : 3;
  576. uint8_t snoop : 1;
  577. uint8_t resv5 : 4;
  578. uint8_t arCache : 4;
  579. uint8_t arProt : 3;
  580. uint8_t va : 1;
  581. uint16_t resv6;
  582. // 80-83
  583. uint32_t loadAddress0BaseL;
  584. // 84-87
  585. uint32_t loadAddress0BaseH : 17;
  586. uint32_t resv7 : 14;
  587. uint32_t ld0En : 1;
  588. // 88-91
  589. uint32_t loadAddress0Offset;
  590. // 92-95
  591. uint32_t resv8;
  592. // 96-99
  593. uint32_t loadAddress1BaseL;
  594. // 100-103
  595. uint32_t loadAddress1BaseH : 17;
  596. uint32_t resv9 : 14;
  597. uint32_t ld1En : 1;
  598. // 104-107
  599. uint32_t loadAddress1Offset;
  600. // 108-127
  601. uint32_t resv10[5];
  602. } rtFftsPlusCaseSwitchCtx_t;
  603. // case default context
  604. typedef struct tagFftsPlusCaseDefCtx {
  605. // 0-3 bytes
  606. uint16_t contextType;
  607. uint8_t successorNum;
  608. uint8_t rs0 : 7;
  609. uint8_t aten : 1;
  610. // 4-7
  611. uint8_t startLabelId;
  612. uint8_t labelListLen;
  613. uint8_t predCntInit;
  614. uint8_t predCnt;
  615. // 8-11
  616. uint32_t rs1;
  617. // 12-63
  618. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  619. // 64-67
  620. uint16_t rs2;
  621. uint16_t rs3;
  622. // 68-127
  623. uint32_t rs4[15];
  624. } rtFftsPlusCaseDefCtx_t;
  // Condition-switch context.
  // Entry counts of the true/false successor lists; together they fill
  // bytes 12-63 (13 + 13 uint16_t entries = 52 bytes).
  #define RT_CTX_TRUE_SUCCESSOR_NUM 13
  #define RT_CTX_FALSE_SUCCESSOR_NUM 13

  // The field sizes add up to 128 bytes; the "bytes a-b" comments give each
  // group's byte offsets from the start of the struct.
  // Bytes 2-3 carry separate true/false successor counts instead of a single
  // successorNum.
  typedef struct tagFftsPlusCondSwitchCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t trueSuccessorNum;
      uint8_t falseSuccessorNum : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t condition;
      uint8_t res1;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t res2;
      // bytes 12-37
      uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
      // bytes 38-63
      uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
      // bytes 64-67
      uint16_t res3 : 15;
      uint16_t atm : 1;
      uint16_t res4;
      // bytes 68-71
      uint32_t res5;
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-79
      uint8_t arSize : 3;
      uint8_t snoop : 1;
      uint8_t res6 : 4;
      uint8_t arCache : 4;
      uint8_t arProt : 3;
      uint8_t va : 1;
      uint16_t res7;
      // bytes 80-83
      uint32_t loadAddress0BaseL;
      // bytes 84-87
      uint32_t loadAddress0BaseH : 17;
      uint32_t res8 : 14;
      uint32_t ld0En : 1;
      // bytes 88-91
      uint32_t loadAddress0Offset;
      // bytes 92-95
      uint32_t res9;
      // bytes 96-99
      uint32_t loadAddress1BaseL;
      // bytes 100-103
      uint32_t loadAddress1BaseH : 17;
      uint32_t res10 : 14;
      uint32_t ld1En : 1;
      // bytes 104-107
      uint32_t loadAddress1Offset;
      // bytes 108-119
      uint32_t res11[3];
      // bytes 120-123
      uint32_t cmpValue1;
      // bytes 124-127
      uint32_t cmpValue2;
  } rtFftsPlusCondSwitchCtx_t;
  684. // ffts plus persistent cache context
  685. typedef struct tagFftsPlusPersistentCacheCtx {
  686. // 0- 3bytes
  687. uint16_t contextType;
  688. uint8_t successorNum;
  689. uint8_t res1 : 7;
  690. uint8_t aten : 1;
  691. // 4-7
  692. uint8_t res2[2];
  693. uint8_t predCntInit;
  694. uint8_t predCnt;
  695. // 8-11
  696. uint8_t res3[4];
  697. // 12-63
  698. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  699. // 64-67
  700. uint8_t persistentEnable : 1;
  701. uint8_t res4 : 7;
  702. uint8_t res5;
  703. uint16_t persistentSize;
  704. // 68-71
  705. uint32_t persistentId;
  706. // 72-127
  707. uint32_t res6[14];
  708. } rtFftsPlusPersistentCacheCtx_t;
  709. #pragma pack(pop)
  710. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  711. }
  712. #endif
  713. #endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示