You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

rt_ffts_plus_define.h 16 kB

4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of ffts plus
  4. */
  5. #ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
  6. #define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  // Hardware context type codes.
  // The numbering is not contiguous: the value 2 is not assigned to any enumerator.
  typedef enum tagFftsPlusHwType {
      RT_HW_CTX_TYPE_AIC             = 0,
      RT_HW_CTX_TYPE_AIV             = 1,
      RT_HW_CTX_TYPE_NOTIFY_WAIT     = 3,
      RT_HW_CTX_TYPE_NOTIFY_RECORD   = 4,
      RT_HW_CTX_TYPE_WRITE_VALUE     = 5,
      RT_HW_CTX_TYPE_MIX_AIC         = 6,
      RT_HW_CTX_TYPE_MIX_AIV         = 7,
      RT_HW_CTX_TYPE_SDMA            = 8,
      RT_HW_CTX_TYPE_FLUSH_DATA      = 9,
      RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
      RT_HW_CTX_TYPE_WRITEBACK_DATA  = 11,
      RT_HW_CTX_TYPE_AICPU           = 12,
      RT_HW_CTX_TYPE_LOAD            = 13,
      RT_HW_CTX_TYPE_MAX,  // implicit value: one greater than RT_HW_CTX_TYPE_LOAD
  } rtFftsPlusHwType_t;
  // Software context type codes.
  // Numbering starts at 1; the value 0 is not assigned to any enumerator.
  typedef enum tagFftsPlusSoftType {
      RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
      RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
      RT_SOFT_CTX_TYPE_AT_START    = 3,
      RT_SOFT_CTX_TYPE_AT_END      = 4,
      RT_SOFT_CTX_TYPE_LABEL       = 5,
      RT_SOFT_CTX_TYPE_MAX,  // implicit value: one greater than RT_SOFT_CTX_TYPE_LABEL
  } rtFftsPlusSoftType_t;
  // Condition (comparison) type codes, numbered contiguously from 0.
  typedef enum tagFftsPlusCondType {
      RT_COND_TYPE_EQUAL            = 0,
      RT_COND_TYPE_NOTEQUAL         = 1,
      RT_COND_TYPE_GREATER          = 2,
      RT_COND_TYPE_GREATER_OR_EQUAL = 3,
      RT_COND_TYPE_LESS             = 4,
      RT_COND_TYPE_LESS_OR_EQUAL    = 5,
      RT_COND_TYPE_MAX,  // implicit value: one greater than RT_COND_TYPE_LESS_OR_EQUAL
  } rtFftsPlusCondType_t;
  // Definitions of the ffts plus contexts.
  // Number of uint16_t entries in a context's successor list (26 * 2 = 52 bytes).
  #define RT_CTX_SUCCESSOR_NUM 26

  // ffts plus common context.
  // The byte ranges noted below cover offsets 0 through 127 (128 bytes in total).
  typedef struct tagFftsPlusComCtx {
      // bytes 0-3
      uint8_t hardwareContextType;
      uint8_t softwareContextType;
      uint8_t successorNum;
      uint8_t res1 : 7;
      uint8_t aten : 1;

      // bytes 4-7
      uint8_t res2;
      uint8_t res3;
      uint8_t predCntInit;
      uint8_t predCnt;

      // bytes 8-11
      uint32_t res4;

      // bytes 12-63
      uint16_t successorList[RT_CTX_SUCCESSOR_NUM];

      // bytes 64-71
      uint32_t res5[2];

      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;

      // bytes 76-127
      uint32_t res6[13];
  } rtFftsPlusComCtx_t;
  76. // aic/aiv context
  77. typedef struct tagFftsPlusAicAivCtx {
  78. // 0-3 bytes
  79. uint8_t hardwareContextType;
  80. uint8_t softwareContextType;
  81. uint8_t successorNum;
  82. uint8_t res1: 7;
  83. uint8_t aten: 1;
  84. // 4-7
  85. uint8_t res2;
  86. uint8_t res3;
  87. uint8_t predCntInit;
  88. uint8_t predCnt;
  89. // 8-11
  90. uint32_t res4;
  91. // 12-63
  92. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  93. // 64-67
  94. uint16_t stat: 1;
  95. uint16_t schem: 2;
  96. uint16_t icachePrefetchCnt: 5;
  97. uint16_t res5: 7;
  98. uint16_t atm: 1;
  99. uint16_t prefetchEnableBitmap: 4;
  100. uint16_t res6: 4;
  101. uint16_t prefetchOnceBitmap: 4;
  102. uint16_t res7: 4;
  103. // 68-71
  104. uint32_t res8;
  105. // 72-75
  106. uint16_t threadId;
  107. uint16_t threadDim;
  108. // 76-79
  109. uint16_t nonTailBlockdim;
  110. uint16_t tailBlockdim;
  111. // 80-83
  112. uint32_t taskParamPtrBaseL;
  113. // 84-87
  114. uint16_t taskParamPtrBaseH;
  115. uint16_t taskParamPtrOffset;
  116. // 88-95
  117. uint32_t res9;
  118. uint32_t res10;
  119. // 96-103
  120. uint32_t nonTailTaskStartPcL;
  121. uint16_t nonTailTaskStartPcH;
  122. uint16_t res11;
  123. // 104-111
  124. uint32_t tailTaskStartPcL;
  125. uint16_t tailTaskStartPcH;
  126. uint16_t res12;
  127. // 112-119
  128. uint32_t res13;
  129. uint32_t res14;
  130. // 120-127
  131. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  132. } rtFftsPlusAicAivCtx_t;
  133. // mix aic/aiv context
  134. typedef struct tagFftsPlusMixAicAivCtx {
  135. // 0-3 bytes
  136. uint8_t hardwareContextType;
  137. uint8_t softwareContextType;
  138. uint8_t successorNum;
  139. uint8_t res1: 7;
  140. uint8_t aten: 1;
  141. // 4-7
  142. uint8_t res2;
  143. uint8_t res3;
  144. uint8_t predCntInit;
  145. uint8_t predCnt;
  146. // 8-11
  147. uint32_t res4;
  148. // 12-63
  149. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  150. // 64-67
  151. uint16_t stat: 1;
  152. uint16_t schem: 2;
  153. uint16_t icachePrefetchCnt: 5;
  154. uint16_t res5: 7;
  155. uint16_t atm: 1;
  156. uint16_t prefetchEnableBitmap: 4;
  157. uint16_t res6: 4;
  158. uint16_t prefetchOnceBitmap: 4;
  159. uint16_t res7: 4;
  160. // 68-71
  161. uint16_t res8;
  162. uint8_t nonTailBlockRatioN;
  163. uint8_t tailBlockRatioN;
  164. // 72-75
  165. uint16_t threadId;
  166. uint16_t threadDim;
  167. // 76-79
  168. uint16_t nonTailBlockdim;
  169. uint16_t tailBlockdim;
  170. // 80-87
  171. uint32_t aicTaskParamPtrL;
  172. uint16_t aicTaskParamPtrH;
  173. uint16_t aicTaskParamPtrOffset;
  174. // 88-95
  175. uint32_t aivTaskParamPtrL;
  176. uint16_t aivTaskParamPtrH;
  177. uint16_t aivTaskParamPtrOffset;
  178. // 96-103
  179. uint32_t nonTailAicTaskStartPcL;
  180. uint16_t nonTailAicTaskStartPcH;
  181. uint16_t tailAicTaskStartPcH;
  182. // 104-111
  183. uint32_t tailAicTaskStartPcL;
  184. uint32_t nonTailAivTaskStartPcL;
  185. // 112-119
  186. uint16_t nontailAivTaskStartPcH;
  187. uint16_t tailAivTaskStartPcH;
  188. uint32_t tailAivTaskStartPcL;
  189. // 120-127
  190. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  191. } rtFftsPlusMixAicAivCtx_t;
  192. // adma context
  193. typedef struct tagFftsPlusSdmaCtx {
  194. // 0-3 bytes
  195. uint8_t hardwareContextType;
  196. uint8_t softwareContextType;
  197. uint8_t successorNum;
  198. uint8_t res1: 7;
  199. uint8_t aten: 1;
  200. // 4-7
  201. uint8_t res2;
  202. uint8_t res3;
  203. uint8_t predCntInit;
  204. uint8_t predCnt;
  205. // 8-11
  206. uint32_t res4;
  207. // 12-63
  208. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  209. // 64-67
  210. uint8_t sat: 1;
  211. uint8_t res5: 7;
  212. uint8_t res6: 7;
  213. uint8_t atm: 1;
  214. uint16_t res7;
  215. // 68-71
  216. uint32_t res8;
  217. // 72-75
  218. uint16_t threadId;
  219. uint16_t threadDim;
  220. // 76-79
  221. uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  222. // 80-83
  223. uint16_t sourceStreamId;
  224. uint16_t sourceSubstreamId;
  225. // 84-87
  226. uint16_t destinationStreamId;
  227. uint16_t destinationSubstreamId;
  228. // 88-127
  229. uint32_t sourceAddressBaseL;
  230. uint32_t sourceAddressBaseH;
  231. uint32_t sourceAddressOffset;
  232. uint32_t destinationAddressBaseL;
  233. uint32_t destinationAddressBaseH;
  234. uint32_t destinationAddressOffset;
  235. uint32_t nonTailDataLength;
  236. uint32_t tailDataLength;
  237. uint32_t res9[2];
  238. } rtFftsPlusSdmaCtx_t;
  239. // ffts plus notify record/wait context
  240. typedef struct tagFftsPlusNotifyCtx {
  241. // 0-3 bytes
  242. uint8_t hardwareContextType;
  243. uint8_t softwareContextType;
  244. uint8_t successorNum;
  245. uint8_t res1: 7;
  246. uint8_t aten: 1;
  247. // 4-7
  248. uint8_t res2;
  249. uint8_t res3;
  250. uint8_t predCntInit;
  251. uint8_t predCnt;
  252. // 8-11
  253. uint32_t res4;
  254. // 12-63
  255. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  256. // 64-67
  257. uint16_t res5: 15;
  258. uint16_t atm: 1;
  259. uint16_t res6;
  260. // 68-71
  261. uint32_t res7;
  262. // 72-75
  263. uint16_t threadId;
  264. uint16_t threadDim;
  265. // 76-79
  266. uint16_t notifyIdBase;
  267. uint16_t res8;
  268. // 80-127
  269. uint32_t res9[12];
  270. } rtFftsPlusNotifyCtx_t;
  271. // write Value context
  272. typedef struct tagFftsPlusWriteValueCtx {
  273. // 0-3 bytes
  274. uint8_t hardwareContextType;
  275. uint8_t softwareContextType;
  276. uint8_t successorNum;
  277. uint8_t res1: 7;
  278. uint8_t aten: 1;
  279. // 4-7
  280. uint8_t res2;
  281. uint8_t res3;
  282. uint8_t predCntInit;
  283. uint8_t predCnt;
  284. // 8-11
  285. uint32_t res4;
  286. // 12-63
  287. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  288. // 64-67
  289. uint16_t res5: 15;
  290. uint16_t atm: 1;
  291. uint16_t res6;
  292. // 68-71
  293. uint32_t res7;
  294. // 72-75
  295. uint16_t threadId;
  296. uint16_t threadDim;
  297. // 76-79
  298. uint8_t awSize: 3;
  299. uint8_t snoop: 1;
  300. uint8_t res8: 4;
  301. uint8_t awCache: 4;
  302. uint8_t awProt: 3;
  303. uint8_t va: 1;
  304. uint16_t res9;
  305. // 80-83
  306. uint32_t writeAddressBaseL;
  307. // 84-87
  308. uint32_t writeAddressBaseH: 17;
  309. uint32_t res10: 15;
  310. // 88-91
  311. uint32_t writeAddressOffset;
  312. // 92-95
  313. uint32_t res11;
  314. // 96-111
  315. uint32_t writeValue[4]; // write_value_00 -> write_value_03
  316. // 112-127
  317. uint32_t res12[4];
  318. } rtFftsPlusWriteValueCtx_t;
  319. // ai cpu context
  320. typedef struct tagFftsPlusAiCpuCtx {
  321. // 0-3 bytes
  322. uint8_t hardwareContextType;
  323. uint8_t softwareContextType;
  324. uint8_t successorNum;
  325. uint8_t res1: 7;
  326. uint8_t aten: 1;
  327. // 4-7
  328. uint8_t res2;
  329. uint8_t res3;
  330. uint8_t predCntInit;
  331. uint8_t predCnt;
  332. // 8-11
  333. uint32_t res4;
  334. // 12-63
  335. uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  336. // 64-67
  337. uint16_t sat: 1;
  338. uint16_t res5: 14;
  339. uint16_t atm: 1;
  340. uint16_t res6;
  341. // 68-71
  342. uint16_t sqeIndex;
  343. uint8_t kernelType: 7;
  344. uint8_t bm: 1;
  345. uint8_t topicType: 4;
  346. uint8_t qos: 3;
  347. uint8_t res7: 1;
  348. // 72-75
  349. uint16_t threadId;
  350. uint16_t threadDim;
  351. // 76-79
  352. uint16_t nonTailBlockdim;
  353. uint16_t tailBlockdim;
  354. // 80-115
  355. uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  356. // 116--119
  357. uint32_t res8;
  358. // 120-123
  359. uint32_t subtopicId: 12;
  360. uint32_t topicId: 6;
  361. uint32_t groupId: 6;
  362. uint32_t usrDataLength: 8;
  363. // 124-127
  364. uint32_t taskParamOffset;
  365. } rtFftsPlusAiCpuCtx_t;
  366. // data context
  367. typedef struct tagFftsPlusDataCtx {
  368. // 0-3 bytes
  369. uint8_t hardwareContextType;
  370. uint8_t softwareContextType;
  371. uint8_t successorNum;
  372. uint8_t res1: 7;
  373. uint8_t aten: 1;
  374. // 4-7
  375. uint8_t res2;
  376. uint8_t res3;
  377. uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  378. uint8_t cnt; // cons_cnt / prod_cnt
  379. // 8-11
  380. uint32_t res4;
  381. // 12-63
  382. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  383. // 64-67
  384. uint16_t res5: 15;
  385. uint16_t atm: 1;
  386. uint16_t res6;
  387. // 68-81
  388. uint16_t origConsumerCounter;
  389. uint16_t runConsumerCounter;
  390. // 72-75
  391. uint16_t threadId;
  392. uint16_t threadDim;
  393. // 76-79
  394. uint32_t res7;
  395. // 80-83
  396. uint32_t addressBaseL;
  397. // 84-87
  398. uint32_t addressBaseH;
  399. // 88-91
  400. uint32_t addressOffset;
  401. // 92-95
  402. uint32_t res8;
  403. // 96-99
  404. uint16_t nonTailNumOutter;
  405. uint16_t nonTailNumInner;
  406. // 100-103
  407. uint32_t nonTailLengthInner;
  408. // 104-107
  409. uint32_t nonTailStrideOutter;
  410. // 108-111
  411. uint32_t nonTailStrideInner;
  412. // 112-115
  413. uint16_t tailNumOutter;
  414. uint16_t tailNumInner;
  415. // 116-119
  416. uint32_t tailLengthInner;
  417. // 120-123
  418. uint32_t tailStrideOutter;
  419. // 124-127
  420. uint32_t tailStrideInner;
  421. } rtFftsPlusDataCtx_t;
  422. // at start context
  423. typedef struct tagFftsPlusAtStartCtx {
  424. // 0-3 bytes
  425. uint8_t hardwareContextType;
  426. uint8_t softwareContextType;
  427. uint8_t successorNum;
  428. uint8_t res1: 7;
  429. uint8_t aten: 1;
  430. // 4-7
  431. uint8_t res2;
  432. uint8_t res3;
  433. uint8_t predCntInit;
  434. uint8_t predCnt;
  435. // 8-11
  436. uint32_t res4;
  437. // 12-63
  438. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  439. // 64-67
  440. uint16_t res5;
  441. uint16_t res6;
  442. // 68-71
  443. uint16_t res7;
  444. uint16_t res8;
  445. // 72-75
  446. uint16_t threadId;
  447. uint16_t threadDim;
  448. // 76-79
  449. uint16_t threadIdInit;
  450. uint16_t threadWindowSize;
  451. // 80-127
  452. uint16_t res9[12];
  453. } rtFftsPlusAtStartCtx_t;
  // at end context.
  // Slot-array lengths: 12 + 12 uint16_t entries occupy bytes 12-59.
  #define RT_CTX_SUCC_AT_START_SLOT_NUM 12
  #define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12

  // The byte ranges noted below cover offsets 0 through 127 (128 bytes in total).
  // This context has no successorNum/successorList; bytes 2-3 hold the two slot
  // counts and bytes 12-59 hold the two slot arrays instead.
  typedef struct tagFftsPlusAtEndCtx {
      // bytes 0-3
      uint8_t hardwareContextType;
      uint8_t softwareContextType;
      uint8_t atStartSlotNumber;
      uint8_t outLabelSlotNumber : 7;
      uint8_t aten : 1;

      // bytes 4-7
      uint8_t res1;
      uint8_t res2;
      uint8_t predCntInit;
      uint8_t predCnt;

      // bytes 8-11
      uint32_t res3;

      // bytes 12-59
      uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
      uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];

      // bytes 60-63
      uint16_t res4;
      uint16_t res5;

      // bytes 64-67
      uint16_t res6;
      uint16_t res7;

      // bytes 68-71
      uint16_t res8;
      uint16_t res9;

      // bytes 72-75
      uint16_t threadId;
      uint16_t res10;

      // bytes 76-79
      uint16_t res11;
      uint16_t res12;

      // bytes 80-127
      uint32_t res13[12];
  } rtFftsPlusAtEndCtx_t;
  492. // label context
  493. typedef struct tagFftsPlusLabelCtx {
  494. // 0-3 bytes
  495. uint8_t hardwareContextType;
  496. uint8_t softwareContextType;
  497. uint8_t successorNum;
  498. uint8_t res1;
  499. // 4-7
  500. uint8_t res2;
  501. uint8_t res3;
  502. uint8_t predCntInit;
  503. uint8_t predCnt;
  504. // 8-11
  505. uint32_t res4;
  506. // 12-63
  507. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  508. // 64-79
  509. uint16_t res5[8];
  510. // 80-127
  511. uint32_t res6[12];
  512. } rtFftsPlusLabelCtx_t;
  513. // case switch context
  514. typedef struct tagFftsPlusCaseSwitchCtx {
  515. // 0-3 bytes
  516. uint8_t hardwareContextType;
  517. uint8_t softwareContextType;
  518. uint8_t successorNum;
  519. uint8_t res1: 7;
  520. uint8_t aten: 1;
  521. // 4-7
  522. uint8_t startLabelId;
  523. uint8_t labelListLen;
  524. uint8_t predCntInit;
  525. uint8_t predCnt;
  526. // 8-11
  527. uint32_t res2;
  528. // 12-63
  529. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  530. // 64-67
  531. uint16_t res3: 15;
  532. uint16_t atm: 1;
  533. uint16_t res4;
  534. // 68-71
  535. uint32_t res5;
  536. // 72-75
  537. uint16_t threadId;
  538. uint16_t threadDim;
  539. // 76-79
  540. uint8_t arSize: 3;
  541. uint8_t snoop: 1;
  542. uint8_t res6: 4;
  543. uint8_t arCache: 4;
  544. uint8_t arProt: 3;
  545. uint8_t va: 1;
  546. uint16_t res7;
  547. // 80-83
  548. uint32_t loadAddress0BaseL;
  549. // 84-87
  550. uint32_t loadAddress0BaseH: 17;
  551. uint32_t res8: 14;
  552. uint32_t ld0En: 1;
  553. // 88-91
  554. uint32_t loadAddress0Offset;
  555. // 92-95
  556. uint32_t res9;
  557. // 96-99
  558. uint32_t loadAddress1BaseL;
  559. // 100-103
  560. uint32_t loadAddress1BaseH: 17;
  561. uint32_t res10: 14;
  562. uint32_t ld1En: 1;
  563. // 104-107
  564. uint32_t loadAddress1Offset;
  565. // 108-127
  566. uint32_t res11[5];
  567. } rtFftsPlusCaseSwitchCtx_t;
  568. // case default context
  569. typedef struct tagFftsPlusCaseDefCtx {
  570. // 0-3 bytes
  571. uint8_t hardwareContextType;
  572. uint8_t softwareContextType;
  573. uint8_t successorNum;
  574. uint8_t res1: 7;
  575. uint8_t aten: 1;
  576. // 4-7
  577. uint8_t startLabelId;
  578. uint8_t labelListLen;
  579. uint8_t predCntInit;
  580. uint8_t predCnt;
  581. // 8-11
  582. uint32_t res2;
  583. // 12-63
  584. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  585. // 64-67
  586. uint16_t res3;
  587. uint16_t res4;
  588. // 68-127
  589. uint32_t res5[15];
  590. } rtFftsPlusCaseDefCtx_t;
  // condition switch context.
  // Lengths of the two successor lists: 12 + 14 uint16_t entries occupy bytes 12-63.
  #define RT_CTX_TRUE_SUCCESSOR_NUM 12
  #define RT_CTX_FALSE_SUCCESSOR_NUM 14

  // The byte ranges noted below cover offsets 0 through 127 (128 bytes in total).
  // Bytes 2-3 hold separate true/false successor counts, and bytes 12-63 hold the
  // two corresponding successor lists.
  typedef struct tagFftsPlusCondSwitchCtx {
      // bytes 0-3
      uint8_t hardwareContextType;
      uint8_t softwareContextType;
      uint8_t trueSuccessorNum;
      uint8_t falseSuccessorNum : 7;
      uint8_t aten : 1;

      // bytes 4-7
      uint8_t condition;
      uint8_t res1;
      uint8_t predCntInit;
      uint8_t predCnt;

      // bytes 8-11
      uint32_t res2;

      // bytes 12-63
      uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
      uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];

      // bytes 64-67
      uint16_t res3 : 15;
      uint16_t atm : 1;
      uint16_t res4;

      // bytes 68-71
      uint32_t res5;

      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;

      // bytes 76-79: two 8-bit groups of bit-fields followed by a 16-bit reserved field
      uint8_t arSize : 3;
      uint8_t snoop : 1;
      uint8_t res6 : 4;
      uint8_t arCache : 4;
      uint8_t arProt : 3;
      uint8_t va : 1;
      uint16_t res7;

      // bytes 80-83
      uint32_t loadAddress0BaseL;

      // bytes 84-87: 17-bit high base address, 14 reserved bits, 1-bit ld0En
      uint32_t loadAddress0BaseH : 17;
      uint32_t res8 : 14;
      uint32_t ld0En : 1;

      // bytes 88-91
      uint32_t loadAddress0Offset;

      // bytes 92-95
      uint32_t res9;

      // bytes 96-99
      uint32_t loadAddress1BaseL;

      // bytes 100-103: 17-bit high base address, 14 reserved bits, 1-bit ld1En
      uint32_t loadAddress1BaseH : 17;
      uint32_t res10 : 14;
      uint32_t ld1En : 1;

      // bytes 104-107
      uint32_t loadAddress1Offset;

      // bytes 108-127
      uint32_t res11[3];
      uint32_t cmpValue1;
      uint32_t cmpValue2;
  } rtFftsPlusCondSwitchCtx_t;
  651. #pragma pack(pop)
  652. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  653. }
  654. #endif
  655. #endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示