You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

rt_ffts_plus_define.h 16 kB

4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of ffts plus
  4. */
  5. #ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
  6. #define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  /*
   * Hardware context type identifiers.
   *
   * Value 2 is not assigned in this list. RT_HW_CTX_TYPE_MAX has no explicit
   * initializer, so it takes the value following RT_HW_CTX_TYPE_LOAD (14).
   */
  typedef enum tagFftsPlusHwType {
      RT_HW_CTX_TYPE_AIC             = 0,
      RT_HW_CTX_TYPE_AIV             = 1,
      RT_HW_CTX_TYPE_NOTIFY_WAIT     = 3,
      RT_HW_CTX_TYPE_NOTIFY_RECORD   = 4,
      RT_HW_CTX_TYPE_WRITE_VALUE     = 5,
      RT_HW_CTX_TYPE_MIX_AIC         = 6,
      RT_HW_CTX_TYPE_MIX_AIV         = 7,
      RT_HW_CTX_TYPE_SDMA            = 8,
      RT_HW_CTX_TYPE_FLUSH_DATA      = 9,
      RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
      RT_HW_CTX_TYPE_WRITEBACK_DATA  = 11,
      RT_HW_CTX_TYPE_AICPU           = 12,
      RT_HW_CTX_TYPE_LOAD            = 13,
      RT_HW_CTX_TYPE_MAX,
  } rtFftsPlusHwType_t;
  /*
   * Software context type identifiers.
   *
   * Numbering starts at 1; RT_SOFT_CTX_TYPE_MAX has no explicit initializer and
   * therefore takes the value following RT_SOFT_CTX_TYPE_LABEL (6).
   */
  typedef enum tagFftsPlusSoftType {
      RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
      RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
      RT_SOFT_CTX_TYPE_AT_START    = 3,
      RT_SOFT_CTX_TYPE_AT_END      = 4,
      RT_SOFT_CTX_TYPE_LABEL       = 5,
      RT_SOFT_CTX_TYPE_MAX,
  } rtFftsPlusSoftType_t;
  /*
   * Combined 16-bit context type codes.
   *
   * Observation from the constants (confirm against the hardware specification):
   *  - codes 0x0000-0x000C equal the rtFftsPlusHwType_t values of the same name;
   *  - the two switch codes carry the rtFftsPlusSoftType_t value in the high
   *    byte and 0x0D (the value of RT_HW_CTX_TYPE_LOAD) in the low byte;
   *  - AT_START / AT_END / LABEL carry only the soft type value in the high byte.
   */
  typedef enum tagFftsPlusContextType {
      RT_CTX_TYPE_AICORE          = 0x0000,
      RT_CTX_TYPE_AIV             = 0x0001,
      RT_CTX_TYPE_NOTIFY_WAIT     = 0x0003,
      RT_CTX_TYPE_NOTIFY_RECORD   = 0x0004,
      RT_CTX_TYPE_WRITE_VALUE     = 0x0005,
      RT_CTX_TYPE_MIX_AIC         = 0x0006,
      RT_CTX_TYPE_MIX_AIV         = 0x0007,
      RT_CTX_TYPE_SDMA            = 0x0008,
      RT_CTX_TYPE_FLUSH_DATA      = 0x0009,
      RT_CTX_TYPE_INVALIDATE_DATA = 0x000A,
      RT_CTX_TYPE_WRITEBACK_DATA  = 0x000B,
      RT_CTX_TYPE_AICPU           = 0x000C,
      RT_CTX_TYPE_COND_SWITCH     = 0x010D,
      RT_CTX_TYPE_CASE_SWITCH     = 0x020D,
      RT_CTX_TYPE_AT_START        = 0x0300,
      RT_CTX_TYPE_AT_END          = 0x0400,
      RT_CTX_TYPE_LABEL           = 0x0500,
  } rtFftsPlusContextType_t;
  /*
   * Comparison kinds for a condition.
   *
   * RT_COND_TYPE_MAX has no explicit initializer and takes the value following
   * RT_COND_TYPE_LESS_OR_EQUAL (6).
   */
  typedef enum tagFftsPlusCondType {
      RT_COND_TYPE_EQUAL            = 0,
      RT_COND_TYPE_NOTEQUAL         = 1,
      RT_COND_TYPE_GREATER          = 2,
      RT_COND_TYPE_GREATER_OR_EQUAL = 3,
      RT_COND_TYPE_LESS             = 4,
      RT_COND_TYPE_LESS_OR_EQUAL    = 5,
      RT_COND_TYPE_MAX,
  } rtFftsPlusCondType_t;
  // The definition of the ffts plus contexts.
  // Number of 16-bit entries in a context's successor list (26 * 2 = 52 bytes).
  #define RT_CTX_SUCCESSOR_NUM 26

  /*
   * FFTS plus common context.
   *
   * The byte ranges in the comments rely on the 1-byte packing set by the
   * enclosing #pragma pack(1); the members add up to 128 bytes.
   * Members named resN look like reserved space (assumption from the naming).
   */
  typedef struct tagFftsPlusComCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t  successorNum;
      uint8_t  res1 : 7;
      uint8_t  aten : 1;
      // bytes 4-7
      uint8_t  res2;
      uint8_t  res3;
      uint8_t  predCntInit;
      uint8_t  predCnt;
      // bytes 8-11
      uint32_t res4;
      // bytes 12-63
      uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
      // bytes 64-71
      uint32_t res5[2];
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-127
      uint32_t res6[13];
  } rtFftsPlusComCtx_t;
  94. // aic/aiv context
  95. typedef struct tagFftsPlusAicAivCtx {
  96. // 0-3 bytes
  97. uint16_t contextType;
  98. uint8_t successorNum;
  99. uint8_t res1: 7;
  100. uint8_t aten: 1;
  101. // 4-7
  102. uint8_t prefetchConfig;
  103. uint8_t res3;
  104. uint8_t predCntInit;
  105. uint8_t predCnt;
  106. // 8-11
  107. uint32_t res4;
  108. // 12-63
  109. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  110. // 64-67
  111. uint16_t stat: 1;
  112. uint16_t schem: 2;
  113. uint16_t icachePrefetchCnt: 5;
  114. uint16_t res5: 7;
  115. uint16_t atm: 1;
  116. uint16_t prefetchEnableBitmap: 4;
  117. uint16_t res6: 4;
  118. uint16_t prefetchOnceBitmap: 4;
  119. uint16_t res7: 4;
  120. // 68-71
  121. uint32_t res8;
  122. // 72-75
  123. uint16_t threadId;
  124. uint16_t threadDim;
  125. // 76-79
  126. uint16_t nonTailBlockdim;
  127. uint16_t tailBlockdim;
  128. // 80-83
  129. uint32_t taskParamPtrBaseL;
  130. // 84-87
  131. uint16_t taskParamPtrBaseH;
  132. uint16_t taskParamPtrOffset;
  133. // 88-95
  134. uint32_t res9;
  135. uint32_t res10;
  136. // 96-103
  137. uint32_t nonTailTaskStartPcL;
  138. uint16_t nonTailTaskStartPcH;
  139. uint16_t res11;
  140. // 104-111
  141. uint32_t tailTaskStartPcL;
  142. uint16_t tailTaskStartPcH;
  143. uint16_t res12;
  144. // 112-119
  145. uint32_t res13;
  146. uint32_t res14;
  147. // 120-127
  148. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  149. } rtFftsPlusAicAivCtx_t;
  150. // mix aic/aiv context
  151. typedef struct tagFftsPlusMixAicAivCtx {
  152. // 0-3 bytes
  153. uint16_t contextType;
  154. uint8_t successorNum;
  155. uint8_t res1: 7;
  156. uint8_t aten: 1;
  157. // 4-7
  158. uint8_t prefetchConfig;
  159. uint8_t res3;
  160. uint8_t predCntInit;
  161. uint8_t predCnt;
  162. // 8-11
  163. uint32_t res4;
  164. // 12-63
  165. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  166. // 64-67
  167. uint16_t stat: 1;
  168. uint16_t schem: 2;
  169. uint16_t aicIcachePrefetchCnt: 5;
  170. uint16_t aivIcachePrefetchCnt: 5;
  171. uint16_t res5: 2;
  172. uint16_t atm: 1;
  173. uint16_t prefetchEnableBitmap: 4;
  174. uint16_t res6: 4;
  175. uint16_t prefetchOnceBitmap: 4;
  176. uint16_t res7: 4;
  177. // 68-71
  178. uint16_t res8;
  179. uint8_t nonTailBlockRatioN;
  180. uint8_t tailBlockRatioN;
  181. // 72-75
  182. uint16_t threadId;
  183. uint16_t threadDim;
  184. // 76-79
  185. uint16_t nonTailBlockdim;
  186. uint16_t tailBlockdim;
  187. // 80-87
  188. uint32_t aicTaskParamPtrL;
  189. uint16_t aicTaskParamPtrH;
  190. uint16_t aicTaskParamPtrOffset;
  191. // 88-95
  192. uint32_t aivTaskParamPtrL;
  193. uint16_t aivTaskParamPtrH;
  194. uint16_t aivTaskParamPtrOffset;
  195. // 96-103
  196. uint32_t nonTailAicTaskStartPcL;
  197. uint16_t nonTailAicTaskStartPcH;
  198. uint16_t tailAicTaskStartPcH;
  199. // 104-111
  200. uint32_t tailAicTaskStartPcL;
  201. uint32_t nonTailAivTaskStartPcL;
  202. // 112-119
  203. uint16_t nonTailAivTaskStartPcH;
  204. uint16_t tailAivTaskStartPcH;
  205. uint32_t tailAivTaskStartPcL;
  206. // 120-127
  207. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  208. } rtFftsPlusMixAicAivCtx_t;
  209. // adma context
  210. typedef struct tagFftsPlusSdmaCtx {
  211. // 0-3 bytes
  212. uint16_t contextType;
  213. uint8_t successorNum;
  214. uint8_t res1: 7;
  215. uint8_t aten: 1;
  216. // 4-7
  217. uint8_t res2;
  218. uint8_t res3;
  219. uint8_t predCntInit;
  220. uint8_t predCnt;
  221. // 8-11
  222. uint32_t res4;
  223. // 12-63
  224. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  225. // 64-67
  226. uint8_t sat: 1;
  227. uint8_t res5: 7;
  228. uint8_t res6: 7;
  229. uint8_t atm: 1;
  230. uint16_t res7;
  231. // 68-71
  232. uint32_t res8;
  233. // 72-75
  234. uint16_t threadId;
  235. uint16_t threadDim;
  236. // 76-79
  237. uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  238. // 80-83
  239. uint16_t sourceStreamId;
  240. uint16_t sourceSubstreamId;
  241. // 84-87
  242. uint16_t destinationStreamId;
  243. uint16_t destinationSubstreamId;
  244. // 88-127
  245. uint32_t sourceAddressBaseL;
  246. uint32_t sourceAddressBaseH;
  247. uint32_t sourceAddressOffset;
  248. uint32_t destinationAddressBaseL;
  249. uint32_t destinationAddressBaseH;
  250. uint32_t destinationAddressOffset;
  251. uint32_t nonTailDataLength;
  252. uint32_t tailDataLength;
  253. uint32_t res9[2];
  254. } rtFftsPlusSdmaCtx_t;
  255. // ffts plus notify record/wait context
  256. typedef struct tagFftsPlusNotifyCtx {
  257. // 0-3 bytes
  258. uint16_t contextType;
  259. uint8_t successorNum;
  260. uint8_t res1: 7;
  261. uint8_t aten: 1;
  262. // 4-7
  263. uint8_t res2;
  264. uint8_t res3;
  265. uint8_t predCntInit;
  266. uint8_t predCnt;
  267. // 8-11
  268. uint32_t res4;
  269. // 12-63
  270. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  271. // 64-67
  272. uint16_t res5: 15;
  273. uint16_t atm: 1;
  274. uint16_t res6;
  275. // 68-71
  276. uint32_t res7;
  277. // 72-75
  278. uint16_t threadId;
  279. uint16_t threadDim;
  280. // 76-79
  281. uint16_t notifyIdBase;
  282. uint16_t res8;
  283. // 80-127
  284. uint32_t res9[12];
  285. } rtFftsPlusNotifyCtx_t;
  286. // write Value context
  287. typedef struct tagFftsPlusWriteValueCtx {
  288. // 0-3 bytes
  289. uint16_t contextType;
  290. uint8_t successorNum;
  291. uint8_t res1: 7;
  292. uint8_t aten: 1;
  293. // 4-7
  294. uint8_t res2;
  295. uint8_t res3;
  296. uint8_t predCntInit;
  297. uint8_t predCnt;
  298. // 8-11
  299. uint32_t res4;
  300. // 12-63
  301. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  302. // 64-67
  303. uint16_t res5: 15;
  304. uint16_t atm: 1;
  305. uint16_t res6;
  306. // 68-71
  307. uint32_t res7;
  308. // 72-75
  309. uint16_t threadId;
  310. uint16_t threadDim;
  311. // 76-79
  312. uint8_t awSize: 3;
  313. uint8_t snoop: 1;
  314. uint8_t res8: 4;
  315. uint8_t awCache: 4;
  316. uint8_t awProt: 3;
  317. uint8_t va: 1;
  318. uint16_t res9;
  319. // 80-83
  320. uint32_t writeAddressBaseL;
  321. // 84-87
  322. uint32_t writeAddressBaseH: 17;
  323. uint32_t res10: 15;
  324. // 88-91
  325. uint32_t writeAddressOffset;
  326. // 92-95
  327. uint32_t res11;
  328. // 96-111
  329. uint32_t writeValue[4]; // write_value_00 -> write_value_03
  330. // 112-127
  331. uint32_t res12[4];
  332. } rtFftsPlusWriteValueCtx_t;
  333. // ai cpu context
  334. typedef struct tagFftsPlusAiCpuCtx {
  335. // 0-3 bytes
  336. uint16_t contextType;
  337. uint8_t successorNum;
  338. uint8_t res1: 7;
  339. uint8_t aten: 1;
  340. // 4-7
  341. uint8_t res2;
  342. uint8_t res3;
  343. uint8_t predCntInit;
  344. uint8_t predCnt;
  345. // 8-11
  346. uint32_t res4;
  347. // 12-63
  348. uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  349. // 64-67
  350. uint16_t sat: 1;
  351. uint16_t res5: 14;
  352. uint16_t atm: 1;
  353. uint16_t res6;
  354. // 68-71
  355. uint16_t sqeIndex;
  356. uint8_t kernelType: 7;
  357. uint8_t bm: 1;
  358. uint8_t topicType: 4;
  359. uint8_t qos: 3;
  360. uint8_t res7: 1;
  361. // 72-75
  362. uint16_t threadId;
  363. uint16_t threadDim;
  364. // 76-79
  365. uint16_t nonTailBlockdim;
  366. uint16_t tailBlockdim;
  367. // 80-115
  368. uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  369. // 116--119
  370. uint32_t res8;
  371. // 120-123
  372. uint32_t subtopicId: 12;
  373. uint32_t topicId: 6;
  374. uint32_t groupId: 6;
  375. uint32_t usrDataLength: 8;
  376. // 124-127
  377. uint32_t taskParamOffset;
  378. } rtFftsPlusAiCpuCtx_t;
  379. // data context
  380. typedef struct tagFftsPlusDataCtx {
  381. // 0-3 bytes
  382. uint16_t contextType;
  383. uint8_t successorNum;
  384. uint8_t res1: 7;
  385. uint8_t aten: 1;
  386. // 4-7
  387. uint8_t res2;
  388. uint8_t res3;
  389. uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  390. uint8_t cnt; // cons_cnt / prod_cnt
  391. // 8-11
  392. uint32_t res4;
  393. // 12-63
  394. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  395. // 64-67
  396. uint16_t res5: 15;
  397. uint16_t atm: 1;
  398. uint16_t res6;
  399. // 68-81
  400. uint16_t origConsumerCounter;
  401. uint16_t runConsumerCounter;
  402. // 72-75
  403. uint16_t threadId;
  404. uint16_t threadDim;
  405. // 76-79
  406. uint32_t res7;
  407. // 80-83
  408. uint32_t addressBaseL;
  409. // 84-87
  410. uint32_t addressBaseH;
  411. // 88-91
  412. uint32_t addressOffset;
  413. // 92-95
  414. uint32_t res8;
  415. // 96-99
  416. uint16_t nonTailNumOutter;
  417. uint16_t nonTailNumInner;
  418. // 100-103
  419. uint32_t nonTailLengthInner;
  420. // 104-107
  421. uint32_t nonTailStrideOutter;
  422. // 108-111
  423. uint32_t nonTailStrideInner;
  424. // 112-115
  425. uint16_t tailNumOutter;
  426. uint16_t tailNumInner;
  427. // 116-119
  428. uint32_t tailLengthInner;
  429. // 120-123
  430. uint32_t tailStrideOutter;
  431. // 124-127
  432. uint32_t tailStrideInner;
  433. } rtFftsPlusDataCtx_t;
  434. // at start context
  435. typedef struct tagFftsPlusAtStartCtx {
  436. // 0-3 bytes
  437. uint16_t contextType;
  438. uint8_t successorNum;
  439. uint8_t res1: 7;
  440. uint8_t aten: 1;
  441. // 4-7
  442. uint8_t res2;
  443. uint8_t res3;
  444. uint8_t predCntInit;
  445. uint8_t predCnt;
  446. // 8-11
  447. uint32_t res4;
  448. // 12-63
  449. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  450. // 64-67
  451. uint16_t res5;
  452. uint16_t res6;
  453. // 68-71
  454. uint16_t res7;
  455. uint16_t res8;
  456. // 72-75
  457. uint16_t threadId;
  458. uint16_t threadDim;
  459. // 76-79
  460. uint16_t threadIdInit;
  461. uint16_t threadWindowSize;
  462. // 80-127
  463. uint16_t res9[12];
  464. } rtFftsPlusAtStartCtx_t;
  // Slot counts for the two 16-bit slot arrays of the at-end context
  // (12 + 12 entries * 2 bytes = bytes 12-59).
  #define RT_CTX_SUCC_AT_START_SLOT_NUM 12
  #define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12

  /*
   * At-end context.
   *
   * The byte ranges in the comments rely on the 1-byte packing set by the
   * enclosing #pragma pack(1) and on bit-fields being packed in declaration
   * order; the members add up to 128 bytes.
   * Members named resN look like reserved space (assumption from the naming).
   */
  typedef struct tagFftsPlusAtEndCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t  atStartSlotNumber;
      uint8_t  outLabelSlotNumber : 7;
      uint8_t  aten : 1;
      // bytes 4-7
      uint8_t  res1;
      uint8_t  res2;
      uint8_t  predCntInit;
      uint8_t  predCnt;
      // bytes 8-11
      uint32_t res3;
      // bytes 12-35
      uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
      // bytes 36-59
      uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
      // bytes 60-63
      uint16_t res4;
      uint16_t res5;
      // bytes 64-67
      uint16_t res6;
      uint16_t res7;
      // bytes 68-71
      uint16_t res8;
      uint16_t res9;
      // bytes 72-75
      uint16_t threadId;
      uint16_t res10;
      // bytes 76-79
      uint16_t res11;
      uint16_t res12;
      // bytes 80-127
      uint32_t res13[12];
  } rtFftsPlusAtEndCtx_t;
  502. // label context
  503. typedef struct tagFftsPlusLabelCtx {
  504. // 0-3 bytes
  505. uint16_t contextType;
  506. uint8_t successorNum;
  507. uint8_t res1;
  508. // 4-7
  509. uint8_t res2;
  510. uint8_t res3;
  511. uint8_t predCntInit;
  512. uint8_t predCnt;
  513. // 8-11
  514. uint32_t res4;
  515. // 12-63
  516. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  517. // 64-79
  518. uint16_t res5[8];
  519. // 80-127
  520. uint32_t res6[12];
  521. } rtFftsPlusLabelCtx_t;
  522. // case switch context
  523. typedef struct tagFftsPlusCaseSwitchCtx {
  524. // 0-3 bytes
  525. uint16_t contextType;
  526. uint8_t successorNum;
  527. uint8_t res1: 7;
  528. uint8_t aten: 1;
  529. // 4-7
  530. uint8_t startLabelId;
  531. uint8_t labelListLen;
  532. uint8_t predCntInit;
  533. uint8_t predCnt;
  534. // 8-11
  535. uint32_t res2;
  536. // 12-63
  537. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  538. // 64-67
  539. uint16_t res3: 15;
  540. uint16_t atm: 1;
  541. uint16_t res4;
  542. // 68-71
  543. uint32_t res5;
  544. // 72-75
  545. uint16_t threadId;
  546. uint16_t threadDim;
  547. // 76-79
  548. uint8_t arSize: 3;
  549. uint8_t snoop: 1;
  550. uint8_t res6: 4;
  551. uint8_t arCache: 4;
  552. uint8_t arProt: 3;
  553. uint8_t va: 1;
  554. uint16_t res7;
  555. // 80-83
  556. uint32_t loadAddress0BaseL;
  557. // 84-87
  558. uint32_t loadAddress0BaseH: 17;
  559. uint32_t res8: 14;
  560. uint32_t ld0En: 1;
  561. // 88-91
  562. uint32_t loadAddress0Offset;
  563. // 92-95
  564. uint32_t res9;
  565. // 96-99
  566. uint32_t loadAddress1BaseL;
  567. // 100-103
  568. uint32_t loadAddress1BaseH: 17;
  569. uint32_t res10: 14;
  570. uint32_t ld1En: 1;
  571. // 104-107
  572. uint32_t loadAddress1Offset;
  573. // 108-127
  574. uint32_t res11[5];
  575. } rtFftsPlusCaseSwitchCtx_t;
  576. // case default context
  577. typedef struct tagFftsPlusCaseDefCtx {
  578. // 0-3 bytes
  579. uint16_t contextType;
  580. uint8_t successorNum;
  581. uint8_t res1: 7;
  582. uint8_t aten: 1;
  583. // 4-7
  584. uint8_t startLabelId;
  585. uint8_t labelListLen;
  586. uint8_t predCntInit;
  587. uint8_t predCnt;
  588. // 8-11
  589. uint32_t res2;
  590. // 12-63
  591. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  592. // 64-67
  593. uint16_t res3;
  594. uint16_t res4;
  595. // 68-127
  596. uint32_t res5[15];
  597. } rtFftsPlusCaseDefCtx_t;
  // Entry counts for the true/false successor arrays of the condition switch
  // context (12 + 14 entries * 2 bytes = bytes 12-63).
  #define RT_CTX_TRUE_SUCCESSOR_NUM 12
  #define RT_CTX_FALSE_SUCCESSOR_NUM 14

  /*
   * Condition switch context.
   *
   * The byte ranges in the comments rely on the 1-byte packing set by the
   * enclosing #pragma pack(1) and on bit-fields being packed in declaration
   * order; the members add up to 128 bytes.
   * Members named resN look like reserved space (assumption from the naming).
   */
  typedef struct tagFftsPlusCondSwitchCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t  trueSuccessorNum;
      uint8_t  falseSuccessorNum : 7;
      uint8_t  aten : 1;
      // bytes 4-7
      uint8_t  condition;
      uint8_t  res1;
      uint8_t  predCntInit;
      uint8_t  predCnt;
      // bytes 8-11
      uint32_t res2;
      // bytes 12-35
      uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
      // bytes 36-63
      uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
      // bytes 64-67
      uint16_t res3 : 15;
      uint16_t atm : 1;
      uint16_t res4;
      // bytes 68-71
      uint32_t res5;
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // byte 76
      uint8_t  arSize : 3;
      uint8_t  snoop : 1;
      uint8_t  res6 : 4;
      // byte 77
      uint8_t  arCache : 4;
      uint8_t  arProt : 3;
      uint8_t  va : 1;
      // bytes 78-79
      uint16_t res7;
      // bytes 80-83
      uint32_t loadAddress0BaseL;
      // bytes 84-87 (17 + 14 + 1 bits)
      uint32_t loadAddress0BaseH : 17;
      uint32_t res8 : 14;
      uint32_t ld0En : 1;
      // bytes 88-91
      uint32_t loadAddress0Offset;
      // bytes 92-95
      uint32_t res9;
      // bytes 96-99
      uint32_t loadAddress1BaseL;
      // bytes 100-103 (17 + 14 + 1 bits)
      uint32_t loadAddress1BaseH : 17;
      uint32_t res10 : 14;
      uint32_t ld1En : 1;
      // bytes 104-107
      uint32_t loadAddress1Offset;
      // bytes 108-119
      uint32_t res11[3];
      // bytes 120-127
      uint32_t cmpValue1;
      uint32_t cmpValue2;
  } rtFftsPlusCondSwitchCtx_t;
  657. #pragma pack(pop)
  658. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  659. }
  660. #endif
  661. #endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。