You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

rt_ffts_plus_define.h 18 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  17. #define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  18. #include "base.h"
  19. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  20. extern "C" {
  21. #endif
  22. #pragma pack(push)
  23. #pragma pack (1)
  // Hardware context type identifiers.
  // Values are assigned explicitly; 2 is not used in this list, and
  // RT_HW_CTX_TYPE_MAX is one past the last assigned value (13).
  typedef enum tagFftsPlusHwType {
      RT_HW_CTX_TYPE_AIC = 0,
      RT_HW_CTX_TYPE_AIV = 1,
      RT_HW_CTX_TYPE_NOTIFY_WAIT = 3,
      RT_HW_CTX_TYPE_NOTIFY_RECORD = 4,
      RT_HW_CTX_TYPE_WRITE_VALUE = 5,
      RT_HW_CTX_TYPE_MIX_AIC = 6,
      RT_HW_CTX_TYPE_MIX_AIV = 7,
      RT_HW_CTX_TYPE_SDMA = 8,
      RT_HW_CTX_TYPE_FLUSH_DATA = 9,
      RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
      RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
      RT_HW_CTX_TYPE_AICPU = 12,
      RT_HW_CTX_TYPE_LOAD = 13,
      RT_HW_CTX_TYPE_MAX = 14,
  } rtFftsPlusHwType_t;
  // Software (soft) context type identifiers.
  // Values start at 1; RT_SOFT_CTX_TYPE_MAX is one past the last assigned value (6).
  typedef enum tagFftsPlusSoftType {
      RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
      RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
      RT_SOFT_CTX_TYPE_AT_START = 3,
      RT_SOFT_CTX_TYPE_AT_END = 4,
      RT_SOFT_CTX_TYPE_LABEL = 5,
      RT_SOFT_CTX_PERSISTENT_CACHE = 6,
      RT_SOFT_CTX_TYPE_MAX = 7,
  } rtFftsPlusSoftType_t;
  // Combined 16-bit context type codes.
  // NOTE(review): the values look like (soft type << 8) | hardware type, e.g.
  // COND_SWITCH = 0x010D has high byte 1 and low byte 0x0D (13) - confirm
  // against the hardware specification before relying on that encoding.
  typedef enum tagFftsPlusContextType {
      RT_CTX_TYPE_AICORE = 0x0000,
      RT_CTX_TYPE_AIV = 0x0001,
      RT_CTX_TYPE_NOTIFY_WAIT = 0x0003,
      RT_CTX_TYPE_NOTIFY_RECORD = 0x0004,
      RT_CTX_TYPE_WRITE_VALUE = 0x0005,
      RT_CTX_TYPE_MIX_AIC = 0x0006,
      RT_CTX_TYPE_MIX_AIV = 0x0007,
      RT_CTX_TYPE_SDMA = 0x0008,
      RT_CTX_TYPE_FLUSH_DATA = 0x0009,
      RT_CTX_TYPE_INVALIDATE_DATA = 0x000A,
      RT_CTX_TYPE_WRITEBACK_DATA = 0x000B,
      RT_CTX_TYPE_AICPU = 0x000C,
      RT_CTX_TYPE_COND_SWITCH = 0x010D,
      RT_CTX_TYPE_CASE_SWITCH = 0x020D,
      RT_CTX_TYPE_AT_START = 0x0300,
      RT_CTX_TYPE_AT_END = 0x0400,
      RT_CTX_TYPE_LABEL = 0x0500,
      RT_CTX_TYPE_PERSISTENT_CACHE = 0x0600,
  } rtFftsPlusContextType_t;
  // Condition (comparison) type identifiers.
  // RT_COND_TYPE_MAX is one past the last assigned value (5).
  typedef enum tagFftsPlusCondType {
      RT_COND_TYPE_EQUAL = 0,
      RT_COND_TYPE_NOTEQUAL = 1,
      RT_COND_TYPE_GREATER = 2,
      RT_COND_TYPE_GREATER_OR_EQUAL = 3,
      RT_COND_TYPE_LESS = 4,
      RT_COND_TYPE_LESS_OR_EQUAL = 5,
      RT_COND_TYPE_MAX = 6,
  } rtFftsPlusCondType_t;
  // The definition of the FFTS+ contexts.
  // Number of entries in the successor-list array of a context.
  #define RT_CTX_SUCCESSOR_NUM 26

  // FFTS+ common context.
  // The byte-offset comments below cover offsets 0-127, i.e. 128 bytes total.
  typedef struct tagFftsPlusComCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t successorNum;
      uint8_t rsv1 : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t rsv2;
      uint8_t rsv3;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t rsv4;
      // bytes 12-63
      uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
      // bytes 64-71
      uint32_t rsv5[2];
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-127
      uint32_t res6[13];
  } rtFftsPlusComCtx_t;
  107. // aic/aiv context
  108. typedef struct tagFftsPlusAicAivCtx {
  109. // 0-3 bytes
  110. uint16_t contextType;
  111. uint8_t successorNum;
  112. uint8_t resv : 7;
  113. uint8_t aten : 1;
  114. // 4-7
  115. uint8_t prefetchConfig;
  116. uint8_t resv1;
  117. uint8_t predCntInit;
  118. uint8_t predCnt;
  119. // 8-11
  120. uint32_t resv2;
  121. // 12-63
  122. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  123. // 64-67
  124. uint16_t resv3 : 1;
  125. uint16_t schem : 2;
  126. uint16_t icachePrefetchCnt : 5;
  127. uint16_t resv4 : 7;
  128. uint16_t atm : 1;
  129. uint16_t prefetchEnableBitmap : 4;
  130. uint16_t res6 : 4;
  131. uint16_t prefetchOnceBitmap : 4;
  132. uint16_t res7 : 4;
  133. // 68-71
  134. uint16_t pmg : 2;
  135. uint16_t ns : 1;
  136. uint16_t partId : 8;
  137. uint16_t res8 : 1;
  138. uint16_t qos : 4;
  139. uint16_t res9;
  140. // 72-75
  141. uint16_t threadId;
  142. uint16_t threadDim;
  143. // 76-79
  144. uint16_t nonTailBlockdim;
  145. uint16_t tailBlockdim;
  146. // 80-83
  147. uint32_t taskParamPtrBaseL;
  148. // 84-87
  149. uint16_t taskParamPtrBaseH;
  150. uint16_t taskParamPtrOffset;
  151. // 88-95
  152. uint32_t res10;
  153. uint32_t res11;
  154. // 96-103
  155. uint32_t nonTailTaskStartPcL;
  156. uint16_t nonTailTaskStartPcH;
  157. uint16_t res12;
  158. // 104-111
  159. uint32_t tailTaskStartPcL;
  160. uint16_t tailTaskStartPcH;
  161. uint16_t res13;
  162. // 112-119
  163. uint32_t res14;
  164. uint32_t res15;
  165. // 120-127
  166. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  167. } rtFftsPlusAicAivCtx_t;
  168. // mix aic/aiv context
  169. typedef struct tagFftsPlusMixAicAivCtx {
  170. // 0-3 bytes
  171. uint16_t contextType;
  172. uint8_t successorNum;
  173. uint8_t reserved1 : 7;
  174. uint8_t aten : 1;
  175. // 4-7
  176. uint8_t prefetchConfig;
  177. uint8_t reserved2;
  178. uint8_t predCntInit;
  179. uint8_t predCnt;
  180. // 8-11
  181. uint32_t reserved3;
  182. // 12-63
  183. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  184. // 64-67
  185. uint16_t reserved4 : 1;
  186. uint16_t schem : 2;
  187. uint16_t aicIcachePrefetchCnt : 5;
  188. uint16_t aivIcachePrefetchCnt : 5;
  189. uint16_t reserved5 : 2;
  190. uint16_t atm : 1;
  191. uint16_t prefetchEnableBitmap : 4;
  192. uint16_t reserved6 : 4;
  193. uint16_t prefetchOnceBitmap : 4;
  194. uint16_t reserved7 : 4;
  195. // 68-71
  196. uint16_t pmg : 2;
  197. uint16_t ns : 1;
  198. uint16_t partId : 8;
  199. uint16_t reserved8 : 1;
  200. uint16_t qos : 4;
  201. uint8_t nonTailBlockRatioN;
  202. uint8_t tailBlockRatioN;
  203. // 72-75
  204. uint16_t threadId;
  205. uint16_t threadDim;
  206. // 76-79
  207. uint16_t nonTailBlockdim;
  208. uint16_t tailBlockdim;
  209. // 80-87
  210. uint32_t aicTaskParamPtrL;
  211. uint16_t aicTaskParamPtrH;
  212. uint16_t aicTaskParamPtrOffset;
  213. // 88-95
  214. uint32_t aivTaskParamPtrL;
  215. uint16_t aivTaskParamPtrH;
  216. uint16_t aivTaskParamPtrOffset;
  217. // 96-103
  218. uint32_t nonTailAicTaskStartPcL;
  219. uint16_t nonTailAicTaskStartPcH;
  220. uint16_t tailAicTaskStartPcH;
  221. // 104-111
  222. uint32_t tailAicTaskStartPcL;
  223. uint32_t nonTailAivTaskStartPcL;
  224. // 112-119
  225. uint16_t nonTailAivTaskStartPcH;
  226. uint16_t tailAivTaskStartPcH;
  227. uint32_t tailAivTaskStartPcL;
  228. // 120-127
  229. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  230. } rtFftsPlusMixAicAivCtx_t;
  231. // sdma context
  232. typedef struct tagFftsPlusSdmaCtx {
  233. // 0-3 bytes
  234. uint16_t contextType;
  235. uint8_t successorNum;
  236. uint8_t res1 : 7;
  237. uint8_t aten : 1;
  238. // 4-7
  239. uint8_t res2;
  240. uint8_t res3;
  241. uint8_t predCntInit;
  242. uint8_t predCnt;
  243. // 8-11
  244. uint32_t res4;
  245. // 12-63
  246. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  247. // 64-67
  248. uint8_t res5;
  249. uint8_t res6 : 7;
  250. uint8_t atm : 1;
  251. uint16_t res7;
  252. // 68-71
  253. uint16_t pmg : 2;
  254. uint16_t ns : 1;
  255. uint16_t partId : 8;
  256. uint16_t res8 : 1;
  257. uint16_t qos : 4;
  258. uint16_t res9;
  259. // 72-75
  260. uint16_t threadId;
  261. uint16_t threadDim;
  262. // 76-79
  263. uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  264. // 80-83
  265. uint16_t sourceStreamId;
  266. uint16_t sourceSubstreamId;
  267. // 84-87
  268. uint16_t destinationStreamId;
  269. uint16_t destinationSubstreamId;
  270. // 88-127
  271. uint32_t sourceAddressBaseL;
  272. uint32_t sourceAddressBaseH;
  273. uint32_t sourceAddressOffset;
  274. uint32_t destinationAddressBaseL;
  275. uint32_t destinationAddressBaseH;
  276. uint32_t destinationAddressOffset;
  277. uint32_t nonTailDataLength;
  278. uint32_t tailDataLength;
  279. uint32_t res10[2];
  280. } rtFftsPlusSdmaCtx_t;
  281. // ffts plus notify record/wait context
  282. typedef struct tagFftsPlusNotifyCtx {
  283. // 0-3 bytes
  284. uint16_t contextType;
  285. uint8_t successorNum;
  286. uint8_t res : 7;
  287. uint8_t aten : 1;
  288. // 4-7
  289. uint8_t res1;
  290. uint8_t res2;
  291. uint8_t predCntInit;
  292. uint8_t predCnt;
  293. // 8-11
  294. uint32_t res3;
  295. // 12-63
  296. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  297. // 64-67
  298. uint16_t res4 : 14;
  299. uint16_t satm : 1;
  300. uint16_t atm : 1;
  301. uint16_t res6;
  302. // 68-71
  303. uint32_t res7;
  304. // 72-75
  305. uint16_t threadId;
  306. uint16_t threadDim;
  307. // 76-79
  308. uint16_t notifyIdBase;
  309. uint8_t autoWindow;
  310. uint8_t res8;
  311. // 80-127
  312. uint32_t res9[4];
  313. uint16_t notifyId[16];
  314. } rtFftsPlusNotifyCtx_t;
  315. // write Value context
  316. typedef struct tagFftsPlusWriteValueCtx {
  317. // 0-3 bytes
  318. uint16_t contextType;
  319. uint8_t successorNum;
  320. uint8_t resv1 : 7;
  321. uint8_t aten : 1;
  322. // 4-7
  323. uint8_t resv2;
  324. uint8_t resv3;
  325. uint8_t predCntInit;
  326. uint8_t predCnt;
  327. // 8-11
  328. uint32_t resv4;
  329. // 12-63
  330. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  331. // 64-67
  332. uint16_t resv5 : 15;
  333. uint16_t atm : 1;
  334. uint16_t resv6;
  335. // 68-71
  336. uint32_t resv7;
  337. // 72-75
  338. uint16_t threadId;
  339. uint16_t threadDim;
  340. // 76-79
  341. uint8_t awSize : 3;
  342. uint8_t awSnoop : 1;
  343. uint8_t resv8 : 4;
  344. uint8_t awCache : 4;
  345. uint8_t awProt : 3;
  346. uint8_t awVa : 1;
  347. uint8_t arSize : 3;
  348. uint8_t arSnoop : 1;
  349. uint8_t resv9 : 4;
  350. uint8_t arCache : 4;
  351. uint8_t arProt : 3;
  352. uint8_t arVa : 1;
  353. // 80-83
  354. uint32_t writeAddressBaseL;
  355. // 84-87
  356. uint32_t writeAddressBaseH: 17;
  357. uint32_t res10: 15;
  358. // 88-91
  359. uint32_t writeAddressOffset;
  360. // 92-95
  361. uint32_t res11;
  362. // 96-111
  363. uint32_t writeValue[4]; // write_value_00 -> write_value_03
  364. // 112-127
  365. uint32_t res12[4];
  366. } rtFftsPlusWriteValueCtx_t;
  367. // ai cpu context
  368. typedef struct tagFftsPlusAiCpuCtx {
  369. // 0-3 bytes
  370. uint16_t contextType;
  371. uint8_t successorNum;
  372. uint8_t res1 : 7;
  373. uint8_t aten : 1;
  374. // 4-7
  375. uint8_t res2;
  376. uint8_t res3;
  377. uint8_t predCntInit;
  378. uint8_t predCnt;
  379. // 8-11
  380. uint32_t res4;
  381. // 12-63
  382. uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  383. // 64-67
  384. uint16_t res5 : 15;
  385. uint16_t atm : 1;
  386. uint16_t res6;
  387. // 68-71
  388. uint16_t sqeIndex;
  389. uint8_t kernelType : 7;
  390. uint8_t bm : 1;
  391. uint8_t topicType : 4;
  392. uint8_t qos : 3;
  393. uint8_t res7 : 1;
  394. // 72-75
  395. uint16_t threadId;
  396. uint16_t threadDim;
  397. // 76-79
  398. uint16_t nonTailBlockdim;
  399. uint16_t tailBlockdim;
  400. // 80-115
  401. uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  402. // 116--119
  403. uint32_t res8;
  404. // 120-123
  405. uint32_t subtopicId : 12;
  406. uint32_t topicId : 6;
  407. uint32_t groupId : 6;
  408. uint32_t usrDataLength : 8;
  409. // 124-127
  410. uint32_t taskParamOffset;
  411. } rtFftsPlusAiCpuCtx_t;
  412. // data context
  413. typedef struct tagFftsPlusDataCtx {
  414. // 0-3 bytes
  415. uint16_t contextType;
  416. uint8_t successorNum;
  417. uint8_t res1 : 7;
  418. uint8_t aten : 1;
  419. // 4-7
  420. uint8_t res2;
  421. uint8_t res3;
  422. uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  423. uint8_t cnt; // cons_cnt / prod_cnt
  424. // 8-11
  425. uint32_t res4;
  426. // 12-63
  427. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  428. // 64-67
  429. uint16_t res5 : 15;
  430. uint16_t atm : 1;
  431. uint16_t res6;
  432. // 68-71
  433. uint16_t pmg : 2;
  434. uint16_t ns : 1;
  435. uint16_t partId : 8;
  436. uint16_t res7 : 1;
  437. uint16_t qos : 4;
  438. uint16_t res8;
  439. // 72-75
  440. uint16_t threadId;
  441. uint16_t threadDim;
  442. // 76-79
  443. uint16_t origConsumerCounter;
  444. uint16_t runConsumerCounter;
  445. // 80-83
  446. uint32_t addressBaseL;
  447. // 84-87
  448. uint32_t addressBaseH;
  449. // 88-91
  450. uint32_t addressOffset;
  451. // 92-95
  452. uint32_t res9;
  453. // 96-99
  454. uint16_t nonTailNumOutter;
  455. uint16_t nonTailNumInner;
  456. // 100-103
  457. uint32_t nonTailLengthInner;
  458. // 104-107
  459. uint32_t nonTailStrideOutter;
  460. // 108-111
  461. uint32_t nonTailStrideInner;
  462. // 112-115
  463. uint16_t tailNumOutter;
  464. uint16_t tailNumInner;
  465. // 116-119
  466. uint32_t tailLengthInner;
  467. // 120-123
  468. uint32_t tailStrideOutter;
  469. // 124-127
  470. uint32_t tailStrideInner;
  471. } rtFftsPlusDataCtx_t;
  472. // at start context
  473. typedef struct tagFftsPlusAtStartCtx {
  474. // 0-3 bytes
  475. uint16_t contextType;
  476. uint8_t successorNum;
  477. uint8_t rs1 : 7;
  478. uint8_t aten : 1;
  479. // 4-7
  480. uint8_t rs2;
  481. uint8_t rs3;
  482. uint8_t predCntInit;
  483. uint8_t predCnt;
  484. // 8-11
  485. uint32_t rs4;
  486. // 12-63
  487. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  488. // 64-67
  489. uint16_t rs5;
  490. uint16_t rs6;
  491. // 68-71
  492. uint16_t rs7;
  493. uint16_t rs8;
  494. // 72-75
  495. uint16_t threadId;
  496. uint16_t threadDim;
  497. // 76-79
  498. uint16_t threadIdInit;
  499. uint16_t threadWindowSize;
  500. // 80-127
  501. uint32_t res9[12];
  502. } rtFftsPlusAtStartCtx_t;
  // At-end context.
  // Sizes of the two successor slot arrays held by the at-end context.
  #define RT_CTX_SUCC_AT_START_SLOT_NUM 12
  #define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12

  // The byte-offset comments below cover offsets 0-127, i.e. 128 bytes total.
  // Unlike the other contexts, bytes 12-59 hold two 12-entry slot arrays
  // instead of a single successor list.
  typedef struct tagFftsPlusAtEndCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t atStartSlotNumber;
      uint8_t outLabelSlotNumber : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t res1;
      uint8_t res2;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t res3;
      // bytes 12-59
      uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
      uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
      // bytes 60-63
      uint16_t res4;
      uint16_t res5;
      // bytes 64-67
      uint16_t res6;
      uint16_t res7;
      // bytes 68-71
      uint16_t res8;
      uint16_t res9;
      // bytes 72-75
      uint16_t threadId;
      uint16_t res10;
      // bytes 76-79
      uint16_t res11;
      uint16_t res12;
      // bytes 80-127
      uint32_t res13[12];
  } rtFftsPlusAtEndCtx_t;
  540. // label context
  541. typedef struct tagFftsPlusLabelCtx {
  542. // 0-3 bytes
  543. uint16_t contextType;
  544. uint8_t successorNum;
  545. uint8_t res1;
  546. // 4-7
  547. uint8_t res2;
  548. uint8_t res3;
  549. uint8_t predCntInit;
  550. uint8_t predCnt;
  551. // 8-11
  552. uint32_t res4;
  553. // 12-63
  554. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  555. // 64-79
  556. uint16_t res5[8];
  557. // 80-127
  558. uint32_t res6[12];
  559. } rtFftsPlusLabelCtx_t;
  560. // case switch context
  561. typedef struct tagFftsPlusCaseSwitchCtx {
  562. // 0-3 bytes
  563. uint16_t contextType;
  564. uint8_t successorNum;
  565. uint8_t resv0 : 7;
  566. uint8_t aten : 1;
  567. // 4-7
  568. uint8_t startLabelId;
  569. uint8_t labelListLen;
  570. uint8_t predCntInit;
  571. uint8_t predCnt;
  572. // 8-11
  573. uint32_t resv1;
  574. // 12-63
  575. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  576. // 64-67
  577. uint16_t resv2 : 15;
  578. uint16_t atm : 1;
  579. uint16_t resv3;
  580. // 68-71
  581. uint32_t resv4;
  582. // 72-75
  583. uint16_t threadId;
  584. uint16_t threadDim;
  585. // 76-79
  586. uint8_t arSize : 3;
  587. uint8_t snoop : 1;
  588. uint8_t resv5 : 4;
  589. uint8_t arCache : 4;
  590. uint8_t arProt : 3;
  591. uint8_t va : 1;
  592. uint16_t resv6;
  593. // 80-83
  594. uint32_t loadAddress0BaseL;
  595. // 84-87
  596. uint32_t loadAddress0BaseH : 17;
  597. uint32_t resv7 : 14;
  598. uint32_t ld0En : 1;
  599. // 88-91
  600. uint32_t loadAddress0Offset;
  601. // 92-95
  602. uint32_t resv8;
  603. // 96-99
  604. uint32_t loadAddress1BaseL;
  605. // 100-103
  606. uint32_t loadAddress1BaseH : 17;
  607. uint32_t resv9 : 14;
  608. uint32_t ld1En : 1;
  609. // 104-107
  610. uint32_t loadAddress1Offset;
  611. // 108-127
  612. uint32_t resv10[5];
  613. } rtFftsPlusCaseSwitchCtx_t;
  614. // case default context
  615. typedef struct tagFftsPlusCaseDefCtx {
  616. // 0-3 bytes
  617. uint16_t contextType;
  618. uint8_t successorNum;
  619. uint8_t rs0 : 7;
  620. uint8_t aten : 1;
  621. // 4-7
  622. uint8_t startLabelId;
  623. uint8_t labelListLen;
  624. uint8_t predCntInit;
  625. uint8_t predCnt;
  626. // 8-11
  627. uint32_t rs1;
  628. // 12-63
  629. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  630. // 64-67
  631. uint16_t rs2;
  632. uint16_t rs3;
  633. // 68-127
  634. uint32_t rs4[15];
  635. } rtFftsPlusCaseDefCtx_t;
  // Condition-switch context.
  // Sizes of the true-branch and false-branch successor lists.
  #define RT_CTX_TRUE_SUCCESSOR_NUM 13
  #define RT_CTX_FALSE_SUCCESSOR_NUM 13

  // The byte-offset comments below cover offsets 0-127, i.e. 128 bytes total.
  // Bytes 12-63 are split into two 13-entry successor lists.
  typedef struct tagFftsPlusCondSwitchCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t trueSuccessorNum;
      uint8_t falseSuccessorNum : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t condition;
      uint8_t res1;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t res2;
      // bytes 12-63
      uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
      uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
      // bytes 64-67
      uint16_t res3 : 15;
      uint16_t atm : 1;
      uint16_t res4;
      // bytes 68-71
      uint32_t res5;
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-79
      uint8_t arSize : 3;
      uint8_t snoop : 1;
      uint8_t res6 : 4;
      uint8_t arCache : 4;
      uint8_t arProt : 3;
      uint8_t va : 1;
      uint16_t res7;
      // bytes 80-83
      uint32_t loadAddress0BaseL;
      // bytes 84-87
      uint32_t loadAddress0BaseH : 17;
      uint32_t res8 : 14;
      uint32_t ld0En : 1;
      // bytes 88-91
      uint32_t loadAddress0Offset;
      // bytes 92-95
      uint32_t res9;
      // bytes 96-99
      uint32_t loadAddress1BaseL;
      // bytes 100-103
      uint32_t loadAddress1BaseH : 17;
      uint32_t res10 : 14;
      uint32_t ld1En : 1;
      // bytes 104-107
      uint32_t loadAddress1Offset;
      // bytes 108-127
      uint32_t res11[3];
      uint32_t cmpValue1;
      uint32_t cmpValue2;
  } rtFftsPlusCondSwitchCtx_t;
  695. // ffts plus persistent cache context
  696. typedef struct tagFftsPlusPersistentCacheCtx {
  697. // 0- 3bytes
  698. uint16_t contextType;
  699. uint8_t successorNum;
  700. uint8_t res1 : 7;
  701. uint8_t aten : 1;
  702. // 4-7
  703. uint8_t res2[2];
  704. uint8_t predCntInit;
  705. uint8_t predCnt;
  706. // 8-11
  707. uint8_t res3[4];
  708. // 12-63
  709. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  710. // 64-67
  711. uint8_t persistentEnable : 1;
  712. uint8_t res4 : 7;
  713. uint8_t res5;
  714. uint16_t persistentSize;
  715. // 68-71
  716. uint32_t persistentId;
  717. // 72-127
  718. uint32_t res6[14];
  719. } rtFftsPlusPersistentCacheCtx_t;
  720. #pragma pack(pop)
  721. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  722. }
  723. #endif
  724. #endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示