You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ztrsm_kernel_2x2_LT.S 33 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build configuration.                                                */
  /* ASSEMBLER is defined before common.h is included.                   */
  /* NOTE(review): presumably this makes common.h expose its assembly-   */
  /* side macros (SIZE, LD/ST, ADD/SUB/MUL, ZBASE_SHIFT, CNAME, ...)     */
  /* that the kernel below relies on -- confirm in common.h.             */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* The target core must be chosen by the build: stop with an error     */
  /* unless at least one of EV4, EV5 or EV6 is defined.                  */
  40. #if !defined(EV4) && !defined(EV5) && !defined(EV6)
  41. #error "Architecture is not specified."
  42. #endif
  /* Per-core tuning.                                                    */
  /* PREFETCHSIZE: distance, in units of SIZE, used by the main loop's   */
  /*   loads into $31 (PREFETCHSIZE * SIZE(AO) / (BO)).                  */
  /* UNOP: expands to a real `unop` on EV6 and to nothing on EV5/EV4.    */
  /*   NOTE(review): presumably issue-slot padding that only pays off on */
  /*   EV6 -- confirm.                                                   */
  43. #ifdef EV6
  44. #define PREFETCHSIZE 56
  45. #define UNOP unop
  46. #endif
  47. #ifdef EV5
  48. #define PREFETCHSIZE 48
  49. #define UNOP
  50. #endif
  /* EV4 defines no PREFETCHSIZE; the PREFETCHSIZE loads in the 2x2 main */
  /* loop are wrapped in #ifndef EV4, so the macro is not needed there.  */
  51. #ifdef EV4
  52. #define UNOP
  53. #endif
  /* Assembler setup and declaration of the kernel entry symbol CNAME.   */
  /* noat: the AO alias defined further down maps to $at, so the code    */
  /* uses $at explicitly and the assembler must not treat it as its own  */
  /* scratch register.                                                   */
  54. .set noat
  /* noreorder: the instruction order in this file is hand-scheduled;    */
  /* ask the assembler to leave it as written.                           */
  55. .set noreorder
  /* NOTE(review): ev6 is selected unconditionally, including for builds */
  /* that define EV4 or EV5 -- confirm this is intended.                 */
  56. .arch ev6
  57. .text
  /* NOTE(review): presumably a power-of-two alignment (2^5 = 32 bytes)  */
  /* for the entry point -- confirm against the assembler in use.        */
  58. .align 5
  /* Export CNAME and open its procedure descriptor; the matching end of */
  /* the procedure is outside the portion of the file shown here.        */
  59. .globl CNAME
  60. .ent CNAME
  /* Symbolic names for the stack frame and for every register the       */
  /* kernel uses.                                                        */
  /* STACKSIZE: bytes the prologue subtracts from $sp; $f2-$f9 are       */
  /* spilled at 0..56($sp), and the stack-passed arguments are read at   */
  /* 0/8/16/24 + STACKSIZE($sp).                                         */
  61. #define STACKSIZE 80
  /* Problem dimensions and matrix pointers. M, N, K and A are used      */
  /* straight from their registers; B, C, LDC (and OFFSET, below) are    */
  /* loaded from the caller's stack area in the prologue.                */
  62. #define M $16
  63. #define N $17
  64. #define K $18
  65. #define A $21
  66. #define B $22
  67. #define C $20
  68. #define LDC $23
  /* C1 / C2: pointers to the two columns of C handled per J iteration   */
  /* (C1 = C, C2 = C + LDC in the non-RT path).                          */
  69. #define C1 $19
  70. #define C2 $24
  /* AO / BO: running pointers into the A and B panels. AO lives in $at, */
  /* which is why the file is assembled with .set noat.                  */
  71. #define AO $at
  72. #define BO $5
  /* Loop counters: I starts at M >> 1 (then M & 1 for the tail),        */
  /* J starts at N >> 1, L counts the inner loop down in steps of 2.     */
  73. #define I $6
  74. #define J $7
  75. #define L $8
  /* a1-a4 / b1-b4: operands loaded from AO / BO for the multiply loops  */
  /* (reused as scratch loads in the solve step).                        */
  76. #define a1 $f16
  77. #define a2 $f17
  78. #define a3 $f18
  79. #define a4 $f19
  80. #define b1 $f20
  81. #define b2 $f21
  82. #define b3 $f22
  83. #define b4 $f23
  /* t1-t4: temporaries holding the products that are folded into the    */
  /* c-accumulators one step later.                                      */
  84. #define t1 $f24
  85. #define t2 $f25
  86. #define t3 $f26
  87. #define t4 $f27
  /* a5, a6, b5: extra operand registers used by the unrolled main loop. */
  /* Note: alpha_r shares $f30 with a6 and alpha_i shares $f29 with b5;  */
  /* alpha_i / alpha_r are not referenced in the visible part of this    */
  /* kernel. NOTE(review): presumably left over from the GEMM kernel     */
  /* this file was derived from -- confirm before using them.            */
  88. #define a5 $f28
  89. #define a6 $f30
  90. #define b5 $f29
  91. #define alpha_i $f29
  92. #define alpha_r $f30
  /* c01-c16: accumulators, mapped one-to-one onto $f0-$f15.             */
  /* The prologue saves $f2-$f9 (c03-c10) on the stack before use.       */
  93. #define c01 $f0
  94. #define c02 $f1
  95. #define c03 $f2
  96. #define c04 $f3
  97. #define c05 $f4
  98. #define c06 $f5
  99. #define c07 $f6
  100. #define c08 $f7
  101. #define c09 $f8
  102. #define c10 $f9
  103. #define c11 $f10
  104. #define c12 $f11
  105. #define c13 $f12
  106. #define c14 $f13
  107. #define c15 $f14
  108. #define c16 $f15
  /* Integer scratch and bookkeeping.                                    */
  /* TMP1 / TMP2: scratch for address and count arithmetic.              */
  /* KK: running offset count (initialised from OFFSET, stepped by 2 per */
  /*     2-row block in the LT / LN paths).                              */
  /* AORIG: base of the A panel, used by the LN / RT paths.              */
  /* OFFSET: fourth stack-passed argument, loaded in the prologue.       */
  109. #define TMP1 $0
  110. #define TMP2 $1
  111. #define KK $2
  112. #define AORIG $3
  113. #define OFFSET $4
  /* Sign selection for the partial products.                            */
  /* ADD1-ADD4 are the accumulate operations used in the multiply loops; */
  /* ADD5 / ADD6 combine the two partial products in the solve step.     */
  /* Each is mapped to ADD or SUB (NOTE(review): ADD / SUB presumably    */
  /* come from common.h -- confirm) according to the variant being       */
  /* built (LN / LT versus the others) and to whether CONJ is defined.   */
  /* Without CONJ the two variant branches below are identical; with     */
  /* CONJ they differ only in ADD3 / ADD4.                               */
  114. #if defined(LN) || defined(LT)
  115. #ifndef CONJ
  116. #define ADD1 ADD
  117. #define ADD2 SUB
  118. #define ADD3 ADD
  119. #define ADD4 ADD
  120. #define ADD5 SUB
  121. #define ADD6 ADD
  122. #else
  /* LN / LT with CONJ: ADD3 subtracts, ADD4 adds. */
  123. #define ADD1 ADD
  124. #define ADD2 ADD
  125. #define ADD3 SUB
  126. #define ADD4 ADD
  127. #define ADD5 ADD
  128. #define ADD6 SUB
  129. #endif
  130. #else
  131. #ifndef CONJ
  132. #define ADD1 ADD
  133. #define ADD2 SUB
  134. #define ADD3 ADD
  135. #define ADD4 ADD
  136. #define ADD5 SUB
  137. #define ADD6 ADD
  138. #else
  /* Other variants with CONJ: ADD3 adds, ADD4 subtracts. */
  139. #define ADD1 ADD
  140. #define ADD2 ADD
  141. #define ADD3 ADD
  142. #define ADD4 SUB
  143. #define ADD5 ADD
  144. #define ADD6 SUB
  145. #endif
  146. #endif
  147. CNAME:
  148. .frame $sp, STACKSIZE, $26, 0
  149. #ifdef PROFILE
  150. ldgp $gp, 0($27)
  151. lda $at, _mcount
  152. jsr $at, ($at), _mcount
  153. #endif
  154. #ifndef PROFILE
  155. .prologue 0
  156. #else
  157. .prologue 1
  158. #endif
  159. lda $sp, -STACKSIZE($sp)
  160. ldq B, 0 + STACKSIZE($sp)
  161. ldq C, 8 + STACKSIZE($sp)
  162. ldq LDC, 16 + STACKSIZE($sp)
  163. ldq OFFSET, 24 + STACKSIZE($sp)
  164. sll LDC, ZBASE_SHIFT, LDC
  165. stt $f2, 0($sp)
  166. stt $f3, 8($sp)
  167. stt $f4, 16($sp)
  168. stt $f5, 24($sp)
  169. stt $f6, 32($sp)
  170. stt $f7, 40($sp)
  171. stt $f8, 48($sp)
  172. stt $f9, 56($sp)
  173. cmple M, 0, $0
  174. cmple N, 0, $1
  175. cmple K, 0, $2
  176. or $0, $1, $0
  177. or $0, $2, $0
  178. bne $0, $L999
  179. #ifdef LN
  180. addq M, M, TMP2
  181. mulq TMP2, K, TMP1
  182. SXADDQ TMP1, A, A
  183. SXADDQ TMP2, C, C
  184. #endif
  185. #ifdef RN
  186. negq OFFSET, KK
  187. #endif
  188. #ifdef RT
  189. mulq N, K, TMP1
  190. addq TMP1, TMP1, TMP1
  191. SXADDQ TMP1, B, B
  192. mulq N, LDC, TMP1
  193. addq TMP1, C, C
  194. subq N, OFFSET, KK
  195. #endif
  196. sra N, 1, J
  197. ble J, $L30
  198. .align 4
  199. $L01:
  200. #ifdef RT
  201. sll K, ZBASE_SHIFT + 1, TMP1
  202. subq B, TMP1, B
  203. subq C, LDC, C2
  204. subq C2, LDC, C1
  205. subq C2, LDC, C
  206. #else
  207. mov C, C1
  208. addq C, LDC, C2
  209. addq C2, LDC, C
  210. #endif
  211. #ifdef LN
  212. addq M, OFFSET, KK
  213. #endif
  214. #ifdef LT
  215. mov OFFSET, KK
  216. #endif
  217. #if defined(LN) || defined(RT)
  218. mov A, AORIG
  219. #else
  220. mov A, AO
  221. #endif
  222. sra M, 1, I
  223. fclr t1
  224. fclr t2
  225. fclr t3
  226. fclr t4
  227. fclr c01
  228. fclr c05
  229. ble I, $L20
  230. .align 4
  231. $L11:
  232. #if defined(LT) || defined(RN)
  233. LD a1, 0 * SIZE(AO)
  234. fclr c09
  235. LD a2, 1 * SIZE(AO)
  236. fclr c13
  237. LD a3, 2 * SIZE(AO)
  238. fclr c02
  239. LD a4, 3 * SIZE(AO)
  240. fclr c06
  241. LD b1, 0 * SIZE(B)
  242. fclr c10
  243. LD b2, 1 * SIZE(B)
  244. fclr c14
  245. LD b3, 2 * SIZE(B)
  246. fclr c03
  247. LD b4, 3 * SIZE(B)
  248. fclr c07
  249. lda BO, 4 * SIZE(B)
  250. fclr c11
  251. lda AO, 4 * SIZE(AO)
  252. fclr c15
  253. lds $f31, 4 * SIZE(C1)
  254. fclr c04
  255. lda L, -2(KK)
  256. fclr c08
  257. lds $f31, 4 * SIZE(C2)
  258. fclr c12
  259. fclr c16
  260. ble KK, $L18
  261. ble L, $L15
  262. #else
  263. #ifdef LN
  264. sll K, ZBASE_SHIFT + 1, TMP1
  265. subq AORIG, TMP1, AORIG
  266. #endif
  267. sll KK, ZBASE_SHIFT + 1, TMP1
  268. addq AORIG, TMP1, AO
  269. addq B, TMP1, BO
  270. subq K, KK, TMP1
  271. LD a1, 0 * SIZE(AO)
  272. fclr c09
  273. LD a2, 1 * SIZE(AO)
  274. fclr c13
  275. LD a3, 2 * SIZE(AO)
  276. fclr c02
  277. LD a4, 3 * SIZE(AO)
  278. fclr c06
  279. LD b1, 0 * SIZE(BO)
  280. fclr c10
  281. LD b2, 1 * SIZE(BO)
  282. fclr c14
  283. LD b3, 2 * SIZE(BO)
  284. fclr c03
  285. LD b4, 3 * SIZE(BO)
  286. fclr c07
  287. lda BO, 4 * SIZE(BO)
  288. fclr c11
  289. lda AO, 4 * SIZE(AO)
  290. fclr c15
  291. lds $f31, 4 * SIZE(C1)
  292. fclr c04
  293. lda L, -2(TMP1)
  294. fclr c08
  295. lds $f31, 4 * SIZE(C2)
  296. fclr c12
  297. fclr c16
  298. ble TMP1, $L18
  299. ble L, $L15
  300. #endif
  301. .align 5
  302. $L12:
  303. /* 1 */
  304. ADD1 c11, t1, c11
  305. #ifndef EV4
  306. ldq $31, PREFETCHSIZE * SIZE(AO)
  307. #else
  308. unop
  309. #endif
  310. MUL b1, a1, t1
  311. #ifndef EV4
  312. ldl $31, PREFETCHSIZE * SIZE(BO)
  313. #else
  314. unop
  315. #endif
  316. ADD3 c12, t2, c12
  317. unop
  318. MUL b1, a2, t2
  319. unop
  320. ADD2 c16, t3, c16
  321. unop
  322. MUL b2, a2, t3
  323. LD a5, 0 * SIZE(AO)
  324. ADD4 c15, t4, c15
  325. unop
  326. MUL b2, a1, t4
  327. LD b5, 0 * SIZE(BO)
  328. /* 2 */
  329. ADD1 c01, t1, c01
  330. UNOP
  331. MUL b1, a3, t1
  332. UNOP
  333. ADD3 c02, t2, c02
  334. UNOP
  335. MUL b1, a4, t2
  336. UNOP
  337. ADD2 c06, t3, c06
  338. unop
  339. MUL b2, a4, t3
  340. unop
  341. ADD4 c05, t4, c05
  342. unop
  343. MUL b4, a1, t4
  344. unop
  345. /* 3 */
  346. ADD1 c03, t1, c03
  347. unop
  348. MUL b3, a1, t1
  349. unop
  350. ADD3 c04, t2, c04
  351. unop
  352. MUL b3, a2, t2
  353. unop
  354. ADD2 c08, t3, c08
  355. unop
  356. MUL b4, a2, t3
  357. LD a2, 1 * SIZE(AO)
  358. ADD4 c13, t4, c13
  359. unop
  360. MUL b2, a3, t4
  361. LD b2, 1 * SIZE(BO)
  362. /* 4 */
  363. ADD1 c09, t1, c09
  364. unop
  365. MUL b3, a3, t1
  366. LD a6, 2 * SIZE(AO)
  367. ADD3 c10, t2, c10
  368. unop
  369. MUL b3, a4, t2
  370. LD b3, 2 * SIZE(BO)
  371. ADD2 c14, t3, c14
  372. unop
  373. MUL b4, a4, t3
  374. LD a4, 3 * SIZE(AO)
  375. ADD4 c07, t4, c07
  376. unop
  377. MUL b4, a3, t4
  378. LD b4, 3 * SIZE(BO)
  379. /* 5 */
  380. ADD1 c11, t1, c11
  381. unop
  382. MUL b5, a5, t1
  383. LD a1, 4 * SIZE(AO)
  384. ADD3 c12, t2, c12
  385. lda L, -2(L)
  386. MUL b5, a2, t2
  387. LD b1, 4 * SIZE(BO)
  388. ADD2 c16, t3, c16
  389. unop
  390. MUL b2, a2, t3
  391. unop
  392. ADD4 c15, t4, c15
  393. unop
  394. MUL b2, a5, t4
  395. unop
  396. /* 6 */
  397. ADD1 c01, t1, c01
  398. unop
  399. MUL b5, a6, t1
  400. unop
  401. ADD3 c02, t2, c02
  402. unop
  403. MUL b5, a4, t2
  404. unop
  405. ADD2 c06, t3, c06
  406. unop
  407. MUL b2, a4, t3
  408. unop
  409. ADD4 c05, t4, c05
  410. unop
  411. MUL b4, a5, t4
  412. unop
  413. /* 7 */
  414. ADD1 c03, t1, c03
  415. lda AO, 8 * SIZE(AO)
  416. MUL b3, a5, t1
  417. unop
  418. ADD3 c04, t2, c04
  419. lda BO, 8 * SIZE(BO)
  420. MUL b3, a2, t2
  421. unop
  422. ADD2 c08, t3, c08
  423. unop
  424. MUL b4, a2, t3
  425. LD a2, -3 * SIZE(AO)
  426. ADD4 c13, t4, c13
  427. unop
  428. MUL b2, a6, t4
  429. LD b2, -3 * SIZE(BO)
  430. /* 8 */
  431. ADD1 c09, t1, c09
  432. unop
  433. MUL b3, a6, t1
  434. LD a3, -2 * SIZE(AO)
  435. ADD3 c10, t2, c10
  436. unop
  437. MUL b3, a4, t2
  438. LD b3, -2 * SIZE(BO)
  439. ADD2 c14, t3, c14
  440. unop
  441. MUL b4, a4, t3
  442. LD a4, -1 * SIZE(AO)
  443. ADD4 c07, t4, c07
  444. MUL b4, a6, t4
  445. LD b4, -1 * SIZE(BO)
  446. bgt L, $L12
  447. .align 4
  448. $L15:
  449. ADD1 c11, t1, c11
  450. unop
  451. MUL b1, a1, t1
  452. #if defined(LT) || defined(RN)
  453. blbs KK, $L17
  454. #else
  455. blbs TMP1, $L17
  456. #endif
  457. .align 4
  458. ADD3 c12, t2, c12
  459. MUL b1, a2, t2
  460. ADD2 c16, t3, c16
  461. MUL b2, a2, t3
  462. ADD4 c15, t4, c15
  463. MUL b2, a1, t4
  464. ADD1 c01, t1, c01
  465. MUL b1, a3, t1
  466. ADD3 c02, t2, c02
  467. unop
  468. MUL b1, a4, t2
  469. LD b1, 0 * SIZE(BO)
  470. ADD2 c06, t3, c06
  471. MUL b2, a4, t3
  472. ADD4 c05, t4, c05
  473. MUL b4, a1, t4
  474. ADD1 c03, t1, c03
  475. unop
  476. MUL b3, a1, t1
  477. LD a1, 0 * SIZE(AO)
  478. ADD3 c04, t2, c04
  479. unop
  480. MUL b3, a2, t2
  481. unop
  482. ADD2 c08, t3, c08
  483. unop
  484. MUL b4, a2, t3
  485. LD a2, 1 * SIZE(AO)
  486. ADD4 c13, t4, c13
  487. unop
  488. MUL b2, a3, t4
  489. LD b2, 1 * SIZE(BO)
  490. ADD1 c09, t1, c09
  491. unop
  492. MUL b3, a3, t1
  493. lda AO, 4 * SIZE(AO)
  494. ADD3 c10, t2, c10
  495. unop
  496. MUL b3, a4, t2
  497. LD b3, 2 * SIZE(BO)
  498. ADD2 c14, t3, c14
  499. unop
  500. MUL b4, a4, t3
  501. LD a4, -1 * SIZE(AO)
  502. ADD4 c07, t4, c07
  503. unop
  504. MUL b4, a3, t4
  505. LD a3, -2 * SIZE(AO)
  506. ADD1 c11, t1, c11
  507. LD b4, 3 * SIZE(BO)
  508. MUL b1, a1, t1
  509. lda BO, 4 * SIZE(BO)
  510. .align 4
  511. $L17:
  512. ADD3 c12, t2, c12
  513. MUL b1, a2, t2
  514. ADD2 c16, t3, c16
  515. MUL b2, a2, t3
  516. ADD4 c15, t4, c15
  517. MUL b2, a1, t4
  518. ADD1 c01, t1, c01
  519. MUL b1, a3, t1
  520. ADD3 c02, t2, c02
  521. MUL b1, a4, t2
  522. ADD2 c06, t3, c06
  523. MUL b2, a4, t3
  524. ADD4 c05, t4, c05
  525. MUL b4, a1, t4
  526. ADD1 c03, t1, c03
  527. MUL b3, a1, t1
  528. ADD3 c04, t2, c04
  529. MUL b3, a2, t2
  530. ADD2 c08, t3, c08
  531. MUL b4, a2, t3
  532. ADD4 c13, t4, c13
  533. MUL b2, a3, t4
  534. ADD1 c09, t1, c09
  535. MUL b3, a3, t1
  536. ADD3 c10, t2, c10
  537. MUL b3, a4, t2
  538. ADD2 c14, t3, c14
  539. MUL b4, a4, t3
  540. ADD4 c07, t4, c07
  541. lda AO, 4 * SIZE(AO)
  542. MUL b4, a3, t4
  543. lda BO, 4 * SIZE(BO)
  544. ADD1 c11, t1, c11
  545. ADD3 c12, t2, c12
  546. ADD2 c16, t3, c16
  547. ADD4 c15, t4, c15
  548. ADD c01, c06, c01
  549. ADD c02, c05, c02
  550. ADD c03, c08, c03
  551. ADD c04, c07, c04
  552. ADD c09, c14, c09
  553. ADD c10, c13, c10
  554. ADD c11, c16, c11
  555. ADD c12, c15, c12
  556. .align 4
  557. $L18:
  558. #if defined(LN) || defined(RT)
  559. #ifdef LN
  560. subq KK, 2, TMP1
  561. #else
  562. subq KK, 2, TMP1
  563. #endif
  564. sll TMP1, ZBASE_SHIFT + 1, TMP2
  565. addq AORIG, TMP2, AO
  566. sll TMP1, ZBASE_SHIFT + 1, TMP2
  567. addq B, TMP2, BO
  568. #else
  569. lda AO, -4 * SIZE(AO)
  570. lda BO, -4 * SIZE(BO)
  571. #endif
  572. #if defined(LN) || defined(LT)
  573. LD a1, 0 * SIZE(BO)
  574. LD a2, 1 * SIZE(BO)
  575. LD a3, 2 * SIZE(BO)
  576. LD a4, 3 * SIZE(BO)
  577. LD b1, 4 * SIZE(BO)
  578. LD b2, 5 * SIZE(BO)
  579. LD b3, 6 * SIZE(BO)
  580. LD b4, 7 * SIZE(BO)
  581. SUB a1, c01, c01
  582. SUB a2, c02, c02
  583. SUB a3, c09, c09
  584. SUB a4, c10, c10
  585. SUB b1, c03, c03
  586. SUB b2, c04, c04
  587. SUB b3, c11, c11
  588. SUB b4, c12, c12
  589. #else
  590. LD a1, 0 * SIZE(AO)
  591. LD a2, 1 * SIZE(AO)
  592. LD a3, 2 * SIZE(AO)
  593. LD a4, 3 * SIZE(AO)
  594. LD b1, 4 * SIZE(AO)
  595. LD b2, 5 * SIZE(AO)
  596. LD b3, 6 * SIZE(AO)
  597. LD b4, 7 * SIZE(AO)
  598. SUB a1, c01, c01
  599. SUB a2, c02, c02
  600. SUB a3, c03, c03
  601. SUB a4, c04, c04
  602. SUB b1, c09, c09
  603. SUB b2, c10, c10
  604. SUB b3, c11, c11
  605. SUB b4, c12, c12
  606. #endif
  607. #ifdef LN
  608. LD a1, 6 * SIZE(AO)
  609. LD a2, 7 * SIZE(AO)
  610. LD a3, 4 * SIZE(AO)
  611. LD a4, 5 * SIZE(AO)
  612. MUL a2, c04, t1
  613. MUL a2, c03, t2
  614. MUL a2, c12, t3
  615. MUL a2, c11, t4
  616. MUL a1, c03, c03
  617. MUL a1, c04, c04
  618. MUL a1, c11, c11
  619. MUL a1, c12, c12
  620. ADD5 c03, t1, c03
  621. ADD6 c04, t2, c04
  622. ADD5 c11, t3, c11
  623. ADD6 c12, t4, c12
  624. MUL a3, c03, t1
  625. MUL a3, c04, t2
  626. MUL a3, c11, t3
  627. MUL a3, c12, t4
  628. SUB c01, t1, c01
  629. SUB c02, t2, c02
  630. SUB c09, t3, c09
  631. SUB c10, t4, c10
  632. MUL a4, c04, t1
  633. MUL a4, c03, t2
  634. MUL a4, c12, t3
  635. MUL a4, c11, t4
  636. ADD6 c01, t1, c01
  637. ADD5 c02, t2, c02
  638. ADD6 c09, t3, c09
  639. ADD5 c10, t4, c10
  640. LD a1, 0 * SIZE(AO)
  641. LD a2, 1 * SIZE(AO)
  642. MUL a2, c02, t1
  643. MUL a2, c01, t2
  644. MUL a2, c10, t3
  645. MUL a2, c09, t4
  646. MUL a1, c01, c01
  647. MUL a1, c02, c02
  648. MUL a1, c09, c09
  649. MUL a1, c10, c10
  650. ADD5 c01, t1, c01
  651. ADD6 c02, t2, c02
  652. ADD5 c09, t3, c09
  653. ADD6 c10, t4, c10
  654. #endif
  655. #ifdef LT
  656. LD a1, 0 * SIZE(AO)
  657. LD a2, 1 * SIZE(AO)
  658. LD a3, 2 * SIZE(AO)
  659. LD a4, 3 * SIZE(AO)
  660. MUL a2, c02, t1
  661. MUL a2, c01, t2
  662. MUL a2, c10, t3
  663. MUL a2, c09, t4
  664. MUL a1, c01, c01
  665. MUL a1, c02, c02
  666. MUL a1, c09, c09
  667. MUL a1, c10, c10
  668. ADD5 c01, t1, c01
  669. ADD6 c02, t2, c02
  670. ADD5 c09, t3, c09
  671. ADD6 c10, t4, c10
  672. MUL a3, c01, t1
  673. MUL a3, c02, t2
  674. MUL a3, c09, t3
  675. MUL a3, c10, t4
  676. SUB c03, t1, c03
  677. SUB c04, t2, c04
  678. SUB c11, t3, c11
  679. SUB c12, t4, c12
  680. MUL a4, c02, t1
  681. MUL a4, c01, t2
  682. MUL a4, c10, t3
  683. MUL a4, c09, t4
  684. ADD6 c03, t1, c03
  685. ADD5 c04, t2, c04
  686. ADD6 c11, t3, c11
  687. ADD5 c12, t4, c12
  688. LD a1, 6 * SIZE(AO)
  689. LD a2, 7 * SIZE(AO)
  690. MUL a2, c04, t1
  691. MUL a2, c03, t2
  692. MUL a2, c12, t3
  693. MUL a2, c11, t4
  694. MUL a1, c03, c03
  695. MUL a1, c04, c04
  696. MUL a1, c11, c11
  697. MUL a1, c12, c12
  698. ADD5 c03, t1, c03
  699. ADD6 c04, t2, c04
  700. ADD5 c11, t3, c11
  701. ADD6 c12, t4, c12
  702. #endif
  703. #ifdef RN
  704. LD a1, 0 * SIZE(BO)
  705. LD a2, 1 * SIZE(BO)
  706. LD a3, 2 * SIZE(BO)
  707. LD a4, 3 * SIZE(BO)
  708. MUL a2, c02, t1
  709. MUL a2, c01, t2
  710. MUL a2, c04, t3
  711. MUL a2, c03, t4
  712. MUL a1, c01, c01
  713. MUL a1, c02, c02
  714. MUL a1, c03, c03
  715. MUL a1, c04, c04
  716. ADD5 c01, t1, c01
  717. ADD6 c02, t2, c02
  718. ADD5 c03, t3, c03
  719. ADD6 c04, t4, c04
  720. MUL a3, c01, t1
  721. MUL a3, c02, t2
  722. MUL a3, c03, t3
  723. MUL a3, c04, t4
  724. SUB c09, t1, c09
  725. SUB c10, t2, c10
  726. SUB c11, t3, c11
  727. SUB c12, t4, c12
  728. MUL a4, c02, t1
  729. MUL a4, c01, t2
  730. MUL a4, c04, t3
  731. MUL a4, c03, t4
  732. ADD6 c09, t1, c09
  733. ADD5 c10, t2, c10
  734. ADD6 c11, t3, c11
  735. ADD5 c12, t4, c12
  736. LD a1, 6 * SIZE(BO)
  737. LD a2, 7 * SIZE(BO)
  738. MUL a2, c10, t1
  739. MUL a2, c09, t2
  740. MUL a2, c12, t3
  741. MUL a2, c11, t4
  742. MUL a1, c09, c09
  743. MUL a1, c10, c10
  744. MUL a1, c11, c11
  745. MUL a1, c12, c12
  746. ADD5 c09, t1, c09
  747. ADD6 c10, t2, c10
  748. ADD5 c11, t3, c11
  749. ADD6 c12, t4, c12
  750. #endif
  751. #ifdef RT
  752. LD a1, 6 * SIZE(BO)
  753. LD a2, 7 * SIZE(BO)
  754. LD a3, 4 * SIZE(BO)
  755. LD a4, 5 * SIZE(BO)
  756. MUL a2, c10, t1
  757. MUL a2, c09, t2
  758. MUL a2, c12, t3
  759. MUL a2, c11, t4
  760. MUL a1, c09, c09
  761. MUL a1, c10, c10
  762. MUL a1, c11, c11
  763. MUL a1, c12, c12
  764. ADD5 c09, t1, c09
  765. ADD6 c10, t2, c10
  766. ADD5 c11, t3, c11
  767. ADD6 c12, t4, c12
  768. MUL a3, c09, t1
  769. MUL a3, c10, t2
  770. MUL a3, c11, t3
  771. MUL a3, c12, t4
  772. SUB c01, t1, c01
  773. SUB c02, t2, c02
  774. SUB c03, t3, c03
  775. SUB c04, t4, c04
  776. MUL a4, c10, t1
  777. MUL a4, c09, t2
  778. MUL a4, c12, t3
  779. MUL a4, c11, t4
  780. ADD6 c01, t1, c01
  781. ADD5 c02, t2, c02
  782. ADD6 c03, t3, c03
  783. ADD5 c04, t4, c04
  784. LD a1, 0 * SIZE(BO)
  785. LD a2, 1 * SIZE(BO)
  786. MUL a2, c02, t1
  787. MUL a2, c01, t2
  788. MUL a2, c04, t3
  789. MUL a2, c03, t4
  790. MUL a1, c01, c01
  791. MUL a1, c02, c02
  792. MUL a1, c03, c03
  793. MUL a1, c04, c04
  794. ADD5 c01, t1, c01
  795. ADD6 c02, t2, c02
  796. ADD5 c03, t3, c03
  797. ADD6 c04, t4, c04
  798. #endif
  799. #if defined(LN) || defined(LT)
  800. ST c01, 0 * SIZE(BO)
  801. ST c02, 1 * SIZE(BO)
  802. ST c09, 2 * SIZE(BO)
  803. ST c10, 3 * SIZE(BO)
  804. ST c03, 4 * SIZE(BO)
  805. ST c04, 5 * SIZE(BO)
  806. ST c11, 6 * SIZE(BO)
  807. ST c12, 7 * SIZE(BO)
  808. #else
  809. ST c01, 0 * SIZE(AO)
  810. ST c02, 1 * SIZE(AO)
  811. ST c03, 2 * SIZE(AO)
  812. ST c04, 3 * SIZE(AO)
  813. ST c09, 4 * SIZE(AO)
  814. ST c10, 5 * SIZE(AO)
  815. ST c11, 6 * SIZE(AO)
  816. ST c12, 7 * SIZE(AO)
  817. #endif
  818. #ifdef LN
  819. lda C1, -4 * SIZE(C1)
  820. lda C2, -4 * SIZE(C2)
  821. #endif
  822. ST c01, 0 * SIZE(C1)
  823. ST c02, 1 * SIZE(C1)
  824. ST c03, 2 * SIZE(C1)
  825. ST c04, 3 * SIZE(C1)
  826. ST c09, 0 * SIZE(C2)
  827. ST c10, 1 * SIZE(C2)
  828. ST c11, 2 * SIZE(C2)
  829. ST c12, 3 * SIZE(C2)
  830. #ifndef LN
  831. lda C1, 4 * SIZE(C1)
  832. lda C2, 4 * SIZE(C2)
  833. #endif
  834. fclr t1
  835. fclr t2
  836. fclr t3
  837. fclr t4
  838. #ifdef RT
  839. sll K, ZBASE_SHIFT + 1, TMP1
  840. addq AORIG, TMP1, AORIG
  841. #endif
  842. #if defined(LT) || defined(RN)
  843. subq K, KK, TMP1
  844. sll TMP1, ZBASE_SHIFT + 1, TMP1
  845. addq AO, TMP1, AO
  846. addq BO, TMP1, BO
  847. #endif
  848. #ifdef LT
  849. addq KK, 2, KK
  850. #endif
  851. #ifdef LN
  852. subq KK, 2, KK
  853. #endif
  854. fclr c01
  855. fclr c05
  856. lda I, -1(I)
  857. bgt I, $L11
  858. .align 4
  859. $L20:
  860. and M, 1, I
  861. ble I, $L29
  862. #if defined(LT) || defined(RN)
  863. LD a1, 0 * SIZE(AO)
  864. fclr c09
  865. LD a2, 1 * SIZE(AO)
  866. fclr c13
  867. LD a3, 2 * SIZE(AO)
  868. fclr c02
  869. LD a4, 3 * SIZE(AO)
  870. fclr c06
  871. LD b1, 0 * SIZE(B)
  872. fclr c10
  873. LD b2, 1 * SIZE(B)
  874. fclr c14
  875. LD b3, 2 * SIZE(B)
  876. lda AO, 2 * SIZE(AO)
  877. LD b4, 3 * SIZE(B)
  878. lda BO, 4 * SIZE(B)
  879. lda L, -2(KK)
  880. ble KK, $L28
  881. ble L, $L25
  882. #else
  883. #ifdef LN
  884. sll K, ZBASE_SHIFT + 0, TMP1
  885. subq AORIG, TMP1, AORIG
  886. #endif
  887. sll KK, ZBASE_SHIFT + 0, TMP1
  888. addq AORIG, TMP1, AO
  889. sll KK, ZBASE_SHIFT + 1, TMP1
  890. addq B, TMP1, BO
  891. subq K, KK, TMP1
  892. LD a1, 0 * SIZE(AO)
  893. fclr c09
  894. LD a2, 1 * SIZE(AO)
  895. fclr c13
  896. LD a3, 2 * SIZE(AO)
  897. fclr c02
  898. LD a4, 3 * SIZE(AO)
  899. fclr c06
  900. LD b1, 0 * SIZE(BO)
  901. fclr c10
  902. LD b2, 1 * SIZE(BO)
  903. fclr c14
  904. LD b3, 2 * SIZE(BO)
  905. lda AO, 2 * SIZE(AO)
  906. LD b4, 3 * SIZE(BO)
  907. lda BO, 4 * SIZE(BO)
  908. lda L, -2(TMP1)
  909. ble TMP1, $L28
  910. ble L, $L25
  911. #endif
  912. .align 5
  913. $L22:
  914. ADD1 c09, t1, c09
  915. unop
  916. MUL a1, b1, t1
  917. unop
  918. ADD3 c10, t2, c10
  919. unop
  920. MUL a2, b1, t2
  921. LD b1, 0 * SIZE(BO)
  922. ADD4 c13, t3, c13
  923. unop
  924. MUL a1, b2, t3
  925. lda BO, 8 * SIZE(BO)
  926. ADD2 c14, t4, c14
  927. unop
  928. MUL a2, b2, t4
  929. LD b2, -7 * SIZE(BO)
  930. ADD1 c01, t1, c01
  931. unop
  932. MUL a1, b3, t1
  933. unop
  934. ADD3 c02, t2, c02
  935. unop
  936. MUL a2, b3, t2
  937. LD b3, -6 * SIZE(BO)
  938. ADD4 c05, t3, c05
  939. unop
  940. MUL a1, b4, t3
  941. LD a1, 2 * SIZE(AO)
  942. ADD2 c06, t4, c06
  943. MUL a2, b4, t4
  944. LD b5, -5 * SIZE(BO)
  945. ADD1 c09, t1, c09
  946. unop
  947. MUL a3, b1, t1
  948. LD a2, 3 * SIZE(AO)
  949. ADD3 c10, t2, c10
  950. unop
  951. MUL a4, b1, t2
  952. LD b1, -4 * SIZE(BO)
  953. ADD4 c13, t3, c13
  954. unop
  955. MUL a3, b2, t3
  956. lda AO, 4 * SIZE(AO)
  957. ADD2 c14, t4, c14
  958. MUL a4, b2, t4
  959. LD b2, -3 * SIZE(BO)
  960. ADD1 c01, t1, c01
  961. lda L, -2(L)
  962. MUL a3, b3, t1
  963. LD b4, -1 * SIZE(BO)
  964. ADD3 c02, t2, c02
  965. unop
  966. MUL a4, b3, t2
  967. LD b3, -2 * SIZE(BO)
  968. ADD4 c05, t3, c05
  969. unop
  970. MUL a3, b5, t3
  971. LD a3, 0 * SIZE(AO)
  972. ADD2 c06, t4, c06
  973. MUL a4, b5, t4
  974. LD a4, 1 * SIZE(AO)
  975. bgt L, $L22
  976. .align 4
  /*
   * $L25 / $L27: tail of the multiply-accumulate loop for the current tile.
   * The tile setup and the main loop are above this point.
   *
   * ADD1..ADD4, ADD, MUL and LD are macros, and a*, b*, c*, t* are register
   * aliases, all defined earlier in the file; their exact sign conventions
   * are not visible here.
   *
   * Pattern: each ADDn folds a product left in t1..t4 by an earlier MUL into
   * an accumulator, and the MUL that follows refills that temporary.
   */
  977. $L25:
  978. ADD1 c09, t1, c09
  979. MUL a1, b1, t1
  /*
   * Test the low bit of KK (LT/RN) or TMP1 (LN/RT) -- presumably the trip
   * count for this tile.  Bit set: go straight to the final step at $L27.
   * Bit clear: fall through and run one extra step first.
   */
  980. #if defined(LT) || defined(RN)
  981. blbs KK, $L27
  982. #else
  983. blbs TMP1, $L27
  984. #endif
  985. .align 4
  /* Extra step: reloads a1, a2, b1..b4; AO += 2*SIZE, BO += 4*SIZE. */
  986. ADD3 c10, t2, c10
  987. unop
  988. MUL a2, b1, t2
  989. LD b1, 0 * SIZE(BO)
  990. ADD4 c13, t3, c13
  991. unop
  992. MUL a1, b2, t3
  993. unop
  994. ADD2 c14, t4, c14
  995. unop
  996. MUL a2, b2, t4
  997. LD b2, 1 * SIZE(BO)
  998. ADD1 c01, t1, c01
  999. unop
  1000. MUL a1, b3, t1
  1001. lda AO, 2 * SIZE(AO)
  1002. ADD3 c02, t2, c02
  1003. unop
  1004. MUL a2, b3, t2
  1005. LD b3, 2 * SIZE(BO)
  1006. ADD4 c05, t3, c05
  1007. unop
  1008. MUL a1, b4, t3
  1009. LD a1, -2 * SIZE(AO)
  1010. ADD2 c06, t4, c06
  1011. unop
  1012. MUL a2, b4, t4
  1013. LD a2, -1 * SIZE(AO)
  1014. ADD1 c09, t1, c09
  1015. LD b4, 3 * SIZE(BO)
  1016. MUL a1, b1, t1
  1017. lda BO, 4 * SIZE(BO)
  1018. .align 4
  /*
   * Final step: no further loads.  Issue the last MULs, drain t1..t4 into
   * the accumulators, and advance AO by 2*SIZE and BO by 4*SIZE once more.
   */
  1019. $L27:
  1020. ADD3 c10, t2, c10
  1021. MUL a2, b1, t2
  1022. ADD4 c13, t3, c13
  1023. MUL a1, b2, t3
  1024. ADD2 c14, t4, c14
  1025. MUL a2, b2, t4
  1026. ADD1 c01, t1, c01
  1027. MUL a1, b3, t1
  1028. ADD3 c02, t2, c02
  1029. MUL a2, b3, t2
  1030. ADD4 c05, t3, c05
  1031. MUL a1, b4, t3
  1032. ADD2 c06, t4, c06
  1033. lda AO, 2 * SIZE(AO)
  1034. MUL a2, b4, t4
  1035. lda BO, 4 * SIZE(BO)
  1036. ADD1 c09, t1, c09
  1037. ADD3 c10, t2, c10
  1038. ADD4 c13, t3, c13
  1039. ADD2 c14, t4, c14
  /*
   * Merge the accumulator pairs (c01,c06), (c02,c05), (c09,c14), (c10,c13)
   * with ADD.  The four results stay in c01, c02, c09 and c10.
   */
  1040. ADD c01, c06, c01
  1041. ADD c02, c05, c02
  1042. ADD c09, c14, c09
  1043. ADD c10, c13, c10
  1044. .align 4
  /*
   * $L28: solve-and-store step for the tile held in c01, c02, c09, c10.
   *
   * Sequence: reposition AO/BO, SUB the accumulated values from four stored
   * values, apply the coefficients of the LN/LT/RN/RT variant selected at
   * preprocessing time, then store the results back to where the four values
   * were loaded from and to C1/C2.
   *
   * NOTE(review): SUB, ADD5 and ADD6 are macros defined earlier in the file.
   * The MUL/ADD5/ADD6 groups have the shape of a complex multiply
   * (presumably by a pre-inverted diagonal entry of the triangular matrix);
   * confirm against the macro definitions.
   */
  1045. $L28:
  /*
   * LN/RT: rebuild AO and BO from AORIG and B, using TMP1 = KK - 1 (LN) or
   * KK - 2 (RT), shifted by ZBASE_SHIFT for AO and ZBASE_SHIFT + 1 for BO.
   * LT/RN: step AO back by 2*SIZE and BO back by 4*SIZE.
   */
  1046. #if defined(LN) || defined(RT)
  1047. #ifdef LN
  1048. subq KK, 1, TMP1
  1049. #else
  1050. subq KK, 2, TMP1
  1051. #endif
  1052. sll TMP1, ZBASE_SHIFT + 0, TMP2
  1053. addq AORIG, TMP2, AO
  1054. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1055. addq B, TMP2, BO
  1056. #else
  1057. lda AO, -2 * SIZE(AO)
  1058. lda BO, -4 * SIZE(BO)
  1059. #endif
  /*
   * Load four stored values (from BO for LN/LT, from AO for RN/RT) and
   * combine them with the accumulators through SUB.
   */
  1060. #if defined(LN) || defined(LT)
  1061. LD a1, 0 * SIZE(BO)
  1062. LD a2, 1 * SIZE(BO)
  1063. LD a3, 2 * SIZE(BO)
  1064. LD a4, 3 * SIZE(BO)
  1065. SUB a1, c01, c01
  1066. SUB a2, c02, c02
  1067. SUB a3, c09, c09
  1068. SUB a4, c10, c10
  1069. #else
  1070. LD a1, 0 * SIZE(AO)
  1071. LD a2, 1 * SIZE(AO)
  1072. LD a3, 2 * SIZE(AO)
  1073. LD a4, 3 * SIZE(AO)
  1074. SUB a1, c01, c01
  1075. SUB a2, c02, c02
  1076. SUB a3, c09, c09
  1077. SUB a4, c10, c10
  1078. #endif
  /*
   * LN/LT: one coefficient pair (a1, a2) from AO is applied to both
   * (c01, c02) and (c09, c10).
   */
  1079. #if defined(LN) || defined(LT)
  1080. LD a1, 0 * SIZE(AO)
  1081. LD a2, 1 * SIZE(AO)
  1082. MUL a2, c02, t1
  1083. MUL a2, c01, t2
  1084. MUL a2, c10, t3
  1085. MUL a2, c09, t4
  1086. MUL a1, c01, c01
  1087. MUL a1, c02, c02
  1088. MUL a1, c09, c09
  1089. MUL a1, c10, c10
  1090. ADD5 c01, t1, c01
  1091. ADD6 c02, t2, c02
  1092. ADD5 c09, t3, c09
  1093. ADD6 c10, t4, c10
  1094. #endif
  /*
   * RN: scale (c01, c02) by BO[0..1], update (c09, c10) with BO[2..3]
   * times the new (c01, c02), then scale (c09, c10) by BO[6..7].
   */
  1095. #ifdef RN
  1096. LD a1, 0 * SIZE(BO)
  1097. LD a2, 1 * SIZE(BO)
  1098. LD a3, 2 * SIZE(BO)
  1099. LD a4, 3 * SIZE(BO)
  1100. MUL a2, c02, t1
  1101. MUL a2, c01, t2
  1102. MUL a1, c01, c01
  1103. MUL a1, c02, c02
  1104. ADD5 c01, t1, c01
  1105. ADD6 c02, t2, c02
  1106. MUL a3, c01, t1
  1107. MUL a3, c02, t2
  1108. SUB c09, t1, c09
  1109. SUB c10, t2, c10
  1110. MUL a4, c02, t1
  1111. MUL a4, c01, t2
  1112. ADD6 c09, t1, c09
  1113. ADD5 c10, t2, c10
  1114. LD a1, 6 * SIZE(BO)
  1115. LD a2, 7 * SIZE(BO)
  1116. MUL a2, c10, t1
  1117. MUL a2, c09, t2
  1118. MUL a1, c09, c09
  1119. MUL a1, c10, c10
  1120. ADD5 c09, t1, c09
  1121. ADD6 c10, t2, c10
  1122. #endif
  /*
   * RT: the same three stages in the opposite order: scale (c09, c10) by
   * BO[6..7], update (c01, c02) with BO[4..5], then scale (c01, c02) by
   * BO[0..1].
   */
  1123. #ifdef RT
  1124. LD a1, 6 * SIZE(BO)
  1125. LD a2, 7 * SIZE(BO)
  1126. LD a3, 4 * SIZE(BO)
  1127. LD a4, 5 * SIZE(BO)
  1128. MUL a2, c10, t1
  1129. MUL a2, c09, t2
  1130. MUL a1, c09, c09
  1131. MUL a1, c10, c10
  1132. ADD5 c09, t1, c09
  1133. ADD6 c10, t2, c10
  1134. MUL a3, c09, t1
  1135. MUL a3, c10, t2
  1136. SUB c01, t1, c01
  1137. SUB c02, t2, c02
  1138. MUL a4, c10, t1
  1139. MUL a4, c09, t2
  1140. ADD6 c01, t1, c01
  1141. ADD5 c02, t2, c02
  1142. LD a1, 0 * SIZE(BO)
  1143. LD a2, 1 * SIZE(BO)
  1144. MUL a2, c02, t1
  1145. MUL a2, c01, t2
  1146. MUL a1, c01, c01
  1147. MUL a1, c02, c02
  1148. ADD5 c01, t1, c01
  1149. ADD6 c02, t2, c02
  1150. #endif
  /* Store the results back to the slots the four values were loaded from. */
  1151. #if defined(LN) || defined(LT)
  1152. ST c01, 0 * SIZE(BO)
  1153. ST c02, 1 * SIZE(BO)
  1154. ST c09, 2 * SIZE(BO)
  1155. ST c10, 3 * SIZE(BO)
  1156. #else
  1157. ST c01, 0 * SIZE(AO)
  1158. ST c02, 1 * SIZE(AO)
  1159. ST c09, 2 * SIZE(AO)
  1160. ST c10, 3 * SIZE(AO)
  1161. #endif
  /*
   * Store to the output columns C1 and C2.  LN moves both pointers back by
   * 2*SIZE before the stores; every other variant advances them by 2*SIZE
   * afterwards.
   */
  1162. #ifdef LN
  1163. lda C1, -2 * SIZE(C1)
  1164. lda C2, -2 * SIZE(C2)
  1165. #endif
  1166. ST c01, 0 * SIZE(C1)
  1167. ST c02, 1 * SIZE(C1)
  1168. ST c09, 0 * SIZE(C2)
  1169. ST c10, 1 * SIZE(C2)
  1170. #ifndef LN
  1171. lda C1, 2 * SIZE(C1)
  1172. lda C2, 2 * SIZE(C2)
  1173. #endif
  /* RT: AORIG += K << ZBASE_SHIFT. */
  1174. #ifdef RT
  1175. sll K, ZBASE_SHIFT, TMP1
  1176. addq AORIG, TMP1, AORIG
  1177. #endif
  /*
   * LT/RN: with TMP1 = K - KK, AO += TMP1 << ZBASE_SHIFT and
   * BO += TMP1 << (ZBASE_SHIFT + 1).
   */
  1178. #if defined(LT) || defined(RN)
  1179. subq K, KK, TMP1
  1180. sll TMP1, ZBASE_SHIFT + 0, TMP2
  1181. addq AO, TMP2, AO
  1182. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1183. addq BO, TMP2, BO
  1184. #endif
  /* KK moves by 1 for this tile: up for LT, down for LN. */
  1185. #ifdef LT
  1186. addq KK, 1, KK
  1187. #endif
  1188. #ifdef LN
  1189. subq KK, 1, KK
  1190. #endif
  1191. .align 4
  /*
   * $L29: bookkeeping at the end of one pass of the J loop (the stores above
   * write two output columns, C1 and C2, per pass).
   *   LN    : B += K << (ZBASE_SHIFT + 1)
   *   LT/RN : B = BO
   *   RN    : KK += 2
   *   RT    : KK -= 2
   * J is then decremented, and control returns to $L01 (above this excerpt)
   * while J > 0.
   */
  1192. $L29:
  1193. #ifdef LN
  1194. sll K, ZBASE_SHIFT + 1, TMP1
  1195. addq B, TMP1, B
  1196. #endif
  1197. #if defined(LT) || defined(RN)
  1198. mov BO, B
  1199. #endif
  1200. #ifdef RN
  1201. addq KK, 2, KK
  1202. #endif
  1203. #ifdef RT
  1204. subq KK, 2, KK
  1205. #endif
  1206. lda J, -1(J)
  1207. bgt J, $L01
  1208. .align 4
  /*
   * $L30: setup for the single remaining column when N is odd.
   * J = N & 1; if that is zero there is nothing left, so jump to the
   * epilogue at $L999.
   */
  1209. $L30:
  1210. and N, 1, J
  1211. ble J, $L999
  /*
   * RT: B -= K << ZBASE_SHIFT, and both C1 and C become C - LDC.
   * Other variants: C1 = C, then C += LDC.
   */
  1212. #ifdef RT
  1213. sll K, ZBASE_SHIFT, TMP1
  1214. subq B, TMP1, B
  1215. subq C, LDC, C1
  1216. subq C, LDC, C
  1217. #else
  1218. mov C, C1
  1219. addq C, LDC, C
  1220. #endif
  /* Initial KK for the left-side variants: M + OFFSET (LN) or OFFSET (LT). */
  1221. #ifdef LN
  1222. addq M, OFFSET, KK
  1223. #endif
  1224. #ifdef LT
  1225. mov OFFSET, KK
  1226. #endif
  /* LN/RT keep the base of A in AORIG; LT/RN walk A directly through AO. */
  1227. #if defined(LN) || defined(RT)
  1228. mov A, AORIG
  1229. #else
  1230. mov A, AO
  1231. #endif
  /* I = M >> 1 row pairs; if there are none, skip to the odd-row code at $L50. */
  1232. sra M, 1, I
  1233. ble I, $L50
  1234. .align 4
  /*
   * $L41: accumulation phase for one tile of the single-column pass.  The
   * tile uses four A values (a1..a4) per step and accumulators c01..c08.
   * It is entered once per value of I, through the `bgt I, $L41` that
   * follows the solve step at $L48.
   *
   * LD, MUL, ADD1..ADD4 and ADD are macros, and a*, b*, c*, t* are register
   * aliases, all defined earlier in the file.
   *
   * LT/RN setup: preload a1..a4 from AO[0..3] and b1..b4 from B[0..3], clear
   * t1..t4 and c01..c08, set BO = B + 2*SIZE, advance AO by 4*SIZE, and set
   * L = KK - 2.  If KK <= 0 skip accumulation ($L48); if L <= 0 skip the
   * main loop ($L45).
   */
  1235. $L41:
  1236. #if defined(LT) || defined(RN)
  1237. LD a1, 0 * SIZE(AO)
  1238. fclr t1
  1239. LD a2, 1 * SIZE(AO)
  1240. fclr t2
  1241. LD a3, 2 * SIZE(AO)
  1242. fclr t3
  1243. LD a4, 3 * SIZE(AO)
  1244. fclr t4
  1245. LD b1, 0 * SIZE(B)
  1246. fclr c01
  1247. LD b2, 1 * SIZE(B)
  1248. fclr c05
  1249. LD b3, 2 * SIZE(B)
  1250. fclr c02
  1251. LD b4, 3 * SIZE(B)
  1252. fclr c06
  1253. lda BO, 2 * SIZE(B)
  1254. fclr c03
  1255. lda AO, 4 * SIZE(AO)
  1256. fclr c07
  1257. lda L, -2(KK)
  1258. fclr c04
  1259. fclr c08
  1260. ble KK, $L48
  1261. ble L, $L45
  1262. #else
  /*
   * LN/RT setup: LN first moves AORIG back by K << (ZBASE_SHIFT + 1).
   * Then AO = AORIG + (KK << (ZBASE_SHIFT + 1)), BO = B + (KK << ZBASE_SHIFT)
   * and TMP1 = K - KK replaces KK as the count tested below.
   */
  1263. #ifdef LN
  1264. sll K, ZBASE_SHIFT + 1, TMP1
  1265. subq AORIG, TMP1, AORIG
  1266. #endif
  1267. sll KK, ZBASE_SHIFT + 1, TMP1
  1268. addq AORIG, TMP1, AO
  1269. sll KK, ZBASE_SHIFT, TMP1
  1270. addq B, TMP1, BO
  1271. subq K, KK, TMP1
  1272. LD a1, 0 * SIZE(AO)
  1273. fclr t1
  1274. LD a2, 1 * SIZE(AO)
  1275. fclr t2
  1276. LD a3, 2 * SIZE(AO)
  1277. fclr t3
  1278. LD a4, 3 * SIZE(AO)
  1279. fclr t4
  1280. LD b1, 0 * SIZE(BO)
  1281. fclr c01
  1282. LD b2, 1 * SIZE(BO)
  1283. fclr c05
  1284. LD b3, 2 * SIZE(BO)
  1285. fclr c02
  1286. LD b4, 3 * SIZE(BO)
  1287. fclr c06
  1288. lda BO, 2 * SIZE(BO)
  1289. fclr c03
  1290. lda AO, 4 * SIZE(AO)
  1291. fclr c07
  1292. lda L, -2(TMP1)
  1293. fclr c04
  1294. fclr c08
  1295. ble TMP1, $L48
  1296. ble L, $L45
  1297. #endif
  1298. .align 5
  /*
   * $L42: main loop.  Each pass issues 16 MULs, advances AO by 8*SIZE and BO
   * by 4*SIZE, and lowers L by 2 (presumably two k-iterations per pass).
   * Each ADDn consumes the product left in t1..t4 by an earlier MUL; on the
   * first pass those temporaries are the zeros set up above.  a5 holds the
   * value from 3*SIZE(AO) for the second half of the pass while a4 is being
   * reloaded.
   */
  1299. $L42:
  1300. ADD4 c05, t1, c05
  1301. unop
  1302. MUL a1, b1, t1
  1303. unop
  1304. ADD2 c06, t2, c06
  1305. lda L, -2(L)
  1306. MUL a2, b1, t2
  1307. unop
  1308. ADD4 c07, t3, c07
  1309. unop
  1310. MUL a3, b1, t3
  1311. unop
  1312. ADD2 c08, t4, c08
  1313. unop
  1314. MUL a4, b1, t4
  1315. LD b1, 2 * SIZE(BO)
  1316. ADD1 c01, t1, c01
  1317. unop
  1318. MUL a1, b2, t1
  1319. LD a1, 0 * SIZE(AO)
  1320. ADD3 c02, t2, c02
  1321. lda BO, 4 * SIZE(BO)
  1322. MUL a2, b2, t2
  1323. LD a2, 1 * SIZE(AO)
  1324. ADD1 c03, t3, c03
  1325. unop
  1326. MUL a3, b2, t3
  1327. LD a3, 2 * SIZE(AO)
  1328. ADD3 c04, t4, c04
  1329. unop
  1330. MUL a4, b2, t4
  1331. LD a5, 3 * SIZE(AO)
  1332. ADD4 c05, t1, c05
  1333. unop
  1334. MUL a1, b3, t1
  1335. LD b2, -1 * SIZE(BO)
  1336. ADD2 c06, t2, c06
  1337. unop
  1338. MUL a2, b3, t2
  1339. unop
  1340. ADD4 c07, t3, c07
  1341. unop
  1342. MUL a3, b3, t3
  1343. lda AO, 8 * SIZE(AO)
  1344. ADD2 c08, t4, c08
  1345. unop
  1346. MUL a5, b3, t4
  1347. LD b3, 0 * SIZE(BO)
  1348. ADD1 c01, t1, c01
  1349. unop
  1350. MUL a1, b4, t1
  1351. LD a1, -4 * SIZE(AO)
  1352. ADD3 c02, t2, c02
  1353. unop
  1354. MUL a2, b4, t2
  1355. LD a2, -3 * SIZE(AO)
  1356. ADD1 c03, t3, c03
  1357. LD a4, -1 * SIZE(AO)
  1358. MUL a3, b4, t3
  1359. LD a3, -2 * SIZE(AO)
  1360. ADD3 c04, t4, c04
  1361. MUL a5, b4, t4
  1362. LD b4, 1 * SIZE(BO)
  1363. bgt L, $L42
  1364. .align 4
  /*
   * $L45: loop tail.  If the low bit of the count (KK or TMP1) is set, only
   * the final step at $L47 remains.  Otherwise fall through and run one
   * extra step first, advancing AO by 4*SIZE and BO by 2*SIZE.
   */
  1365. $L45:
  1366. ADD4 c05, t1, c05
  1367. MUL b1, a1, t1
  1368. #if defined(LT) || defined(RN)
  1369. blbs KK, $L47
  1370. #else
  1371. blbs TMP1, $L47
  1372. #endif
  1373. .align 4
  1374. ADD2 c06, t2, c06
  1375. MUL a2, b1, t2
  1376. ADD4 c07, t3, c07
  1377. MUL a3, b1, t3
  1378. ADD2 c08, t4, c08
  1379. unop
  1380. MUL a4, b1, t4
  1381. LD b1, 0 * SIZE(BO)
  1382. ADD1 c01, t1, c01
  1383. unop
  1384. MUL a1, b2, t1
  1385. LD a1, 0 * SIZE(AO)
  1386. ADD3 c02, t2, c02
  1387. unop
  1388. MUL a2, b2, t2
  1389. LD a2, 1 * SIZE(AO)
  1390. ADD1 c03, t3, c03
  1391. unop
  1392. MUL a3, b2, t3
  1393. LD a3, 2 * SIZE(AO)
  1394. ADD3 c04, t4, c04
  1395. MUL a4, b2, t4
  1396. LD a4, 3 * SIZE(AO)
  1397. lda AO, 4 * SIZE(AO)
  1398. ADD4 c05, t1, c05
  1399. LD b2, 1 * SIZE(BO)
  1400. MUL a1, b1, t1
  1401. lda BO, 2 * SIZE(BO)
  1402. .align 4
  /*
   * $L47: final step, with no further loads.  It drains t1..t4, advances AO
   * by 4*SIZE and BO by 2*SIZE, then merges the accumulator pairs
   * (c01,c06), (c02,c05), (c03,c08), (c04,c07) with ADD.  The results stay
   * in c01..c04.
   */
  1403. $L47:
  1404. ADD2 c06, t2, c06
  1405. MUL a2, b1, t2
  1406. ADD4 c07, t3, c07
  1407. MUL a3, b1, t3
  1408. ADD2 c08, t4, c08
  1409. MUL a4, b1, t4
  1410. ADD1 c01, t1, c01
  1411. MUL a1, b2, t1
  1412. ADD3 c02, t2, c02
  1413. MUL a2, b2, t2
  1414. ADD1 c03, t3, c03
  1415. MUL a3, b2, t3
  1416. ADD3 c04, t4, c04
  1417. lda AO, 4 * SIZE(AO)
  1418. MUL a4, b2, t4
  1419. lda BO, 2 * SIZE(BO)
  1420. ADD4 c05, t1, c05
  1421. ADD2 c06, t2, c06
  1422. ADD4 c07, t3, c07
  1423. ADD2 c08, t4, c08
  1424. ADD c01, c06, c01
  1425. ADD c02, c05, c02
  1426. ADD c03, c08, c03
  1427. ADD c04, c07, c04
  /*
   * $L48: solve-and-store step for the tile held in c01..c04, followed by
   * the bookkeeping that closes one pass of the I loop.
   *
   * NOTE(review): SUB, ADD5 and ADD6 are macros defined earlier in the file.
   * The MUL/ADD5/ADD6 groups have the shape of a complex multiply
   * (presumably by a pre-inverted diagonal entry); confirm against the macro
   * definitions.
   */
  1428. $L48:
  /*
   * LN/RT: rebuild AO and BO from AORIG and B, using TMP1 = KK - 2 (LN) or
   * KK - 1 (RT), shifted by ZBASE_SHIFT + 1 for AO and ZBASE_SHIFT for BO.
   * LT/RN: step AO back by 4*SIZE and BO back by 2*SIZE.
   */
  1429. #if defined(LN) || defined(RT)
  1430. #ifdef LN
  1431. subq KK, 2, TMP1
  1432. #else
  1433. subq KK, 1, TMP1
  1434. #endif
  1435. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1436. addq AORIG, TMP2, AO
  1437. sll TMP1, ZBASE_SHIFT, TMP2
  1438. addq B, TMP2, BO
  1439. #else
  1440. lda AO, -4 * SIZE(AO)
  1441. lda BO, -2 * SIZE(BO)
  1442. #endif
  /*
   * Load four stored values (from BO for LN/LT, from AO for RN/RT) and
   * combine them with the accumulators through SUB.
   */
  1443. #if defined(LN) || defined(LT)
  1444. LD a1, 0 * SIZE(BO)
  1445. LD a2, 1 * SIZE(BO)
  1446. LD a3, 2 * SIZE(BO)
  1447. LD a4, 3 * SIZE(BO)
  1448. SUB a1, c01, c01
  1449. SUB a2, c02, c02
  1450. SUB a3, c03, c03
  1451. SUB a4, c04, c04
  1452. #else
  1453. LD a1, 0 * SIZE(AO)
  1454. LD a2, 1 * SIZE(AO)
  1455. LD a3, 2 * SIZE(AO)
  1456. LD a4, 3 * SIZE(AO)
  1457. SUB a1, c01, c01
  1458. SUB a2, c02, c02
  1459. SUB a3, c03, c03
  1460. SUB a4, c04, c04
  1461. #endif
  /*
   * LN: scale (c03, c04) by AO[6..7], update (c01, c02) with AO[4..5] times
   * the new (c03, c04), then scale (c01, c02) by AO[0..1].
   */
  1462. #ifdef LN
  1463. LD a1, 6 * SIZE(AO)
  1464. LD a2, 7 * SIZE(AO)
  1465. LD a3, 4 * SIZE(AO)
  1466. LD a4, 5 * SIZE(AO)
  1467. MUL a2, c04, t1
  1468. MUL a2, c03, t2
  1469. MUL a1, c03, c03
  1470. MUL a1, c04, c04
  1471. ADD5 c03, t1, c03
  1472. ADD6 c04, t2, c04
  1473. MUL a3, c03, t1
  1474. MUL a3, c04, t2
  1475. SUB c01, t1, c01
  1476. SUB c02, t2, c02
  1477. MUL a4, c04, t1
  1478. MUL a4, c03, t2
  1479. ADD6 c01, t1, c01
  1480. ADD5 c02, t2, c02
  1481. LD a1, 0 * SIZE(AO)
  1482. LD a2, 1 * SIZE(AO)
  1483. MUL a2, c02, t1
  1484. MUL a2, c01, t2
  1485. MUL a1, c01, c01
  1486. MUL a1, c02, c02
  1487. ADD5 c01, t1, c01
  1488. ADD6 c02, t2, c02
  1489. #endif
  /*
   * LT: the opposite order: scale (c01, c02) by AO[0..1], update (c03, c04)
   * with AO[2..3], then scale (c03, c04) by AO[6..7].
   */
  1490. #ifdef LT
  1491. LD a1, 0 * SIZE(AO)
  1492. LD a2, 1 * SIZE(AO)
  1493. LD a3, 2 * SIZE(AO)
  1494. LD a4, 3 * SIZE(AO)
  1495. MUL a2, c02, t1
  1496. MUL a2, c01, t2
  1497. MUL a1, c01, c01
  1498. MUL a1, c02, c02
  1499. ADD5 c01, t1, c01
  1500. ADD6 c02, t2, c02
  1501. MUL a3, c01, t1
  1502. MUL a3, c02, t2
  1503. SUB c03, t1, c03
  1504. SUB c04, t2, c04
  1505. MUL a4, c02, t1
  1506. MUL a4, c01, t2
  1507. ADD6 c03, t1, c03
  1508. ADD5 c04, t2, c04
  1509. LD a1, 6 * SIZE(AO)
  1510. LD a2, 7 * SIZE(AO)
  1511. MUL a2, c04, t1
  1512. MUL a2, c03, t2
  1513. MUL a1, c03, c03
  1514. MUL a1, c04, c04
  1515. ADD5 c03, t1, c03
  1516. ADD6 c04, t2, c04
  1517. #endif
  /*
   * RN/RT: one coefficient pair (a1, a2) from BO[0..1] is applied to both
   * (c01, c02) and (c03, c04).
   */
  1518. #if defined(RN) || defined(RT)
  1519. LD a1, 0 * SIZE(BO)
  1520. LD a2, 1 * SIZE(BO)
  1521. MUL a2, c02, t1
  1522. MUL a2, c01, t2
  1523. MUL a2, c04, t3
  1524. MUL a2, c03, t4
  1525. MUL a1, c01, c01
  1526. MUL a1, c02, c02
  1527. MUL a1, c03, c03
  1528. MUL a1, c04, c04
  1529. ADD5 c01, t1, c01
  1530. ADD6 c02, t2, c02
  1531. ADD5 c03, t3, c03
  1532. ADD6 c04, t4, c04
  1533. #endif
  /* Store the results back to the slots the four values were loaded from. */
  1534. #if defined(LN) || defined(LT)
  1535. ST c01, 0 * SIZE(BO)
  1536. ST c02, 1 * SIZE(BO)
  1537. ST c03, 2 * SIZE(BO)
  1538. ST c04, 3 * SIZE(BO)
  1539. #else
  1540. ST c01, 0 * SIZE(AO)
  1541. ST c02, 1 * SIZE(AO)
  1542. ST c03, 2 * SIZE(AO)
  1543. ST c04, 3 * SIZE(AO)
  1544. #endif
  /*
   * Store to the output column C1.  LN moves C1 back by 4*SIZE before the
   * stores; every other variant advances it by 4*SIZE afterwards.
   */
  1545. #ifdef LN
  1546. lda C1, -4 * SIZE(C1)
  1547. #endif
  1548. ST c01, 0 * SIZE(C1)
  1549. ST c02, 1 * SIZE(C1)
  1550. ST c03, 2 * SIZE(C1)
  1551. ST c04, 3 * SIZE(C1)
  1552. #ifndef LN
  1553. lda C1, 4 * SIZE(C1)
  1554. #endif
  /* RT: AORIG += K << (ZBASE_SHIFT + 1). */
  1555. #ifdef RT
  1556. sll K, ZBASE_SHIFT + 1, TMP1
  1557. addq AORIG, TMP1, AORIG
  1558. #endif
  /*
   * LT/RN: with TMP1 = K - KK, AO += TMP1 << (ZBASE_SHIFT + 1) and
   * BO += TMP1 << ZBASE_SHIFT.
   */
  1559. #if defined(LT) || defined(RN)
  1560. subq K, KK, TMP1
  1561. sll TMP1, ZBASE_SHIFT + 1, TMP2
  1562. addq AO, TMP2, AO
  1563. sll TMP1, ZBASE_SHIFT, TMP2
  1564. addq BO, TMP2, BO
  1565. #endif
  /* KK moves by 2 for this tile: up for LT, down for LN. */
  1566. #ifdef LT
  1567. addq KK, 2, KK
  1568. #endif
  1569. #ifdef LN
  1570. subq KK, 2, KK
  1571. #endif
  /* Next pass of the I loop: decrement I and repeat from $L41 while I > 0. */
  1572. lda I, -1(I)
  1573. bgt I, $L41
  1574. .align 4
  /*
   * $L50: accumulation phase for the last row when M is odd, within the
   * single-column pass.  I = M & 1; if that is zero, jump to $L59.
   *
   * LD, MUL, ADD1..ADD4 and ADD are macros, and a*, b*, c*, t* are register
   * aliases, all defined earlier in the file.
   *
   * LT/RN setup: preload a1..a4 from AO[0..3] and b1..b4 from B[0..3], clear
   * t1..t4 and c01, c02, c05, c06, advance AO by 2*SIZE, set
   * BO = B + 2*SIZE, and set L = KK - 2.  If KK <= 0 skip accumulation
   * ($L58); if L <= 0 skip the main loop ($L55).
   */
  1575. $L50:
  1576. and M, 1, I
  1577. ble I, $L59
  1578. #if defined(LT) || defined(RN)
  1579. LD a1, 0 * SIZE(AO)
  1580. fclr t1
  1581. LD a2, 1 * SIZE(AO)
  1582. fclr t2
  1583. LD a3, 2 * SIZE(AO)
  1584. fclr t3
  1585. LD a4, 3 * SIZE(AO)
  1586. fclr t4
  1587. LD b1, 0 * SIZE(B)
  1588. fclr c01
  1589. LD b2, 1 * SIZE(B)
  1590. fclr c05
  1591. LD b3, 2 * SIZE(B)
  1592. fclr c02
  1593. LD b4, 3 * SIZE(B)
  1594. fclr c06
  1595. lda AO, 2 * SIZE(AO)
  1596. lda BO, 2 * SIZE(B)
  1597. lda L, -2(KK)
  1598. ble KK, $L58
  1599. ble L, $L55
  1600. #else
  /*
   * LN/RT setup: LN first moves AORIG back by K << ZBASE_SHIFT.
   * Then AO = AORIG + (KK << ZBASE_SHIFT), BO = B + (KK << ZBASE_SHIFT)
   * and TMP1 = K - KK replaces KK as the count tested below.
   */
  1601. #ifdef LN
  1602. sll K, ZBASE_SHIFT, TMP1
  1603. subq AORIG, TMP1, AORIG
  1604. #endif
  1605. sll KK, ZBASE_SHIFT, TMP1
  1606. addq AORIG, TMP1, AO
  1607. sll KK, ZBASE_SHIFT, TMP1
  1608. addq B, TMP1, BO
  1609. subq K, KK, TMP1
  1610. LD a1, 0 * SIZE(AO)
  1611. fclr t1
  1612. LD a2, 1 * SIZE(AO)
  1613. fclr t2
  1614. LD a3, 2 * SIZE(AO)
  1615. fclr t3
  1616. LD a4, 3 * SIZE(AO)
  1617. fclr t4
  1618. LD b1, 0 * SIZE(BO)
  1619. fclr c01
  1620. LD b2, 1 * SIZE(BO)
  1621. fclr c05
  1622. LD b3, 2 * SIZE(BO)
  1623. fclr c02
  1624. LD b4, 3 * SIZE(BO)
  1625. fclr c06
  1626. lda AO, 2 * SIZE(AO)
  1627. lda BO, 2 * SIZE(BO)
  1628. lda L, -2(TMP1)
  1629. ble TMP1, $L58
  1630. ble L, $L55
  1631. #endif
  1632. .align 5
  /*
   * $L52: main loop.  Each pass issues 8 MULs (a1, a2 with b1, b2, then
   * a3, a4 with b3, b4), advances AO and BO by 4*SIZE each, and lowers L by
   * 2.  Each ADDn consumes the product left in t1..t4 by an earlier MUL.
   */
  1633. $L52:
  1634. ADD1 c01, t1, c01
  1635. unop
  1636. MUL a1, b1, t1
  1637. unop
  1638. ADD3 c02, t2, c02
  1639. lda AO, 4 * SIZE(AO)
  1640. MUL a2, b1, t2
  1641. LD b1, 2 * SIZE(BO)
  1642. ADD4 c05, t3, c05
  1643. lda L, -2(L)
  1644. MUL a1, b2, t3
  1645. LD a1, -2 * SIZE(AO)
  1646. ADD2 c06, t4, c06
  1647. unop
  1648. MUL a2, b2, t4
  1649. LD a2, -1 * SIZE(AO)
  1650. ADD1 c01, t1, c01
  1651. LD b2, 3 * SIZE(BO)
  1652. MUL a3, b3, t1
  1653. lda BO, 4 * SIZE(BO)
  1654. ADD3 c02, t2, c02
  1655. unop
  1656. MUL a4, b3, t2
  1657. LD b3, 0 * SIZE(BO)
  1658. ADD4 c05, t3, c05
  1659. unop
  1660. MUL a3, b4, t3
  1661. LD a3, 0 * SIZE(AO)
  1662. ADD2 c06, t4, c06
  1663. MUL a4, b4, t4
  1664. LD b4, 1 * SIZE(BO)
  1665. unop
  1666. LD a4, 1 * SIZE(AO)
  1667. unop
  1668. unop
  1669. bgt L, $L52
  1670. .align 4
  /*
   * $L55: loop tail.  If the low bit of the count (KK or TMP1) is set, only
   * the final step at $L57 remains.  Otherwise fall through and run one
   * extra step first, advancing AO and BO by 2*SIZE each.
   */
  1671. $L55:
  1672. ADD1 c01, t1, c01
  1673. MUL a1, b1, t1
  1674. #if defined(LT) || defined(RN)
  1675. blbs KK, $L57
  1676. #else
  1677. blbs TMP1, $L57
  1678. #endif
  1679. .align 4
  1680. ADD3 c02, t2, c02
  1681. unop
  1682. MUL a2, b1, t2
  1683. LD b1, 0 * SIZE(BO)
  1684. ADD4 c05, t3, c05
  1685. lda BO, 2 * SIZE(BO)
  1686. MUL a1, b2, t3
  1687. LD a1, 0 * SIZE(AO)
  1688. ADD2 c06, t4, c06
  1689. unop
  1690. MUL a2, b2, t4
  1691. LD a2, 1 * SIZE(AO)
  1692. ADD1 c01, t1, c01
  1693. LD b2, -1 * SIZE(BO)
  1694. MUL a1, b1, t1
  1695. lda AO, 2 * SIZE(AO)
  1696. .align 4
  /*
   * $L57: final step, with no further loads.  It drains t1..t4, advances AO
   * and BO by 2*SIZE each, then merges (c01,c06) and (c02,c05) with ADD.
   * The results stay in c01 and c02.
   */
  1697. $L57:
  1698. ADD3 c02, t2, c02
  1699. MUL a2, b1, t2
  1700. ADD4 c05, t3, c05
  1701. MUL a1, b2, t3
  1702. ADD2 c06, t4, c06
  1703. lda AO, 2 * SIZE(AO)
  1704. MUL a2, b2, t4
  1705. lda BO, 2 * SIZE(BO)
  1706. ADD1 c01, t1, c01
  1707. ADD3 c02, t2, c02
  1708. ADD4 c05, t3, c05
  1709. ADD2 c06, t4, c06
  1710. ADD c01, c06, c01
  1711. ADD c02, c05, c02
  /*
   * $L58: solve-and-store step for the value pair held in c01, c02.
   *
   * NOTE(review): SUB, ADD5 and ADD6 are macros defined earlier in the file.
   * The MUL/ADD5/ADD6 group has the shape of a complex multiply (presumably
   * by a pre-inverted diagonal entry); confirm against the macro
   * definitions.
   */
  1712. $L58:
  /*
   * LN/RT: with TMP1 = KK - 1, AO = AORIG + (TMP1 << ZBASE_SHIFT) and
   * BO = B + (TMP1 << ZBASE_SHIFT).
   * LT/RN: step AO and BO back by 2*SIZE each.
   */
  1713. #if defined(LN) || defined(RT)
  1714. subq KK, 1, TMP1
  1715. sll TMP1, ZBASE_SHIFT, TMP2
  1716. addq AORIG, TMP2, AO
  1717. sll TMP1, ZBASE_SHIFT, TMP2
  1718. addq B, TMP2, BO
  1719. #else
  1720. lda AO, -2 * SIZE(AO)
  1721. lda BO, -2 * SIZE(BO)
  1722. #endif
  /*
   * Load two stored values (from BO for LN/LT, from AO for RN/RT) and
   * combine them with the accumulators through SUB.
   */
  1723. #if defined(LN) || defined(LT)
  1724. LD a1, 0 * SIZE(BO)
  1725. LD a2, 1 * SIZE(BO)
  1726. SUB a1, c01, c01
  1727. SUB a2, c02, c02
  1728. #else
  1729. LD a1, 0 * SIZE(AO)
  1730. LD a2, 1 * SIZE(AO)
  1731. SUB a1, c01, c01
  1732. SUB a2, c02, c02
  1733. #endif
  /* LN/LT: the coefficient pair (a1, a2) comes from AO[0..1]. */
  1734. #if defined(LN) || defined(LT)
  1735. LD a1, 0 * SIZE(AO)
  1736. LD a2, 1 * SIZE(AO)
  1737. MUL a2, c02, t1
  1738. MUL a2, c01, t2
  1739. MUL a1, c01, c01
  1740. MUL a1, c02, c02
  1741. ADD5 c01, t1, c01
  1742. ADD6 c02, t2, c02
  1743. #endif
  /* RN/RT: the same computation with the coefficient pair from BO[0..1]. */
  1744. #if defined(RN) || defined(RT)
  1745. LD a1, 0 * SIZE(BO)
  1746. LD a2, 1 * SIZE(BO)
  1747. MUL a2, c02, t1
  1748. MUL a2, c01, t2
  1749. MUL a1, c01, c01
  1750. MUL a1, c02, c02
  1751. ADD5 c01, t1, c01
  1752. ADD6 c02, t2, c02
  1753. #endif
  /* Store the results back to the slots the two values were loaded from. */
  1754. #if defined(LN) || defined(LT)
  1755. ST c01, 0 * SIZE(BO)
  1756. ST c02, 1 * SIZE(BO)
  1757. #else
  1758. ST c01, 0 * SIZE(AO)
  1759. ST c02, 1 * SIZE(AO)
  1760. #endif
  /*
   * Store to the output column C1.  LN moves C1 back by 2*SIZE before the
   * stores; every other variant advances it by 2*SIZE afterwards.
   */
  1761. #ifdef LN
  1762. lda C1, -2 * SIZE(C1)
  1763. #endif
  1764. ST c01, 0 * SIZE(C1)
  1765. ST c02, 1 * SIZE(C1)
  1766. #ifndef LN
  1767. lda C1, 2 * SIZE(C1)
  1768. #endif
  /* RT: AORIG += K << ZBASE_SHIFT. */
  1769. #ifdef RT
  1770. sll K, ZBASE_SHIFT, TMP1
  1771. addq AORIG, TMP1, AORIG
  1772. #endif
  /* LT/RN: AO and BO each advance by (K - KK) << ZBASE_SHIFT. */
  1773. #if defined(LT) || defined(RN)
  1774. subq K, KK, TMP1
  1775. sll TMP1, ZBASE_SHIFT, TMP2
  1776. addq AO, TMP2, AO
  1777. sll TMP1, ZBASE_SHIFT, TMP2
  1778. addq BO, TMP2, BO
  1779. #endif
  /* KK moves by 1 for this tile: up for LT, down for LN. */
  1780. #ifdef LT
  1781. addq KK, 1, KK
  1782. #endif
  1783. #ifdef LN
  1784. subq KK, 1, KK
  1785. #endif
  1786. .align 4
  /*
   * $L59: bookkeeping at the end of the single-column pass, mirroring $L29
   * with a step of one column instead of two.
   *   LN    : B += K << ZBASE_SHIFT
   *   LT/RN : B = BO
   *   RN    : KK += 1
   *   RT    : KK -= 1
   * Control then falls through to the epilogue at $L999.
   */
  1787. $L59:
  1788. #ifdef LN
  1789. sll K, ZBASE_SHIFT, TMP1
  1790. addq B, TMP1, B
  1791. #endif
  1792. #if defined(LT) || defined(RN)
  1793. mov BO, B
  1794. #endif
  1795. #ifdef RN
  1796. addq KK, 1, KK
  1797. #endif
  1798. #ifdef RT
  1799. subq KK, 1, KK
  1800. #endif
  1801. .align 4
  /*
   * $L999: common exit path.  It reloads floating-point registers $f2..$f9
   * from the eight 8-byte slots at 0..56($sp) (presumably written by the
   * prologue earlier in the file), clears integer register $0 (the
   * return-value register in the Alpha calling convention), releases the
   * STACKSIZE-byte frame and returns.
   */
  1802. $L999:
  1803. ldt $f2, 0($sp)
  1804. ldt $f3, 8($sp)
  1805. ldt $f4, 16($sp)
  1806. ldt $f5, 24($sp)
  1807. ldt $f6, 32($sp)
  1808. ldt $f7, 40($sp)
  1809. ldt $f8, 48($sp)
  1810. ldt $f9, 56($sp)
  /* Return value 0. */
  1811. clr $0
  /* Pop the stack frame. */
  1812. lda $sp, STACKSIZE($sp)
  1813. ret
  1814. .ident VERSION
  1815. .end CNAME