/* File: zscal_hummer.S */
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build setup.  ASSEMBLER is defined before common.h is pulled in.
     PROLOGUE, PROFCODE, EPILOGUE, LL(), SP, SIZE, BASE_SHIFT and the
     upper-case load/store mnemonics used by the code are not defined in
     this file; presumably they all come from common.h (TODO confirm). */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* Integer registers.
       N      element count; the code returns at once when N <= 0 and
              derives every loop trip count from it
       X      base address of the vector; used as the load pointer (and
              as the store pointer in the zero-fill paths)
       INCX   stride as passed in; shifted left by BASE_SHIFT on entry
       INCX2  INCX + INCX after that shift: the step applied by every
              update-form load/store from one element to the next
       XX     X + SIZE, second load pointer of the scalar paths
       Y      store pointer of the scaling paths, initialised to the
              same value as X, so results overwrite the inputs
       YY     Y + SIZE, second store pointer of the scalar paths */
  40. #define N r3
  41. #define X r6
  42. #define INCX r7
  43. #define INCX2 r4
  44. #define XX r5
  45. #define Y r8
  46. #define YY r9
  /* Scalar multiplier.  ALPHA_I is merged into ALPHA with fsmfp at
     entry; the paired paths then use ALPHA alone, while the scalar
     paths use ALPHA and ALPHA_I as two separate factors. */
  47. #define ALPHA f1
  48. #define ALPHA_I f2
  /* A1..A8: values loaded from the vector.  A1 doubles as the zero
     constant until the first load overwrites it.  A2 and A3 map to
     f16/f17, which the entry code saves on the stack. */
  49. #define A1 f0
  50. #define A2 f16
  51. #define A3 f17
  52. #define A4 f3
  53. #define A5 f4
  54. #define A6 f5
  55. #define A7 f6
  56. #define A8 f7
  /* B1..B8: products waiting to be stored.  B7 and B8 map to f14/f15,
     which the entry code saves on the stack. */
  57. #define B1 f8
  58. #define B2 f9
  59. #define B3 f10
  60. #define B4 f11
  61. #define B5 f12
  62. #define B6 f13
  63. #define B7 f14
  64. #define B8 f15
  /* Entry: save the four extra FP registers, build a zero constant,
     normalise the stride and pick one of the code paths.
     Judging by the file name and the arithmetic in the scaling paths
     this is the complex vector scale x := alpha * x, done in place. */
  65. PROLOGUE
  66. PROFCODE
  /* Push f14..f17 (aliased as B7, B8, A2, A3) below SP, 16 bytes each.
     Assumes stfpdux is the store-with-update form, i.e. SP itself is
     moved down by 16 per store; LL(999) undoes this. */
  67. li r10, -16
  68. stfpdux f14, SP, r10
  69. stfpdux f15, SP, r10
  70. stfpdux f16, SP, r10
  71. stfpdux f17, SP, r10
  /* Push four zero words (16 bytes) and read them back as a register
     pair, leaving zero in A1.  r10 is 0 here, so the load address is SP. */
  72. li r10, 0
  73. stwu r10, -4(SP)
  74. stwu r10, -4(SP)
  75. stwu r10, -4(SP)
  76. stwu r10, -4(SP)
  77. lfpdx A1, SP, r10 # Zero clear
  /* NOTE(review): fsmfp presumably copies the primary half of ALPHA_I
     into the secondary half of ALPHA, so ALPHA carries both parts of
     the scalar for the paired instructions - confirm against the ISA. */
  78. fsmfp ALPHA, ALPHA_I
  /* INCX <<= BASE_SHIFT; INCX2 = 2 * INCX (step between elements). */
  79. slwi INCX, INCX, BASE_SHIFT
  80. add INCX2, INCX, INCX
  /* Nothing to do for N <= 0. */
  81. cmpwi cr0, N, 0
  82. ble LL(999)
  /* Scaled stride other than SIZE: take the general-stride code. */
  83. cmpwi cr0, INCX, SIZE
  84. bne LL(100)
  /* Compare ALPHA with the zero in A1, once with fcmpu and once with
     fscmp (presumably the secondary-half compare - TODO confirm).
     Any difference selects the multiply path at LL(50); otherwise
     execution falls through to the zero-fill code that follows. */
  85. fcmpu cr7, ALPHA, A1
  86. bne cr7, LL(50)
  87. fscmp cr7, ALPHA, A1
  88. bne cr7, LL(50)
  /* Unit stride, scalar compared equal to zero: overwrite the vector
     with the zero held in A1, one paired store per element.
     NOTE(review): the elements are never read on this path, so their
     previous contents do not influence the result. */
  /* Any of the low address bits selected by 2*SIZE-1 set: use LL(20). */
  89. andi. r0, X, 2 * SIZE - 1
  90. bne LL(20)
  /* Pre-bias X by one step; each store below is expected to advance X
     by INCX2 before writing (update form - the STFPDUX macro itself is
     defined outside this file). */
  91. sub X, X, INCX2
  /* CTR = N / 4; skip the unrolled loop when that is zero. */
  92. srawi. r0, N, 2
  93. mtspr CTR, r0
  94. beq- LL(15)
  95. .align 4
  /* Main loop: four elements per iteration. */
  96. LL(12):
  97. STFPDUX A1, X, INCX2
  98. STFPDUX A1, X, INCX2
  99. STFPDUX A1, X, INCX2
  100. STFPDUX A1, X, INCX2
  101. bdnz LL(12)
  102. .align 4
  /* Remainder: N & 3 elements, handled as an optional 2 then 1. */
  103. LL(15):
  104. andi. r0, N, 3
  105. beq LL(999)
  106. andi. r0, N, 2
  107. beq LL(17)
  108. STFPDUX A1, X, INCX2
  109. STFPDUX A1, X, INCX2
  110. .align 4
  111. LL(17):
  112. andi. r0, N, 1
  113. beq LL(999)
  114. STFPDUX A1, X, INCX2
  115. b LL(999)
  /* Unit stride, scalar compared equal to zero, X failed the alignment
     test.  One single-value store clears the first component, the
     paired stores are then shifted by SIZE so that each one covers the
     second component of one element and the first component of the
     next, and a final single-value store at LL(29) clears the last
     component. */
  116. .align 4
  117. LL(20):
  /* X - INCX2 + INCX2 is the original X: clear the first component.
     STFDX is used without an update here; X changes only through the
     explicit addi that follows. */
  118. sub X, X, INCX2
  119. STFDX A1, X, INCX2
  /* Shift the store window by one component and account for the
     element that is now split across the two single stores. */
  120. addi X, X, SIZE
  121. addi N, N, -1
  /* Only one element in total: go straight to the closing store. */
  122. cmpwi cr0, N, 0
  123. ble LL(29)
  /* CTR = (N - 1) / 4 paired-store groups. */
  124. srawi. r0, N, 2
  125. mtspr CTR, r0
  126. beq- LL(25)
  127. .align 4
  /* Main loop: four paired stores per iteration. */
  128. LL(22):
  129. STFPDUX A1, X, INCX2
  130. STFPDUX A1, X, INCX2
  131. STFPDUX A1, X, INCX2
  132. STFPDUX A1, X, INCX2
  133. bdnz LL(22)
  134. .align 4
  /* Remainder of the paired stores: optional 2, then optional 1. */
  135. LL(25):
  136. andi. r0, N, 3
  137. beq LL(29)
  138. andi. r0, N, 2
  139. beq LL(27)
  140. STFPDUX A1, X, INCX2
  141. STFPDUX A1, X, INCX2
  142. .align 4
  143. LL(27):
  144. andi. r0, N, 1
  145. beq LL(29)
  146. STFPDUX A1, X, INCX2
  147. .align 4
  /* Closing store: the last component, one step past the current X. */
  148. LL(29):
  149. STFDX A1, X, INCX2
  150. b LL(999)
  /* Unit stride, scalar not equal to zero, paired (two-component)
     loads and stores.  Eight elements per loop iteration, software
     pipelined: the loads for the next group are issued while the
     products of the current group are finished and stored.
     Each element is produced by one fxpmul followed by one fxcxnpma
     with the same ALPHA and the same input register.
     NOTE(review): per the Double Hummer ISA this pair should yield
     (ar*xr - ai*xi, ar*xi + ai*xr), the complex product - confirm the
     exact operand roles against the ISA reference. */
  151. .align 4
  152. LL(50):
  /* Load pointer X and store pointer Y both start one step before the
     first element, so the result overwrites the input. */
  153. sub Y, X, INCX2
  154. sub X, X, INCX2
  /* Low address bits set under the mask 2*SIZE-1: use the scalar
     version at LL(60). */
  155. andi. r0, X, 2 * SIZE - 1
  156. bne LL(60)
  /* CTR = N / 8; no full group means remainder handling only. */
  157. srawi. r0, N, 3
  158. mtspr CTR, r0
  159. beq- LL(55)
  /* Pipeline fill: load the first eight elements and start their
     products (B1 and B2 complete, B3..B5 half done). */
  160. LFPDUX A1, X, INCX2
  161. LFPDUX A2, X, INCX2
  162. LFPDUX A3, X, INCX2
  163. LFPDUX A4, X, INCX2
  164. LFPDUX A5, X, INCX2
  165. fxpmul B1, ALPHA, A1
  166. LFPDUX A6, X, INCX2
  167. fxpmul B2, ALPHA, A2
  168. LFPDUX A7, X, INCX2
  169. fxpmul B3, ALPHA, A3
  170. LFPDUX A8, X, INCX2
  171. fxpmul B4, ALPHA, A4
  172. fxpmul B5, ALPHA, A5
  173. fxcxnpma B1, ALPHA, A1, B1
  174. fxcxnpma B2, ALPHA, A2, B2
  /* Only one group of eight: skip the steady state and drain. */
  175. bdz LL(53)
  176. .align 4
  /* Steady state: finish and store group k while loading group k+1
     and starting its first products.  The instruction order is the
     schedule; an A register is reloaded only after both instructions
     that consume its old value have been issued. */
  177. LL(52):
  178. fxcxnpma B3, ALPHA, A3, B3
  179. LFPDUX A1, X, INCX2
  180. fxpmul B6, ALPHA, A6
  181. STFPDUX B1, Y, INCX2
  182. fxcxnpma B4, ALPHA, A4, B4
  183. LFPDUX A2, X, INCX2
  184. fxpmul B7, ALPHA, A7
  185. STFPDUX B2, Y, INCX2
  186. fxcxnpma B5, ALPHA, A5, B5
  187. LFPDUX A3, X, INCX2
  188. fxpmul B8, ALPHA, A8
  189. STFPDUX B3, Y, INCX2
  190. fxcxnpma B6, ALPHA, A6, B6
  191. LFPDUX A4, X, INCX2
  192. fxpmul B1, ALPHA, A1
  193. STFPDUX B4, Y, INCX2
  194. fxcxnpma B7, ALPHA, A7, B7
  195. LFPDUX A5, X, INCX2
  196. fxpmul B2, ALPHA, A2
  197. STFPDUX B5, Y, INCX2
  198. fxcxnpma B8, ALPHA, A8, B8
  199. LFPDUX A6, X, INCX2
  200. fxpmul B3, ALPHA, A3
  201. STFPDUX B6, Y, INCX2
  202. fxcxnpma B1, ALPHA, A1, B1
  203. LFPDUX A7, X, INCX2
  204. fxpmul B4, ALPHA, A4
  205. STFPDUX B7, Y, INCX2
  206. fxcxnpma B2, ALPHA, A2, B2
  207. LFPDUX A8, X, INCX2
  208. fxpmul B5, ALPHA, A5
  209. STFPDUX B8, Y, INCX2
  210. bdnz LL(52)
  211. .align 4
  /* Pipeline drain: complete and store the last group of eight,
     without issuing further loads. */
  212. LL(53):
  213. fxcxnpma B3, ALPHA, A3, B3
  214. fxpmul B6, ALPHA, A6
  215. STFPDUX B1, Y, INCX2
  216. fxcxnpma B4, ALPHA, A4, B4
  217. fxpmul B7, ALPHA, A7
  218. STFPDUX B2, Y, INCX2
  219. fxcxnpma B5, ALPHA, A5, B5
  220. fxpmul B8, ALPHA, A8
  221. STFPDUX B3, Y, INCX2
  222. fxcxnpma B6, ALPHA, A6, B6
  223. STFPDUX B4, Y, INCX2
  224. fxcxnpma B7, ALPHA, A7, B7
  225. STFPDUX B5, Y, INCX2
  226. fxcxnpma B8, ALPHA, A8, B8
  227. STFPDUX B6, Y, INCX2
  228. STFPDUX B7, Y, INCX2
  229. STFPDUX B8, Y, INCX2
  230. .align 4
  /* Remainder: N & 7 elements, as an optional 4, then 2, then 1. */
  231. LL(55):
  232. andi. r0, N, 7
  233. beq LL(999)
  234. andi. r0, N, 4
  235. beq LL(56)
  236. LFPDUX A1, X, INCX2
  237. LFPDUX A2, X, INCX2
  238. LFPDUX A3, X, INCX2
  239. LFPDUX A4, X, INCX2
  240. fxpmul B1, ALPHA, A1
  241. fxpmul B2, ALPHA, A2
  242. fxpmul B3, ALPHA, A3
  243. fxpmul B4, ALPHA, A4
  244. fxcxnpma B1, ALPHA, A1, B1
  245. fxcxnpma B2, ALPHA, A2, B2
  246. fxcxnpma B3, ALPHA, A3, B3
  247. fxcxnpma B4, ALPHA, A4, B4
  248. STFPDUX B1, Y, INCX2
  249. STFPDUX B2, Y, INCX2
  250. STFPDUX B3, Y, INCX2
  251. STFPDUX B4, Y, INCX2
  252. .align 4
  /* Two leftover elements. */
  253. LL(56):
  254. andi. r0, N, 2
  255. beq LL(57)
  256. LFPDUX A1, X, INCX2
  257. LFPDUX A2, X, INCX2
  258. fxpmul B1, ALPHA, A1
  259. fxpmul B2, ALPHA, A2
  260. fxcxnpma B1, ALPHA, A1, B1
  261. fxcxnpma B2, ALPHA, A2, B2
  262. STFPDUX B1, Y, INCX2
  263. STFPDUX B2, Y, INCX2
  264. .align 4
  /* One leftover element. */
  265. LL(57):
  266. andi. r0, N, 1
  267. beq LL(999)
  268. LFPDUX A1, X, INCX2
  269. fxpmul B1, ALPHA, A1
  270. fxcxnpma B1, ALPHA, A1, B1
  271. STFPDUX B1, Y, INCX2
  272. b LL(999)
  /* Unit stride, scalar not equal to zero, X failed the alignment
     test: same computation with single-value loads and stores.
     X/Y address the first component of each element and XX/YY the
     second one.  Four elements per loop iteration, software pipelined.
     Register pairs (A1,A2) (A3,A4) (A5,A6) (A7,A8) hold one element
     each; the odd B registers receive
         ALPHA * A_first - ALPHA_I * A_second
     and the even B registers
         ALPHA_I * A_first + ALPHA * A_second. */
  273. .align 4
  274. LL(60):
  /* X and Y were already pre-biased at LL(50); derive the pointers to
     the second component from them. */
  275. addi XX, X, SIZE
  276. addi YY, Y, SIZE
  /* CTR = N / 4; no full group means remainder handling only. */
  277. srawi. r0, N, 2
  278. mtspr CTR, r0
  279. beq- LL(65)
  /* Pipeline fill: load four elements and start their products
     (B1 and B2 complete, B3..B5 half done). */
  280. LFDUX A1, X, INCX2
  281. LFDUX A2, XX, INCX2
  282. LFDUX A3, X, INCX2
  283. LFDUX A4, XX, INCX2
  284. LFDUX A5, X, INCX2
  285. fmul B1, ALPHA, A1
  286. LFDUX A6, XX, INCX2
  287. fmul B2, ALPHA_I, A1
  288. LFDUX A7, X, INCX2
  289. fmul B3, ALPHA, A3
  290. LFDUX A8, XX, INCX2
  291. fmul B4, ALPHA_I, A3
  292. fmul B5, ALPHA, A5
  293. fnmsub B1, ALPHA_I, A2, B1
  294. fmadd B2, ALPHA , A2, B2
  /* Only one group of four: skip the steady state and drain. */
  295. bdz LL(63)
  296. .align 4
  /* Steady state: finish and store group k while loading group k+1
     and starting its first products.  The instruction order is the
     schedule and must be kept as is. */
  297. LL(62):
  298. fnmsub B3, ALPHA_I, A4, B3
  299. LFDUX A1, X, INCX2
  300. fmul B6, ALPHA_I, A5
  301. STFDUX B1, Y, INCX2
  302. fmadd B4, ALPHA , A4, B4
  303. LFDUX A2, XX, INCX2
  304. fmul B7, ALPHA, A7
  305. STFDUX B2, YY, INCX2
  306. fnmsub B5, ALPHA_I, A6, B5
  307. LFDUX A3, X, INCX2
  308. fmul B8, ALPHA_I, A7
  309. STFDUX B3, Y, INCX2
  310. fmadd B6, ALPHA , A6, B6
  311. LFDUX A4, XX, INCX2
  312. fmul B1, ALPHA, A1
  313. STFDUX B4, YY, INCX2
  314. fnmsub B7, ALPHA_I, A8, B7
  315. LFDUX A5, X, INCX2
  316. fmul B2, ALPHA_I, A1
  317. STFDUX B5, Y, INCX2
  318. fmadd B8, ALPHA , A8, B8
  319. LFDUX A6, XX, INCX2
  320. fmul B3, ALPHA, A3
  321. STFDUX B6, YY, INCX2
  322. fnmsub B1, ALPHA_I, A2, B1
  323. LFDUX A7, X, INCX2
  324. fmul B4, ALPHA_I, A3
  325. STFDUX B7, Y, INCX2
  326. fmadd B2, ALPHA , A2, B2
  327. LFDUX A8, XX, INCX2
  328. fmul B5, ALPHA, A5
  329. STFDUX B8, YY, INCX2
  330. bdnz LL(62)
  331. .align 4
  /* Pipeline drain: complete and store the last group of four. */
  332. LL(63):
  333. fnmsub B3, ALPHA_I, A4, B3
  334. fmul B6, ALPHA_I, A5
  335. STFDUX B1, Y, INCX2
  336. fmadd B4, ALPHA , A4, B4
  337. fmul B7, ALPHA, A7
  338. STFDUX B2, YY, INCX2
  339. fnmsub B5, ALPHA_I, A6, B5
  340. fmul B8, ALPHA_I, A7
  341. STFDUX B3, Y, INCX2
  342. fmadd B6, ALPHA , A6, B6
  343. STFDUX B4, YY, INCX2
  344. fnmsub B7, ALPHA_I, A8, B7
  345. STFDUX B5, Y, INCX2
  346. fmadd B8, ALPHA , A8, B8
  347. STFDUX B6, YY, INCX2
  348. STFDUX B7, Y, INCX2
  349. STFDUX B8, YY, INCX2
  350. .align 4
  /* Remainder: N & 3 elements, as an optional 2 then 1.
     Here the even results are formed as ALPHA * A_second first and
     ALPHA_I * A_first is fused in afterwards - mathematically the same
     expression as in the main loop, but with the roles of the rounded
     product and the fused term swapped. */
  351. LL(65):
  352. andi. r0, N, 3
  353. beq LL(999)
  354. andi. r0, N, 2
  355. beq LL(67)
  356. LFDUX A1, X, INCX2
  357. LFDUX A2, XX, INCX2
  358. LFDUX A3, X, INCX2
  359. LFDUX A4, XX, INCX2
  360. fmul B1, ALPHA, A1
  361. fmul B2, ALPHA, A2
  362. fmul B3, ALPHA, A3
  363. fmul B4, ALPHA, A4
  364. fnmsub B1, ALPHA_I, A2, B1
  365. fmadd B2, ALPHA_I, A1, B2
  366. fnmsub B3, ALPHA_I, A4, B3
  367. fmadd B4, ALPHA_I, A3, B4
  368. STFDUX B1, Y, INCX2
  369. STFDUX B2, YY, INCX2
  370. STFDUX B3, Y, INCX2
  371. STFDUX B4, YY, INCX2
  372. .align 4
  /* One leftover element. */
  373. LL(67):
  374. andi. r0, N, 1
  375. beq LL(999)
  376. LFDUX A1, X, INCX2
  377. LFDUX A2, XX, INCX2
  378. fmul B1, ALPHA, A1
  379. fmul B2, ALPHA, A2
  380. fnmsub B1, ALPHA_I, A2, B1
  381. fmadd B2, ALPHA_I, A1, B2
  382. STFDUX B1, Y, INCX2
  383. STFDUX B2, YY, INCX2
  384. b LL(999)
  /* General stride (scaled INCX differs from SIZE).  First repeat the
     zero test on the scalar, then - for a zero scalar and an X that
     passes the alignment test - overwrite the vector with the zero in
     A1 using one paired store per element. */
  385. .align 4
  386. LL(100):
  /* Either compare finding ALPHA different from the zero in A1 selects
     the multiply path at LL(150).  fscmp is presumably the compare on
     the secondary register half (TODO confirm). */
  387. fcmpu cr7, ALPHA, A1
  388. bne cr7, LL(150)
  389. fscmp cr7, ALPHA, A1
  390. bne cr7, LL(150)
  /* Low address bits set under the mask 2*SIZE-1: use LL(120). */
  391. andi. r0, X, 2 * SIZE - 1
  392. bne LL(120)
  /* Pre-bias X by one step for the update-form stores. */
  393. sub X, X, INCX2
  /* CTR = N / 4; skip the unrolled loop when that is zero. */
  394. srawi. r0, N, 2
  395. mtspr CTR, r0
  396. beq- LL(115)
  397. .align 4
  /* Main loop: four elements per iteration. */
  398. LL(112):
  399. STFPDUX A1, X, INCX2
  400. STFPDUX A1, X, INCX2
  401. STFPDUX A1, X, INCX2
  402. STFPDUX A1, X, INCX2
  403. bdnz LL(112)
  404. .align 4
  /* Remainder: N & 3 elements, as an optional 2 then 1. */
  405. LL(115):
  406. andi. r0, N, 3
  407. beq LL(999)
  408. andi. r0, N, 2
  409. beq LL(117)
  410. STFPDUX A1, X, INCX2
  411. STFPDUX A1, X, INCX2
  412. .align 4
  413. LL(117):
  414. andi. r0, N, 1
  415. beq LL(999)
  416. STFPDUX A1, X, INCX2
  417. b LL(999)
  /* General stride, zero scalar, X failed the alignment test: clear
     each element with two single-value stores.  The two step registers
     are re-purposed so that the stores can alternate between them:
       INCX2 becomes (element step - SIZE): from the second component of
             one element to the first component of the next
       INCX  becomes SIZE: from the first to the second component. */
  418. .align 4
  419. LL(120):
  420. subi INCX2, INCX2, SIZE
  421. li INCX, SIZE
  /* Pre-bias X by the reduced step so that the first update-form store
     lands on the original X. */
  422. sub X, X, INCX2
  /* CTR = N / 4; skip the unrolled loop when that is zero. */
  423. srawi. r0, N, 2
  424. mtspr CTR, r0
  425. beq- LL(125)
  426. .align 4
  /* Main loop: four elements (eight stores) per iteration. */
  427. LL(122):
  428. STFDUX A1, X, INCX2
  429. STFDUX A1, X, INCX
  430. STFDUX A1, X, INCX2
  431. STFDUX A1, X, INCX
  432. STFDUX A1, X, INCX2
  433. STFDUX A1, X, INCX
  434. STFDUX A1, X, INCX2
  435. STFDUX A1, X, INCX
  436. bdnz LL(122)
  437. .align 4
  /* Remainder: N & 3 elements, as an optional 2 then 1. */
  438. LL(125):
  439. andi. r0, N, 3
  440. beq LL(999)
  441. andi. r0, N, 2
  442. beq LL(127)
  443. STFDUX A1, X, INCX2
  444. STFDUX A1, X, INCX
  445. STFDUX A1, X, INCX2
  446. STFDUX A1, X, INCX
  447. .align 4
  448. LL(127):
  449. andi. r0, N, 1
  450. beq LL(999)
  451. STFDUX A1, X, INCX2
  452. STFDUX A1, X, INCX
  453. b LL(999)
  /* General stride, scalar not equal to zero, paired (two-component)
     loads and stores.  Eight elements per loop iteration, software
     pipelined: the loads for the next group are issued while the
     products of the current group are finished and stored.
     Each element is produced by one fxpmul followed by one fxcxnpma
     with the same ALPHA and the same input register.
     NOTE(review): per the Double Hummer ISA this pair should yield
     (ar*xr - ai*xi, ar*xi + ai*xr), the complex product - confirm the
     exact operand roles against the ISA reference. */
  454. .align 4
  455. LL(150):
  /* Load pointer X and store pointer Y both start one step before the
     first element, so the result overwrites the input. */
  456. sub Y, X, INCX2
  457. sub X, X, INCX2
  /* Low address bits set under the mask 2*SIZE-1: use the scalar
     version at LL(160). */
  458. andi. r0, X, 2 * SIZE - 1
  459. bne LL(160)
  /* CTR = N / 8; no full group means remainder handling only. */
  460. srawi. r0, N, 3
  461. mtspr CTR, r0
  462. beq- LL(155)
  /* Pipeline fill: load the first eight elements and start their
     products (B1 and B2 complete, B3..B5 half done). */
  463. LFPDUX A1, X, INCX2
  464. LFPDUX A2, X, INCX2
  465. LFPDUX A3, X, INCX2
  466. LFPDUX A4, X, INCX2
  467. LFPDUX A5, X, INCX2
  468. fxpmul B1, ALPHA, A1
  469. LFPDUX A6, X, INCX2
  470. fxpmul B2, ALPHA, A2
  471. LFPDUX A7, X, INCX2
  472. fxpmul B3, ALPHA, A3
  473. LFPDUX A8, X, INCX2
  474. fxpmul B4, ALPHA, A4
  475. fxpmul B5, ALPHA, A5
  476. fxcxnpma B1, ALPHA, A1, B1
  477. fxcxnpma B2, ALPHA, A2, B2
  /* Only one group of eight: skip the steady state and drain. */
  478. bdz LL(153)
  479. .align 4
  /* Steady state: finish and store group k while loading group k+1
     and starting its first products.  The instruction order is the
     schedule; an A register is reloaded only after both instructions
     that consume its old value have been issued. */
  480. LL(152):
  481. fxcxnpma B3, ALPHA, A3, B3
  482. LFPDUX A1, X, INCX2
  483. fxpmul B6, ALPHA, A6
  484. STFPDUX B1, Y, INCX2
  485. fxcxnpma B4, ALPHA, A4, B4
  486. LFPDUX A2, X, INCX2
  487. fxpmul B7, ALPHA, A7
  488. STFPDUX B2, Y, INCX2
  489. fxcxnpma B5, ALPHA, A5, B5
  490. LFPDUX A3, X, INCX2
  491. fxpmul B8, ALPHA, A8
  492. STFPDUX B3, Y, INCX2
  493. fxcxnpma B6, ALPHA, A6, B6
  494. LFPDUX A4, X, INCX2
  495. fxpmul B1, ALPHA, A1
  496. STFPDUX B4, Y, INCX2
  497. fxcxnpma B7, ALPHA, A7, B7
  498. LFPDUX A5, X, INCX2
  499. fxpmul B2, ALPHA, A2
  500. STFPDUX B5, Y, INCX2
  501. fxcxnpma B8, ALPHA, A8, B8
  502. LFPDUX A6, X, INCX2
  503. fxpmul B3, ALPHA, A3
  504. STFPDUX B6, Y, INCX2
  505. fxcxnpma B1, ALPHA, A1, B1
  506. LFPDUX A7, X, INCX2
  507. fxpmul B4, ALPHA, A4
  508. STFPDUX B7, Y, INCX2
  509. fxcxnpma B2, ALPHA, A2, B2
  510. LFPDUX A8, X, INCX2
  511. fxpmul B5, ALPHA, A5
  512. STFPDUX B8, Y, INCX2
  513. bdnz LL(152)
  514. .align 4
  /* Pipeline drain: complete and store the last group of eight,
     without issuing further loads. */
  515. LL(153):
  516. fxcxnpma B3, ALPHA, A3, B3
  517. fxpmul B6, ALPHA, A6
  518. STFPDUX B1, Y, INCX2
  519. fxcxnpma B4, ALPHA, A4, B4
  520. fxpmul B7, ALPHA, A7
  521. STFPDUX B2, Y, INCX2
  522. fxcxnpma B5, ALPHA, A5, B5
  523. fxpmul B8, ALPHA, A8
  524. STFPDUX B3, Y, INCX2
  525. fxcxnpma B6, ALPHA, A6, B6
  526. STFPDUX B4, Y, INCX2
  527. fxcxnpma B7, ALPHA, A7, B7
  528. STFPDUX B5, Y, INCX2
  529. fxcxnpma B8, ALPHA, A8, B8
  530. STFPDUX B6, Y, INCX2
  531. STFPDUX B7, Y, INCX2
  532. STFPDUX B8, Y, INCX2
  533. .align 4
  /* Remainder: N & 7 elements, as an optional 4, then 2, then 1. */
  534. LL(155):
  535. andi. r0, N, 7
  536. beq LL(999)
  537. andi. r0, N, 4
  538. beq LL(156)
  539. LFPDUX A1, X, INCX2
  540. LFPDUX A2, X, INCX2
  541. LFPDUX A3, X, INCX2
  542. LFPDUX A4, X, INCX2
  543. fxpmul B1, ALPHA, A1
  544. fxpmul B2, ALPHA, A2
  545. fxpmul B3, ALPHA, A3
  546. fxpmul B4, ALPHA, A4
  547. fxcxnpma B1, ALPHA, A1, B1
  548. fxcxnpma B2, ALPHA, A2, B2
  549. fxcxnpma B3, ALPHA, A3, B3
  550. fxcxnpma B4, ALPHA, A4, B4
  551. STFPDUX B1, Y, INCX2
  552. STFPDUX B2, Y, INCX2
  553. STFPDUX B3, Y, INCX2
  554. STFPDUX B4, Y, INCX2
  555. .align 4
  /* Two leftover elements. */
  556. LL(156):
  557. andi. r0, N, 2
  558. beq LL(157)
  559. LFPDUX A1, X, INCX2
  560. LFPDUX A2, X, INCX2
  561. fxpmul B1, ALPHA, A1
  562. fxpmul B2, ALPHA, A2
  563. fxcxnpma B1, ALPHA, A1, B1
  564. fxcxnpma B2, ALPHA, A2, B2
  565. STFPDUX B1, Y, INCX2
  566. STFPDUX B2, Y, INCX2
  567. .align 4
  /* One leftover element. */
  568. LL(157):
  569. andi. r0, N, 1
  570. beq LL(999)
  571. LFPDUX A1, X, INCX2
  572. fxpmul B1, ALPHA, A1
  573. fxcxnpma B1, ALPHA, A1, B1
  574. STFPDUX B1, Y, INCX2
  575. b LL(999)
  /* General stride, scalar not equal to zero, X failed the alignment
     test: same computation with single-value loads and stores.
     X/Y address the first component of each element and XX/YY the
     second one.  Four elements per loop iteration, software pipelined.
     Register pairs (A1,A2) (A3,A4) (A5,A6) (A7,A8) hold one element
     each; the odd B registers receive
         ALPHA * A_first - ALPHA_I * A_second
     and the even B registers
         ALPHA_I * A_first + ALPHA * A_second.
     The last remainder case has no branch at its end: execution runs
     on into the LL(999) label that follows this code. */
  576. .align 4
  577. LL(160):
  /* X and Y were already pre-biased at LL(150); derive the pointers to
     the second component from them. */
  578. addi XX, X, SIZE
  579. addi YY, Y, SIZE
  /* CTR = N / 4; no full group means remainder handling only. */
  580. srawi. r0, N, 2
  581. mtspr CTR, r0
  582. beq- LL(165)
  /* Pipeline fill: load four elements and start their products
     (B1 and B2 complete, B3..B5 half done). */
  583. LFDUX A1, X, INCX2
  584. LFDUX A2, XX, INCX2
  585. LFDUX A3, X, INCX2
  586. LFDUX A4, XX, INCX2
  587. LFDUX A5, X, INCX2
  588. fmul B1, ALPHA, A1
  589. LFDUX A6, XX, INCX2
  590. fmul B2, ALPHA_I, A1
  591. LFDUX A7, X, INCX2
  592. fmul B3, ALPHA, A3
  593. LFDUX A8, XX, INCX2
  594. fmul B4, ALPHA_I, A3
  595. fmul B5, ALPHA, A5
  596. fnmsub B1, ALPHA_I, A2, B1
  597. fmadd B2, ALPHA , A2, B2
  /* Only one group of four: skip the steady state and drain. */
  598. bdz LL(163)
  599. .align 4
  /* Steady state: finish and store group k while loading group k+1
     and starting its first products.  The instruction order is the
     schedule and must be kept as is. */
  600. LL(162):
  601. fnmsub B3, ALPHA_I, A4, B3
  602. LFDUX A1, X, INCX2
  603. fmul B6, ALPHA_I, A5
  604. STFDUX B1, Y, INCX2
  605. fmadd B4, ALPHA , A4, B4
  606. LFDUX A2, XX, INCX2
  607. fmul B7, ALPHA, A7
  608. STFDUX B2, YY, INCX2
  609. fnmsub B5, ALPHA_I, A6, B5
  610. LFDUX A3, X, INCX2
  611. fmul B8, ALPHA_I, A7
  612. STFDUX B3, Y, INCX2
  613. fmadd B6, ALPHA , A6, B6
  614. LFDUX A4, XX, INCX2
  615. fmul B1, ALPHA, A1
  616. STFDUX B4, YY, INCX2
  617. fnmsub B7, ALPHA_I, A8, B7
  618. LFDUX A5, X, INCX2
  619. fmul B2, ALPHA_I, A1
  620. STFDUX B5, Y, INCX2
  621. fmadd B8, ALPHA , A8, B8
  622. LFDUX A6, XX, INCX2
  623. fmul B3, ALPHA, A3
  624. STFDUX B6, YY, INCX2
  625. fnmsub B1, ALPHA_I, A2, B1
  626. LFDUX A7, X, INCX2
  627. fmul B4, ALPHA_I, A3
  628. STFDUX B7, Y, INCX2
  629. fmadd B2, ALPHA , A2, B2
  630. LFDUX A8, XX, INCX2
  631. fmul B5, ALPHA, A5
  632. STFDUX B8, YY, INCX2
  633. bdnz LL(162)
  634. .align 4
  /* Pipeline drain: complete and store the last group of four. */
  635. LL(163):
  636. fnmsub B3, ALPHA_I, A4, B3
  637. fmul B6, ALPHA_I, A5
  638. STFDUX B1, Y, INCX2
  639. fmadd B4, ALPHA , A4, B4
  640. fmul B7, ALPHA, A7
  641. STFDUX B2, YY, INCX2
  642. fnmsub B5, ALPHA_I, A6, B5
  643. fmul B8, ALPHA_I, A7
  644. STFDUX B3, Y, INCX2
  645. fmadd B6, ALPHA , A6, B6
  646. STFDUX B4, YY, INCX2
  647. fnmsub B7, ALPHA_I, A8, B7
  648. STFDUX B5, Y, INCX2
  649. fmadd B8, ALPHA , A8, B8
  650. STFDUX B6, YY, INCX2
  651. STFDUX B7, Y, INCX2
  652. STFDUX B8, YY, INCX2
  653. .align 4
  /* Remainder: N & 3 elements, as an optional 2 then 1.
     Here the even results are formed as ALPHA * A_second first and
     ALPHA_I * A_first is fused in afterwards - mathematically the same
     expression as in the main loop, but with the roles of the rounded
     product and the fused term swapped. */
  654. LL(165):
  655. andi. r0, N, 3
  656. beq LL(999)
  657. andi. r0, N, 2
  658. beq LL(167)
  659. LFDUX A1, X, INCX2
  660. LFDUX A2, XX, INCX2
  661. LFDUX A3, X, INCX2
  662. LFDUX A4, XX, INCX2
  663. fmul B1, ALPHA, A1
  664. fmul B2, ALPHA, A2
  665. fmul B3, ALPHA, A3
  666. fmul B4, ALPHA, A4
  667. fnmsub B1, ALPHA_I, A2, B1
  668. fmadd B2, ALPHA_I, A1, B2
  669. fnmsub B3, ALPHA_I, A4, B3
  670. fmadd B4, ALPHA_I, A3, B4
  671. STFDUX B1, Y, INCX2
  672. STFDUX B2, YY, INCX2
  673. STFDUX B3, Y, INCX2
  674. STFDUX B4, YY, INCX2
  675. .align 4
  /* One leftover element; no branch afterwards (see header note). */
  676. LL(167):
  677. andi. r0, N, 1
  678. beq LL(999)
  679. LFDUX A1, X, INCX2
  680. LFDUX A2, XX, INCX2
  681. fmul B1, ALPHA, A1
  682. fmul B2, ALPHA, A2
  683. fnmsub B1, ALPHA_I, A2, B1
  684. fmadd B2, ALPHA_I, A1, B2
  685. STFDUX B1, Y, INCX2
  686. STFDUX B2, YY, INCX2
  /* Common exit: restore f14..f17 and SP, then return.
     On arrival SP points at the 16 bytes of zero words pushed at
     entry, with the saved f17, f16, f15, f14 in the four 16-byte slots
     above them.  Assuming lfpdux is the load-with-update form, each
     load first moves SP up by 16 and then reads from the new SP, so the
     first one steps over the zero words and the registers come back in
     the reverse of the order in which they were saved. */
  687. .align 4
  688. LL(999):
  689. li r10, 16
  690. lfpdux f17, SP, r10
  691. lfpdux f16, SP, r10
  692. lfpdux f15, SP, r10
  693. lfpdux f14, SP, r10
  /* SP is now on the f14 slot; one more 16-byte step puts it back at
     its value on entry. */
  694. addi SP, SP, 16
  695. blr
  696. EPILOGUE