You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

copy_hummer.S 16 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build switch and shared header. PROLOGUE, PROFCODE, EPILOGUE, LL(), SIZE, */
  /* BASE_SHIFT, SP and the upper-case load/store mnemonics used below are not */
  /* defined in this file. Presumably they come from common.h (confirm there). */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* Integer register roles. N is the element count, X is the address that is */
  /* loaded from, Y is the address that is stored to. INCX and INCY are the */
  /* strides, which the entry code converts to byte units. */
  40. #define N r3
  41. #define X r4
  42. #define INCX r5
  43. #define Y r6
  44. #define INCY r7
  /* INCX2 and INCY2 are set to twice the byte strides by the entry code. */
  45. #define INCX2 r8
  46. #define INCY2 r9
  /* X2 and Y2 are not referenced by name in this routine. Note that r10 is */
  /* used directly as a scratch offset in the prologue and at LL(999). */
  47. #define X2 r10
  48. #define Y2 r11
  /* Floating-point registers that carry the data being copied. */
  49. #define A1 f0
  50. #define A2 f1
  51. #define A3 f2
  52. #define A4 f3
  53. #define A5 f4
  54. #define A6 f5
  55. #define A7 f6
  56. #define A8 f7
  57. #define A9 f8
  /* Temporaries used by the realignment paths. T6 and T7 map to f14 and f15, */
  /* the two registers that the prologue saves and LL(999) restores. */
  58. #define T1 f9
  59. #define T2 f10
  60. #define T3 f11
  61. #define T4 f12
  62. #define T5 f13
  63. #define T6 f14
  64. #define T7 f15
  /* Routine entry. Copies N elements from X (stride INCX) to Y (stride INCY) */
  /* and selects one of several specialised paths by stride and alignment. */
  65. PROLOGUE
  66. PROFCODE
  /* Save f14 and f15 below the stack pointer. Each store moves SP down by 16 */
  /* bytes, so SP ends 32 bytes below its entry value. LL(999) undoes this. */
  67. li r10, -16
  68. stfpdux f14, SP, r10
  69. stfpdux f15, SP, r10
  /* Scale the strides by BASE_SHIFT (presumably log2 of the element size, */
  /* TODO confirm in common.h) so that they are byte offsets from here on. */
  70. slwi INCX, INCX, BASE_SHIFT
  71. slwi INCY, INCY, BASE_SHIFT
  /* Double strides, used by the accesses that move two elements at a time. */
  72. add INCX2, INCX, INCX
  73. add INCY2, INCY, INCY
  /* Nothing to copy when N is zero or negative. */
  74. cmpwi cr0, N, 0
  75. ble LL(999)
  /* Stride dispatch. A byte stride equal to SIZE means contiguous elements. */
  /* Non-contiguous Y goes to LL(60), contiguous Y with non-contiguous X goes */
  /* to LL(50), and both contiguous falls through. */
  76. cmpwi cr0, INCY, SIZE
  77. bne LL(60)
  78. cmpwi cr0, INCX, SIZE
  79. bne LL(50)
  /* Both contiguous. Bias both pointers back by one pair so that the first */
  /* update-form access (pointer plus INCX2 or INCY2) lands on element 0. */
  80. sub X, X, INCX2
  81. sub Y, Y, INCY2
  /* Alignment dispatch on the low bits selected by the mask 2 * SIZE - 1. */
  /* X misaligned goes to LL(30), which tests Y itself. X aligned with Y */
  /* misaligned goes to LL(20). Both aligned falls through to LL(10). */
  82. andi. r0, X, 2 * SIZE - 1
  83. bne LL(30)
  84. andi. r0, Y, 2 * SIZE - 1
  85. bne LL(20)
  86. .align 4
  /* Contiguous copy with X and Y both pair-aligned. Data moves two elements */
  /* per access through A1 to A8, with loads running one block of 16 elements */
  /* ahead of the stores. */
  87. LL(10): /* X : aligned Y : aligned */
  /* CTR = N / 16, the number of full 16-element blocks. Skip to the tail */
  /* handling when there is no full block. */
  88. srawi. r0, N, 4
  89. mtspr CTR, r0
  90. beq- LL(15)
  /* Pipeline fill: load the first block (8 pairs) before any store. */
  91. LFPDUX A1, X, INCX2
  92. LFPDUX A2, X, INCX2
  93. LFPDUX A3, X, INCX2
  94. LFPDUX A4, X, INCX2
  95. LFPDUX A5, X, INCX2
  96. LFPDUX A6, X, INCX2
  97. LFPDUX A7, X, INCX2
  98. LFPDUX A8, X, INCX2
  /* bdz decrements CTR. With exactly one block, go straight to the drain. */
  99. bdz LL(13)
  100. .align 4
  /* Steady state: store each pair of the previous block, then reuse the */
  /* same register to load the matching pair of the next block. */
  101. LL(12):
  102. STFPDUX A1, Y, INCY2
  103. LFPDUX A1, X, INCX2
  104. STFPDUX A2, Y, INCY2
  105. LFPDUX A2, X, INCX2
  106. STFPDUX A3, Y, INCY2
  107. LFPDUX A3, X, INCX2
  108. STFPDUX A4, Y, INCY2
  109. LFPDUX A4, X, INCX2
  110. STFPDUX A5, Y, INCY2
  111. LFPDUX A5, X, INCX2
  112. STFPDUX A6, Y, INCY2
  113. LFPDUX A6, X, INCX2
  114. STFPDUX A7, Y, INCY2
  115. LFPDUX A7, X, INCX2
  116. STFPDUX A8, Y, INCY2
  117. LFPDUX A8, X, INCX2
  118. bdnz LL(12)
  119. .align 4
  /* Pipeline drain: store the last block that was loaded. */
  120. LL(13):
  121. STFPDUX A1, Y, INCY2
  122. STFPDUX A2, Y, INCY2
  123. STFPDUX A3, Y, INCY2
  124. STFPDUX A4, Y, INCY2
  125. STFPDUX A5, Y, INCY2
  126. STFPDUX A6, Y, INCY2
  127. STFPDUX A7, Y, INCY2
  128. STFPDUX A8, Y, INCY2
  129. .align 4
  /* Tail: the remaining N mod 16 elements are handled by testing the bits */
  /* 8, 4, 2 and 1 of N in turn. */
  130. LL(15):
  131. andi. r0, N, 15
  132. beq LL(999)
  /* Bit 3 set: copy 8 elements as 4 pairs. */
  133. andi. r0, N, 8
  134. beq LL(16)
  135. LFPDUX A1, X, INCX2
  136. LFPDUX A2, X, INCX2
  137. LFPDUX A3, X, INCX2
  138. LFPDUX A4, X, INCX2
  139. STFPDUX A1, Y, INCY2
  140. STFPDUX A2, Y, INCY2
  141. STFPDUX A3, Y, INCY2
  142. STFPDUX A4, Y, INCY2
  143. .align 4
  /* Bit 2 set: copy 4 elements as 2 pairs. */
  144. LL(16):
  145. andi. r0, N, 4
  146. beq LL(17)
  147. LFPDUX A1, X, INCX2
  148. LFPDUX A2, X, INCX2
  149. STFPDUX A1, Y, INCY2
  150. STFPDUX A2, Y, INCY2
  151. .align 4
  /* Bit 1 set: copy 2 elements as 1 pair. */
  152. LL(17):
  153. andi. r0, N, 2
  154. beq LL(18)
  155. LFPDUX A1, X, INCX2
  156. STFPDUX A1, Y, INCY2
  157. .align 4
  /* Bit 0 set: copy the final single element with scalar load and store. */
  158. LL(18):
  159. andi. r0, N, 1
  160. beq LL(999)
  161. LFDUX A1, X, INCX2
  162. STFDUX A1, Y, INCY2
  163. .align 4
  164. b LL(999)
  165. .align 4
  /* Contiguous copy with X pair-aligned and Y misaligned by one element. */
  /* One element is stored on its own first so that every later paired store */
  /* to Y is aligned. From then on each stored pair is assembled from halves */
  /* of two consecutive loaded pairs, and A1 carries the leftover element */
  /* from one step to the next. */
  /* NOTE(review): the cross load (LFXDUX), secondary move (fsmr), cross move */
  /* (fxmr) and parallel move (fpmr) semantics are defined by the paired-FPU */
  /* ISA, not by this file. Confirm against the ISA reference before editing. */
  166. LL(20): /* X : aligned Y : unaligned */
  /* Peel the first element: load the first pair of X, store one half of it */
  /* to the first Y slot (presumably x[0], held in the secondary half after a */
  /* cross load), then advance Y by one element and count that element. */
  167. LFXDUX A1, X, INCX2
  168. addi N, N, -1
  169. cmpwi cr0, N, 0
  170. STFSDX A1, Y, INCY2
  171. add Y, Y, INCY
  /* The branch uses the compare above. Done if that was the only element. */
  172. ble LL(999)
  173. .align 4
  /* CTR = remaining N / 16. */
  174. srawi. r0, N, 4
  175. mtspr CTR, r0
  176. beq- LL(25)
  /* Pipeline fill: load 8 pairs and start merging neighbouring halves into */
  /* store-ready registers A1, T1, T2 and T3. */
  177. LFXDUX T1, X, INCX2
  178. LFXDUX T2, X, INCX2
  179. LFXDUX T3, X, INCX2
  180. LFXDUX T4, X, INCX2
  181. LFPDUX A6, X, INCX2
  182. fsmr A1, T1
  183. LFPDUX A7, X, INCX2
  184. fsmr T1, T2
  185. LFPDUX A8, X, INCX2
  186. fsmr T2, T3
  187. LFPDUX A9, X, INCX2
  188. fsmr T3, T4
  189. bdz LL(23)
  190. .align 4
  /* Steady state: each pass stores 8 realigned pairs (16 elements) and loads */
  /* the next 8 pairs, interleaving stores, loads and register shuffles. */
  191. LL(22):
  192. STFPDUX A1, Y, INCY2
  193. fxmr T5, A6
  194. STFPDUX T1, Y, INCY2
  195. fxmr T6, A7
  196. STFPDUX T2, Y, INCY2
  197. fxmr T7, A8
  198. STFPDUX T3, Y, INCY2
  199. fxmr A1, A9
  200. fsmr T4, T5
  201. LFPDUX A2, X, INCX2
  202. fsmr T5, T6
  203. LFPDUX A3, X, INCX2
  204. fsmr T6, T7
  205. LFPDUX A4, X, INCX2
  206. fsmr T7, A1
  207. LFPDUX A5, X, INCX2
  208. STFPDUX T4, Y, INCY2
  209. fxmr T1, A2
  210. STFPDUX T5, Y, INCY2
  211. fxmr T2, A3
  212. STFPDUX T6, Y, INCY2
  213. fxmr T3, A4
  214. STFPDUX T7, Y, INCY2
  215. fxmr T4, A5
  216. LFPDUX A6, X, INCX2
  217. fsmr A1, T1
  218. LFPDUX A7, X, INCX2
  219. fsmr T1, T2
  220. LFPDUX A8, X, INCX2
  221. fsmr T2, T3
  222. LFPDUX A9, X, INCX2
  223. fsmr T3, T4
  224. bdnz LL(22)
  225. .align 4
  /* Pipeline drain: store the 8 pairs still in flight. A1 is left holding */
  /* the carried element taken from A9. */
  226. LL(23):
  227. STFPDUX A1, Y, INCY2
  228. fxmr T5, A6
  229. STFPDUX T1, Y, INCY2
  230. fxmr T6, A7
  231. STFPDUX T2, Y, INCY2
  232. fxmr T7, A8
  233. STFPDUX T3, Y, INCY2
  234. fxmr A1, A9
  235. fsmr T4, T5
  236. fsmr T5, T6
  237. fsmr T6, T7
  238. fsmr T7, A1
  239. STFPDUX T4, Y, INCY2
  240. STFPDUX T5, Y, INCY2
  241. STFPDUX T6, Y, INCY2
  242. STFPDUX T7, Y, INCY2
  243. .align 4
  /* Tail: remaining N mod 16 elements, by bits 8, 4, 2 and 1 of N. Each step */
  /* ends by moving the last loaded register into A1 as the new carry. */
  244. LL(25):
  245. andi. r0, N, 15
  246. beq LL(999)
  /* Bit 3 set: 8 elements as 4 realigned pairs. */
  247. andi. r0, N, 8
  248. beq LL(26)
  249. LFXDUX A2, X, INCX2
  250. LFXDUX A3, X, INCX2
  251. LFXDUX A4, X, INCX2
  252. LFXDUX A5, X, INCX2
  253. fsmr A1, A2
  254. fsmr A2, A3
  255. fsmr A3, A4
  256. fsmr A4, A5
  257. STFPDUX A1, Y, INCY2
  258. STFPDUX A2, Y, INCY2
  259. STFPDUX A3, Y, INCY2
  260. STFPDUX A4, Y, INCY2
  261. fpmr A1, A5
  262. .align 4
  /* Bit 2 set: 4 elements as 2 realigned pairs. */
  263. LL(26):
  264. andi. r0, N, 4
  265. beq LL(27)
  266. LFXDUX A2, X, INCX2
  267. LFXDUX A3, X, INCX2
  268. fsmr A1, A2
  269. fsmr A2, A3
  270. STFPDUX A1, Y, INCY2
  271. STFPDUX A2, Y, INCY2
  272. fpmr A1, A3
  273. .align 4
  /* Bit 1 set: 2 elements as 1 realigned pair. */
  274. LL(27):
  275. andi. r0, N, 2
  276. beq LL(28)
  277. LFXDUX A2, X, INCX2
  278. fsmr A1, A2
  279. STFPDUX A1, Y, INCY2
  280. fpmr A1, A2
  281. .align 4
  /* Bit 0 set: the last element is already in A1, so only a scalar store */
  /* is needed and no further load from X is issued. */
  282. LL(28):
  283. andi. r0, N, 1
  284. beq LL(999)
  285. STFDUX A1, Y, INCY2
  286. b LL(999)
  287. .align 4
  /* Contiguous copy with X misaligned by one element. Y is tested here, and */
  /* if it is misaligned too the work is handed to LL(40). Otherwise the first */
  /* element of X is loaded on its own, X is advanced by one element so that */
  /* later paired loads are aligned, and each stored pair is assembled from */
  /* the carried element in A1 plus half of the next loaded pair. */
  /* NOTE(review): LFXDUX, fsmr, fxmr and fpmr semantics come from the */
  /* paired-FPU ISA, not from this file. Confirm before editing the shuffles. */
  /* NOTE(review): N is not decremented on this path, so the paired loads */
  /* appear to read one element past x[N-1] when N is even. Presumably this */
  /* is harmless because that element shares an aligned pair with the last */
  /* valid one. Verify against the memory model of the target. */
  288. LL(30): /* X : unaligned Y : aligned */
  289. andi. r0, Y, 2 * SIZE - 1
  290. bne LL(40)
  /* Load the first element into A1 (the carry) and step X by one element. */
  291. LFDX A1, X, INCX2
  292. add X, X, INCX
  /* CTR = N / 16. */
  293. srawi. r0, N, 4
  294. mtspr CTR, r0
  295. beq- LL(35)
  /* Pipeline fill: load 8 pairs and start merging into A1, T1, T2 and T3. */
  296. LFXDUX T1, X, INCX2
  297. LFXDUX T2, X, INCX2
  298. LFXDUX T3, X, INCX2
  299. LFXDUX T4, X, INCX2
  300. LFPDUX A6, X, INCX2
  301. fsmr A1, T1
  302. LFPDUX A7, X, INCX2
  303. fsmr T1, T2
  304. LFPDUX A8, X, INCX2
  305. fsmr T2, T3
  306. LFPDUX A9, X, INCX2
  307. fsmr T3, T4
  308. bdz LL(33)
  309. .align 4
  /* Steady state: each pass stores 8 realigned pairs (16 elements) and loads */
  /* the next 8 pairs. */
  310. LL(32):
  311. fxmr T5, A6
  312. STFPDUX A1, Y, INCY2
  313. fxmr T6, A7
  314. STFPDUX T1, Y, INCY2
  315. fxmr T7, A8
  316. STFPDUX T2, Y, INCY2
  317. fxmr A1, A9
  318. STFPDUX T3, Y, INCY2
  319. fsmr T4, T5
  320. LFPDUX A2, X, INCX2
  321. fsmr T5, T6
  322. LFPDUX A3, X, INCX2
  323. fsmr T6, T7
  324. LFPDUX A4, X, INCX2
  325. fsmr T7, A1
  326. LFPDUX A5, X, INCX2
  327. STFPDUX T4, Y, INCY2
  328. fxmr T1, A2
  329. STFPDUX T5, Y, INCY2
  330. fxmr T2, A3
  331. STFPDUX T6, Y, INCY2
  332. fxmr T3, A4
  333. STFPDUX T7, Y, INCY2
  334. fxmr T4, A5
  335. LFPDUX A6, X, INCX2
  336. fsmr A1, T1
  337. LFPDUX A7, X, INCX2
  338. fsmr T1, T2
  339. LFPDUX A8, X, INCX2
  340. fsmr T2, T3
  341. LFPDUX A9, X, INCX2
  342. fsmr T3, T4
  343. bdnz LL(32)
  344. .align 4
  /* Pipeline drain: store the 8 pairs still in flight. A1 keeps the carry. */
  345. LL(33):
  346. STFPDUX A1, Y, INCY2
  347. fxmr T5, A6
  348. STFPDUX T1, Y, INCY2
  349. fxmr T6, A7
  350. STFPDUX T2, Y, INCY2
  351. fxmr T7, A8
  352. STFPDUX T3, Y, INCY2
  353. fxmr A1, A9
  354. fsmr T4, T5
  355. fsmr T5, T6
  356. fsmr T6, T7
  357. fsmr T7, A1
  358. STFPDUX T4, Y, INCY2
  359. STFPDUX T5, Y, INCY2
  360. STFPDUX T6, Y, INCY2
  361. STFPDUX T7, Y, INCY2
  362. .align 4
  /* Tail: remaining N mod 16 elements, by bits 8, 4, 2 and 1 of N. Each step */
  /* ends by moving the last loaded register into A1 as the new carry. */
  363. LL(35):
  364. andi. r0, N, 15
  365. beq LL(999)
  /* Bit 3 set: 8 elements as 4 realigned pairs. */
  366. andi. r0, N, 8
  367. beq LL(36)
  368. LFXDUX A2, X, INCX2
  369. LFXDUX A3, X, INCX2
  370. LFXDUX A4, X, INCX2
  371. LFXDUX A5, X, INCX2
  372. fsmr A1, A2
  373. fsmr A2, A3
  374. fsmr A3, A4
  375. fsmr A4, A5
  376. STFPDUX A1, Y, INCY2
  377. STFPDUX A2, Y, INCY2
  378. STFPDUX A3, Y, INCY2
  379. STFPDUX A4, Y, INCY2
  380. fpmr A1, A5
  381. .align 4
  /* Bit 2 set: 4 elements as 2 realigned pairs. */
  382. LL(36):
  383. andi. r0, N, 4
  384. beq LL(37)
  385. LFXDUX A2, X, INCX2
  386. LFXDUX A3, X, INCX2
  387. fsmr A1, A2
  388. fsmr A2, A3
  389. STFPDUX A1, Y, INCY2
  390. STFPDUX A2, Y, INCY2
  391. fpmr A1, A3
  392. .align 4
  /* Bit 1 set: 2 elements as 1 realigned pair. */
  393. LL(37):
  394. andi. r0, N, 2
  395. beq LL(38)
  396. LFXDUX A2, X, INCX2
  397. fsmr A1, A2
  398. STFPDUX A1, Y, INCY2
  399. fpmr A1, A2
  400. .align 4
  /* Bit 0 set: the last element is already in A1, only a scalar store is */
  /* needed. */
  401. LL(38):
  402. andi. r0, N, 1
  403. beq LL(999)
  404. STFDUX A1, Y, INCY2
  405. b LL(999)
  406. .align 4
  /* Contiguous copy with X and Y both misaligned by one element. A single */
  /* element is copied with scalar load and store, both pointers advance by */
  /* one element, and the rest is copied with the same paired, pipelined */
  /* scheme that LL(10) uses. */
  407. LL(40): /* X : unaligned Y : unaligned */
  /* Peel one element and count it. */
  408. LFDX A1, X, INCX2
  409. add X, X, INCX
  410. addi N, N, -1
  411. cmpwi cr0, N, 0
  412. STFDX A1, Y, INCY2
  413. add Y, Y, INCY
  /* The branch uses the compare above. Done if that was the only element. */
  414. ble LL(999)
  /* CTR = remaining N / 16. */
  415. srawi. r0, N, 4
  416. mtspr CTR, r0
  417. beq- LL(45)
  /* Pipeline fill: load the first block of 8 pairs. */
  418. LFPDUX A1, X, INCX2
  419. LFPDUX A2, X, INCX2
  420. LFPDUX A3, X, INCX2
  421. LFPDUX A4, X, INCX2
  422. LFPDUX A5, X, INCX2
  423. LFPDUX A6, X, INCX2
  424. LFPDUX A7, X, INCX2
  425. LFPDUX A8, X, INCX2
  426. bdz LL(43)
  427. .align 4
  /* Steady state: store the previous block while loading the next one. */
  428. LL(42):
  429. STFPDUX A1, Y, INCY2
  430. LFPDUX A1, X, INCX2
  431. STFPDUX A2, Y, INCY2
  432. LFPDUX A2, X, INCX2
  433. STFPDUX A3, Y, INCY2
  434. LFPDUX A3, X, INCX2
  435. STFPDUX A4, Y, INCY2
  436. LFPDUX A4, X, INCX2
  437. STFPDUX A5, Y, INCY2
  438. LFPDUX A5, X, INCX2
  439. STFPDUX A6, Y, INCY2
  440. LFPDUX A6, X, INCX2
  441. STFPDUX A7, Y, INCY2
  442. LFPDUX A7, X, INCX2
  443. STFPDUX A8, Y, INCY2
  444. LFPDUX A8, X, INCX2
  445. bdnz LL(42)
  446. .align 4
  /* Pipeline drain: store the last loaded block. */
  447. LL(43):
  448. STFPDUX A1, Y, INCY2
  449. STFPDUX A2, Y, INCY2
  450. STFPDUX A3, Y, INCY2
  451. STFPDUX A4, Y, INCY2
  452. STFPDUX A5, Y, INCY2
  453. STFPDUX A6, Y, INCY2
  454. STFPDUX A7, Y, INCY2
  455. STFPDUX A8, Y, INCY2
  456. .align 4
  /* Tail: remaining N mod 16 elements, by bits 8, 4, 2 and 1 of N. */
  457. LL(45):
  458. andi. r0, N, 15
  459. beq LL(999)
  /* Bit 3 set: 8 elements as 4 pairs. */
  460. andi. r0, N, 8
  461. beq LL(46)
  462. LFPDUX A1, X, INCX2
  463. LFPDUX A2, X, INCX2
  464. LFPDUX A3, X, INCX2
  465. LFPDUX A4, X, INCX2
  466. STFPDUX A1, Y, INCY2
  467. STFPDUX A2, Y, INCY2
  468. STFPDUX A3, Y, INCY2
  469. STFPDUX A4, Y, INCY2
  470. .align 4
  /* Bit 2 set: 4 elements as 2 pairs. */
  471. LL(46):
  472. andi. r0, N, 4
  473. beq LL(47)
  474. LFPDUX A1, X, INCX2
  475. LFPDUX A2, X, INCX2
  476. STFPDUX A1, Y, INCY2
  477. STFPDUX A2, Y, INCY2
  478. .align 4
  /* Bit 1 set: 2 elements as 1 pair. */
  479. LL(47):
  480. andi. r0, N, 2
  481. beq LL(48)
  482. LFPDUX A1, X, INCX2
  483. STFPDUX A1, Y, INCY2
  484. .align 4
  /* Bit 0 set: final single element with scalar load and store. */
  485. LL(48):
  486. andi. r0, N, 1
  487. beq LL(999)
  488. LFDUX A1, X, INCX2
  489. STFDUX A1, Y, INCY2
  490. .align 4
  491. b LL(999)
  492. .align 4
  /* Strided X, contiguous Y. Reached from the entry dispatch when INCY equals */
  /* SIZE and INCX does not. Elements are loaded one at a time with stride */
  /* INCX, merged two per register, and written with paired stores. */
  /* NOTE(review): fsmfp is taken to copy the primary half of its second */
  /* operand into the secondary half of its first, so that a paired store */
  /* writes two consecutive elements. Confirm against the paired-FPU ISA. */
  493. # INCX != 1, INCY == 1
  494. LL(50):
  /* If Y is not pair-aligned, copy one element with scalar load and store */
  /* so that the paired stores below are aligned. */
  495. andi. r0, Y, 2 * SIZE - 1
  496. beq LL(51)
  497. LFD A1, 0 * SIZE(X)
  498. add X, X, INCX
  499. STFD A1, 0 * SIZE(Y)
  500. add Y, Y, INCY
  501. addi N, N, -1
  502. cmpwi cr0, N, 0
  503. ble LL(999)
  504. .align 4
  /* Bias the pointers for update-form addressing: X by one element stride, */
  /* Y by one pair stride. CTR = N / 16. */
  505. LL(51):
  506. sub X, X, INCX
  507. sub Y, Y, INCY2
  508. srawi. r0, N, 4
  509. mtspr CTR, r0
  510. beq- LL(55)
  511. .align 4
  /* Main loop: 16 scalar loads, 8 merges, 8 paired stores per pass. Loads */
  /* and stores of one pass are not overlapped with the next pass. */
  512. LL(52):
  513. LFDUX A1, X, INCX
  514. LFDUX A2, X, INCX
  515. LFDUX A3, X, INCX
  516. LFDUX A4, X, INCX
  517. LFDUX A5, X, INCX
  518. LFDUX A6, X, INCX
  519. LFDUX A7, X, INCX
  520. LFDUX A8, X, INCX
  521. LFDUX A9, X, INCX
  522. LFDUX T1, X, INCX
  523. LFDUX T2, X, INCX
  524. LFDUX T3, X, INCX
  525. fsmfp A1, A2
  526. LFDUX T4, X, INCX
  527. fsmfp A3, A4
  528. LFDUX T5, X, INCX
  529. fsmfp A5, A6
  530. LFDUX T6, X, INCX
  531. fsmfp A7, A8
  532. LFDUX T7, X, INCX
  533. fsmfp A9, T1
  534. STFPDUX A1, Y, INCY2
  535. fsmfp T2, T3
  536. STFPDUX A3, Y, INCY2
  537. fsmfp T4, T5
  538. STFPDUX A5, Y, INCY2
  539. fsmfp T6, T7
  540. STFPDUX A7, Y, INCY2
  541. STFPDUX A9, Y, INCY2
  542. STFPDUX T2, Y, INCY2
  543. STFPDUX T4, Y, INCY2
  544. STFPDUX T6, Y, INCY2
  545. bdnz LL(52)
  546. .align 4
  /* Tail: remaining N mod 16 elements, by bits 8, 4, 2 and 1 of N. */
  547. LL(55):
  548. andi. r0, N, 15
  549. beq LL(999)
  /* Bit 3 set: 8 elements, merged into 4 pairs. */
  550. andi. r0, N, 8
  551. beq LL(56)
  552. LFDUX A1, X, INCX
  553. LFDUX A2, X, INCX
  554. LFDUX A3, X, INCX
  555. LFDUX A4, X, INCX
  556. LFDUX A5, X, INCX
  557. LFDUX A6, X, INCX
  558. LFDUX A7, X, INCX
  559. LFDUX A8, X, INCX
  560. fsmfp A1, A2
  561. fsmfp A3, A4
  562. fsmfp A5, A6
  563. fsmfp A7, A8
  564. STFPDUX A1, Y, INCY2
  565. STFPDUX A3, Y, INCY2
  566. STFPDUX A5, Y, INCY2
  567. STFPDUX A7, Y, INCY2
  568. .align 4
  /* Bit 2 set: 4 elements, merged into 2 pairs. */
  569. LL(56):
  570. andi. r0, N, 4
  571. beq LL(57)
  572. LFDUX A1, X, INCX
  573. LFDUX A2, X, INCX
  574. LFDUX A3, X, INCX
  575. LFDUX A4, X, INCX
  576. fsmfp A1, A2
  577. fsmfp A3, A4
  578. STFPDUX A1, Y, INCY2
  579. STFPDUX A3, Y, INCY2
  580. .align 4
  /* Bit 1 set: 2 elements, merged into 1 pair. */
  581. LL(57):
  582. andi. r0, N, 2
  583. beq LL(58)
  584. LFDUX A1, X, INCX
  585. LFDUX A2, X, INCX
  586. fsmfp A1, A2
  587. STFPDUX A1, Y, INCY2
  588. .align 4
  /* Bit 0 set: final single element with scalar load and store. */
  589. LL(58):
  590. andi. r0, N, 1
  591. beq LL(999)
  592. LFDUX A1, X, INCX
  593. STFDUX A1, Y, INCY2
  594. b LL(999)
  595. .align 4
  /* Contiguous X, strided Y. Reached from the entry dispatch when INCY does */
  /* not equal SIZE. Elements are read two at a time with paired loads and */
  /* written one at a time with stride INCY, first the primary half of each */
  /* register (STFDUX) and then the secondary half (STFSDUX). */
  /* NOTE(review): the primary and secondary store semantics come from the */
  /* paired-FPU ISA, not from this file. Confirm against the ISA reference. */
  596. # INCX == 1, INCY != 1
  597. LL(60):
  /* Take this path only when X is contiguous, otherwise use the general */
  /* strided copy at LL(100). The test must be on INCX: INCY is already known */
  /* to differ from SIZE here, so testing INCY again would always branch to */
  /* LL(100) and leave everything from here to LL(68) unreachable. */
  598. cmpwi cr0, INCX, SIZE
  599. bne LL(100)
  /* If X is not pair-aligned, copy one element with scalar load and store so */
  /* that the paired loads below are aligned. */
  600. andi. r0, X, 2 * SIZE - 1
  601. beq LL(61)
  602. LFD A1, 0 * SIZE(X)
  603. add X, X, INCX
  604. STFD A1, 0 * SIZE(Y)
  605. add Y, Y, INCY
  606. addi N, N, -1
  607. cmpwi cr0, N, 0
  608. ble LL(999)
  609. .align 4
  /* Bias the pointers for update-form addressing: X by one pair stride, Y by */
  /* one element stride. CTR = N / 16. */
  610. LL(61):
  611. sub X, X, INCX2
  612. sub Y, Y, INCY
  613. srawi. r0, N, 4
  614. mtspr CTR, r0
  615. beq- LL(65)
  /* Pipeline fill: load the first block of 8 pairs (16 elements). */
  616. LFPDUX A1, X, INCX2
  617. LFPDUX A2, X, INCX2
  618. LFPDUX A3, X, INCX2
  619. LFPDUX A4, X, INCX2
  620. LFPDUX A5, X, INCX2
  621. LFPDUX A6, X, INCX2
  622. LFPDUX A7, X, INCX2
  623. LFPDUX A8, X, INCX2
  /* With exactly one block, go straight to the drain. */
  624. bdz LL(63)
  625. .align 4
  /* Steady state: write both halves of each register to two consecutive Y */
  /* slots, then reload that register with a pair from the next block. */
  626. LL(62):
  627. STFDUX A1, Y, INCY
  628. STFSDUX A1, Y, INCY
  629. LFPDUX A1, X, INCX2
  630. STFDUX A2, Y, INCY
  631. STFSDUX A2, Y, INCY
  632. LFPDUX A2, X, INCX2
  633. STFDUX A3, Y, INCY
  634. STFSDUX A3, Y, INCY
  635. LFPDUX A3, X, INCX2
  636. STFDUX A4, Y, INCY
  637. STFSDUX A4, Y, INCY
  638. LFPDUX A4, X, INCX2
  639. STFDUX A5, Y, INCY
  640. STFSDUX A5, Y, INCY
  641. LFPDUX A5, X, INCX2
  642. STFDUX A6, Y, INCY
  643. STFSDUX A6, Y, INCY
  644. LFPDUX A6, X, INCX2
  645. STFDUX A7, Y, INCY
  646. STFSDUX A7, Y, INCY
  647. LFPDUX A7, X, INCX2
  648. STFDUX A8, Y, INCY
  649. STFSDUX A8, Y, INCY
  650. LFPDUX A8, X, INCX2
  651. bdnz LL(62)
  652. .align 4
  /* Pipeline drain: write out the last loaded block. */
  653. LL(63):
  654. STFDUX A1, Y, INCY
  655. STFSDUX A1, Y, INCY
  656. STFDUX A2, Y, INCY
  657. STFSDUX A2, Y, INCY
  658. STFDUX A3, Y, INCY
  659. STFSDUX A3, Y, INCY
  660. STFDUX A4, Y, INCY
  661. STFSDUX A4, Y, INCY
  662. STFDUX A5, Y, INCY
  663. STFSDUX A5, Y, INCY
  664. STFDUX A6, Y, INCY
  665. STFSDUX A6, Y, INCY
  666. STFDUX A7, Y, INCY
  667. STFSDUX A7, Y, INCY
  668. STFDUX A8, Y, INCY
  669. STFSDUX A8, Y, INCY
  670. .align 4
  /* Tail: remaining N mod 16 elements, by bits 8, 4, 2 and 1 of N. */
  671. LL(65):
  672. andi. r0, N, 15
  673. beq LL(999)
  /* Bit 3 set: 8 elements, loaded as 4 pairs. */
  674. andi. r0, N, 8
  675. beq LL(66)
  676. LFPDUX A1, X, INCX2
  677. LFPDUX A2, X, INCX2
  678. LFPDUX A3, X, INCX2
  679. LFPDUX A4, X, INCX2
  680. STFDUX A1, Y, INCY
  681. STFSDUX A1, Y, INCY
  682. STFDUX A2, Y, INCY
  683. STFSDUX A2, Y, INCY
  684. STFDUX A3, Y, INCY
  685. STFSDUX A3, Y, INCY
  686. STFDUX A4, Y, INCY
  687. STFSDUX A4, Y, INCY
  688. .align 4
  /* Bit 2 set: 4 elements, loaded as 2 pairs. */
  689. LL(66):
  690. andi. r0, N, 4
  691. beq LL(67)
  692. LFPDUX A1, X, INCX2
  693. LFPDUX A2, X, INCX2
  694. STFDUX A1, Y, INCY
  695. STFSDUX A1, Y, INCY
  696. STFDUX A2, Y, INCY
  697. STFSDUX A2, Y, INCY
  698. .align 4
  /* Bit 1 set: 2 elements, loaded as 1 pair. */
  699. LL(67):
  700. andi. r0, N, 2
  701. beq LL(68)
  702. LFPDUX A1, X, INCX2
  703. STFDUX A1, Y, INCY
  704. STFSDUX A1, Y, INCY
  705. .align 4
  /* Bit 0 set: final single element. X still points at the last pair that */
  /* was loaded, so the scalar load steps by INCX2 to reach the next element. */
  706. LL(68):
  707. andi. r0, N, 1
  708. beq LL(999)
  709. LFDUX A1, X, INCX2
  710. STFDUX A1, Y, INCY
  711. b LL(999)
  712. .align 4
  /* General copy for arbitrary strides. One element per load and per store, */
  /* 8 elements per pass, with loads running one pass ahead of the stores. */
  713. LL(100):
  /* Bias both pointers by one stride for update-form addressing. */
  714. sub X, X, INCX
  715. sub Y, Y, INCY
  /* CTR = N / 8. */
  716. srawi. r0, N, 3
  717. mtspr CTR, r0
  718. beq- LL(115)
  /* Pipeline fill: load the first 8 elements. */
  719. LFDUX A1, X, INCX
  720. LFDUX A2, X, INCX
  721. LFDUX A3, X, INCX
  722. LFDUX A4, X, INCX
  723. LFDUX A5, X, INCX
  724. LFDUX A6, X, INCX
  725. LFDUX A7, X, INCX
  726. LFDUX A8, X, INCX
  727. bdz LL(113)
  728. .align 4
  /* Steady state: store each element of the previous pass, then reuse the */
  /* register to load the matching element of the next pass. */
  729. LL(112):
  730. STFDUX A1, Y, INCY
  731. LFDUX A1, X, INCX
  732. STFDUX A2, Y, INCY
  733. LFDUX A2, X, INCX
  734. STFDUX A3, Y, INCY
  735. LFDUX A3, X, INCX
  736. STFDUX A4, Y, INCY
  737. LFDUX A4, X, INCX
  738. STFDUX A5, Y, INCY
  739. LFDUX A5, X, INCX
  740. STFDUX A6, Y, INCY
  741. LFDUX A6, X, INCX
  742. STFDUX A7, Y, INCY
  743. LFDUX A7, X, INCX
  744. STFDUX A8, Y, INCY
  745. LFDUX A8, X, INCX
  746. bdnz LL(112)
  747. .align 4
  /* Pipeline drain: store the last 8 loaded elements. */
  748. LL(113):
  749. STFDUX A1, Y, INCY
  750. STFDUX A2, Y, INCY
  751. STFDUX A3, Y, INCY
  752. STFDUX A4, Y, INCY
  753. STFDUX A5, Y, INCY
  754. STFDUX A6, Y, INCY
  755. STFDUX A7, Y, INCY
  756. STFDUX A8, Y, INCY
  757. .align 4
  /* Tail: remaining N mod 8 elements, by bits 4, 2 and 1 of N. */
  758. LL(115):
  759. andi. r0, N, 7
  760. beq LL(999)
  /* Bit 2 set: 4 elements. */
  761. andi. r0, N, 4
  762. beq LL(117)
  763. LFDUX A1, X, INCX
  764. LFDUX A2, X, INCX
  765. LFDUX A3, X, INCX
  766. LFDUX A4, X, INCX
  767. STFDUX A1, Y, INCY
  768. STFDUX A2, Y, INCY
  769. STFDUX A3, Y, INCY
  770. STFDUX A4, Y, INCY
  771. .align 4
  /* Bit 1 set: 2 elements. */
  772. LL(117):
  773. andi. r0, N, 2
  774. beq LL(118)
  775. LFDUX A1, X, INCX
  776. LFDUX A2, X, INCX
  777. STFDUX A1, Y, INCY
  778. STFDUX A2, Y, INCY
  779. .align 4
  /* Bit 0 set: final single element, then fall through to the epilogue. */
  780. LL(118):
  781. andi. r0, N, 1
  782. beq LL(999)
  783. LFDUX A1, X, INCX
  784. STFDUX A1, Y, INCY
  785. .align 4
  /* Common exit for every path. Restores f15 and f14 from the two 16-byte */
  /* slots written by the prologue and returns SP to its entry value. */
  786. LL(999):
  787. li r10, 16
  /* Pre-bias SP down by 16 so that the first update-form load (SP plus 16) */
  /* reads the slot SP currently points at, which holds f15. */
  788. addi SP, SP, -16
  789. lfpdux f15, SP, r10
  790. lfpdux f14, SP, r10
  /* SP now points at the f14 slot, 16 bytes below the entry value. */
  791. addi SP, SP, 16
  792. blr
  793. EPILOGUE