You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zswap_hummer.S 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER
  39. #include "common.h"
  /* Integer register aliases. */
  /* N     : element count; tested against 0 and used to derive loop counts. */
  /* X, Y  : pointers advanced by the loads. */
  /* X2, Y2: copies of X and Y (made with mr) advanced by the stores. */
  /* INCX, INCY  : strides, shifted left by BASE_SHIFT in the entry code. */
  /* INCX2, INCY2: set to INCX + INCX and INCY + INCY in the entry code. */
  /* r10 (X2) is also used as the stack step in the entry and exit code, */
  /* before X2 is initialised and after its last use. */
  40. #define N r3
  41. #define X r6
  42. #define INCX r7
  43. #define Y r8
  44. #define INCY r9
  45. #define INCX2 r4
  46. #define INCY2 r5
  47. #define X2 r10
  48. #define Y2 r11
  /* Floating-point register aliases (f0-f16) holding data in flight */
  /* between the two vectors. T5-T7 map to f14-f16, the registers the entry */
  /* code pushes on the stack and the exit code reloads. */
  49. #define A1 f0
  50. #define A2 f1
  51. #define A3 f2
  52. #define A4 f3
  53. #define A5 f4
  54. #define B1 f5
  55. #define B2 f6
  56. #define B3 f7
  57. #define B4 f8
  58. #define B5 f9
  59. #define T1 f10
  60. #define T2 f11
  61. #define T3 f12
  62. #define T4 f13
  63. #define T5 f14
  64. #define T6 f15
  /* T7 is not referenced by any instruction visible in this file. */
  65. #define T7 f16
  /* Entry: save f14-f16, scale the strides, then pick a code path from the */
  /* strides and from the low address bits of X and Y. */
  66. PROLOGUE
  67. PROFCODE
  /* Push f14, f15 and f16 with update-form stores that step SP by -16. */
  68. li r10, -16
  69. stfpdux f14, SP, r10
  70. stfpdux f15, SP, r10
  71. stfpdux f16, SP, r10
  /* INCX/INCY <<= BASE_SHIFT; INCX2/INCY2 = twice the scaled stride. */
  72. slwi INCX, INCX, BASE_SHIFT
  73. slwi INCY, INCY, BASE_SHIFT
  74. add INCX2, INCX, INCX
  75. add INCY2, INCY, INCY
  /* N <= 0: nothing to do, go straight to the register restore. */
  76. cmpwi cr0, N, 0
  77. ble LL(999)
  /* Either scaled stride differs from SIZE: use the general path LL(100). */
  78. cmpwi cr0, INCX, SIZE
  79. bne LL(100)
  80. cmpwi cr0, INCY, SIZE
  81. bne LL(100)
  /* Back X and Y up by one step; the accesses below are update-form (..UX), */
  /* which add the stride to the pointer as part of the access. */
  82. sub X, X, INCX2
  83. sub Y, Y, INCY2
  /* X2/Y2 are the store pointers, X/Y the load pointers. */
  84. mr X2, X
  85. mr Y2, Y
  /* Dispatch on address bits under the mask 2*SIZE-1: */
  /* X bits set -> LL(30); otherwise Y bits set -> LL(20); */
  /* otherwise fall through into LL(10). */
  86. andi. r0, X, 2 * SIZE - 1
  87. bne LL(30)
  88. andi. r0, Y, 2 * SIZE - 1
  89. bne LL(20)
  /* Unit-stride path with neither X nor Y having address bits set under */
  /* the mask 2*SIZE-1. Software-pipelined swap: four paired loads from each */
  /* vector are kept in flight and every store is followed by the load that */
  /* refills the register just stored. */
  /* Stores into X go through X2 with INCX2; stores into Y go through Y2 */
  /* with INCY2. This path is reached only when INCX == INCY == SIZE, so */
  /* INCX2 and INCY2 hold the same value here; each pointer is paired with */
  /* its own stride register, as in the LL(20), LL(30) and LL(100) paths. */
  90. .align 4
  91. LL(10): /* X : aligned Y : aligned */
  /* CTR = N / 4; go to the remainder code when that is zero. */
  92. srawi. r0, N, 2
  93. mtspr CTR, r0
  94. beq- LL(15)
  /* Prime the pipeline: A1-A4 <- X, B1-B4 <- Y. */
  95. LFPDUX A1, X, INCX2
  96. LFPDUX B1, Y, INCY2
  97. LFPDUX A2, X, INCX2
  98. LFPDUX B2, Y, INCY2
  99. LFPDUX A3, X, INCX2
  100. LFPDUX B3, Y, INCY2
  101. LFPDUX A4, X, INCX2
  102. LFPDUX B4, Y, INCY2
  /* Only one group of four: skip the steady-state loop and drain. */
  103. bdz LL(13)
  104. .align 4
  /* Steady state: store a Y value into X, reload that register from Y, */
  /* store an X value into Y, reload that register from X. */
  105. LL(12):
  106. STFPDUX B1, X2, INCX2
  107. LFPDUX B1, Y, INCY2
  108. STFPDUX A1, Y2, INCY2
  109. LFPDUX A1, X, INCX2
  110. STFPDUX B2, X2, INCX2
  111. LFPDUX B2, Y, INCY2
  112. STFPDUX A2, Y2, INCY2
  113. LFPDUX A2, X, INCX2
  114. STFPDUX B3, X2, INCX2
  115. LFPDUX B3, Y, INCY2
  116. STFPDUX A3, Y2, INCY2
  117. LFPDUX A3, X, INCX2
  118. STFPDUX B4, X2, INCX2
  119. LFPDUX B4, Y, INCY2
  120. STFPDUX A4, Y2, INCY2
  121. LFPDUX A4, X, INCX2
  122. bdnz LL(12)
  123. .align 4
  /* Drain: store the four pairs still held in registers. */
  124. LL(13):
  125. STFPDUX B1, X2, INCX2
  126. STFPDUX A1, Y2, INCY2
  127. STFPDUX B2, X2, INCX2
  128. STFPDUX A2, Y2, INCY2
  129. STFPDUX B3, X2, INCX2
  130. STFPDUX A3, Y2, INCY2
  131. STFPDUX B4, X2, INCX2
  132. STFPDUX A4, Y2, INCY2
  133. .align 4
  /* Remainder: N & 3 elements, handled as an optional 2 then an optional 1. */
  134. LL(15):
  135. andi. r0, N, 3
  136. beq LL(999)
  137. andi. r0, N, 2
  138. beq LL(16)
  139. LFPDUX A1, X, INCX2
  140. LFPDUX B1, Y, INCY2
  141. LFPDUX A2, X, INCX2
  142. LFPDUX B2, Y, INCY2
  143. STFPDUX B1, X2, INCX2
  144. STFPDUX A1, Y2, INCY2
  145. STFPDUX B2, X2, INCX2
  146. STFPDUX A2, Y2, INCY2
  147. .align 4
  148. LL(16):
  149. andi. r0, N, 1
  150. beq LL(999)
  151. LFPDUX A1, X, INCX2
  152. LFPDUX B1, Y, INCY2
  153. STFPDUX B1, X2, INCX2
  154. STFPDUX A1, Y2, INCY2
  155. b LL(999)
  /* Unit-stride path: X has no address bits set under 2*SIZE-1, Y does. */
  /* Paired accesses to Y are shifted by one double (INCY) so they run on a */
  /* different boundary from those to X; register halves are then re-paired */
  /* with fsmr/fxmr before every paired store. */
  /* NOTE(review): comments below assume the usual double-hummer meanings */
  /* (LFXDUX = crossed pair load, LFSDX/STFSDX = secondary-half load/store, */
  /* fsmr/fxmr/fpmr = secondary/crossed/parallel register move) - confirm */
  /* against the ISA reference. */
  156. .align 4
  157. LL(20): /* X : aligned Y : unaligned */
  /* Peel: crossed pair load from X, single load from Y, single store of */
  /* one half of the X pair into Y. */
  158. LFXDUX A1, X, INCX2
  159. LFDX B1, Y, INCY2
  160. STFSDX A1, Y2, INCY2
  /* Shift the Y load and store pointers by one double. */
  161. add Y, Y, INCY
  162. add Y2, Y2, INCY
  /* One element is now partly handled; LL(29) completes it if none remain. */
  163. addi N, N, -1
  164. cmpwi cr0, N, 0
  165. ble LL(29)
  166. .align 4
  /* CTR = (N - 1) / 4 using the decremented N. */
  167. srawi. r0, N, 2
  168. mtspr CTR, r0
  169. beq- LL(25)
  /* Prime the pipeline. A1/B1 carry a half left over from the peel; the */
  /* fsmr moves build the first pairs to store. */
  170. LFXDUX T1, X, INCX2
  171. LFXDUX T2, Y, INCY2
  172. LFXDUX T3, X, INCX2
  173. LFXDUX T4, Y, INCY2
  174. LFPDUX A4, X, INCX2
  175. fsmr A1, T1
  176. LFPDUX B4, Y, INCY2
  177. fsmr B1, T2
  178. LFPDUX A5, X, INCX2
  179. fsmr T1, T3
  180. LFPDUX B5, Y, INCY2
  181. fsmr T2, T4
  182. bdz LL(23)
  183. .align 4
  /* Steady state: four elements per iteration. Data from X (A1, T1, T3, T5) */
  /* is stored through Y2 and data from Y (B1, T2, T4, T6) through X2, with */
  /* register moves interleaved between the memory operations. */
  184. LL(22):
  185. fxmr T5, A4
  186. STFPDUX A1, Y2, INCY2
  187. fxmr T6, B4
  188. STFPDUX B1, X2, INCX2
  189. fxmr A1, A5
  190. STFPDUX T1, Y2, INCY2
  191. fxmr B1, B5
  192. STFPDUX T2, X2, INCX2
  193. fsmr T3, T5
  194. LFPDUX A2, X, INCX2
  195. fsmr T4, T6
  196. LFPDUX B2, Y, INCY2
  197. fsmr T5, A1
  198. LFPDUX A3, X, INCX2
  199. fsmr T6, B1
  200. LFPDUX B3, Y, INCY2
  201. fxmr T1, A2
  202. STFPDUX T3, Y2, INCY2
  203. fxmr T2, B2
  204. STFPDUX T4, X2, INCX2
  205. fxmr T3, A3
  206. STFPDUX T5, Y2, INCY2
  207. fxmr T4, B3
  208. STFPDUX T6, X2, INCX2
  209. fsmr A1, T1
  210. LFPDUX A4, X, INCX2
  211. fsmr B1, T2
  212. LFPDUX B4, Y, INCY2
  213. fsmr T1, T3
  214. LFPDUX A5, X, INCX2
  215. fsmr T2, T4
  216. LFPDUX B5, Y, INCY2
  217. bdnz LL(22)
  218. .align 4
  /* Drain: same shuffles and stores as the first half of LL(22), without */
  /* issuing further loads. A1/B1 are left holding the carried halves. */
  219. LL(23):
  220. fxmr T5, A4
  221. STFPDUX A1, Y2, INCY2
  222. fxmr T6, B4
  223. STFPDUX B1, X2, INCX2
  224. fxmr A1, A5
  225. STFPDUX T1, Y2, INCY2
  226. fxmr B1, B5
  227. STFPDUX T2, X2, INCX2
  228. fsmr T3, T5
  229. fsmr T4, T6
  230. fsmr T5, A1
  231. fsmr T6, B1
  232. STFPDUX T3, Y2, INCY2
  233. STFPDUX T4, X2, INCX2
  234. STFPDUX T5, Y2, INCY2
  235. STFPDUX T6, X2, INCX2
  236. .align 4
  /* Remainder: (decremented N) & 3, as an optional 2 then an optional 1. */
  237. LL(25):
  238. andi. r0, N, 3
  239. beq LL(29)
  240. andi. r0, N, 2
  241. beq LL(27)
  242. LFXDUX A2, X, INCX2
  243. LFXDUX B2, Y, INCY2
  244. LFXDUX A3, X, INCX2
  245. LFXDUX B3, Y, INCY2
  246. fsmr A1, A2
  247. fsmr B1, B2
  248. fsmr A2, A3
  249. fsmr B2, B3
  250. STFPDUX A1, Y2, INCY2
  251. STFPDUX B1, X2, INCX2
  252. STFPDUX A2, Y2, INCY2
  /* Carry the most recently loaded pairs forward in A1/B1. */
  253. fpmr A1, A3
  254. STFPDUX B2, X2, INCX2
  255. fpmr B1, B3
  256. .align 4
  257. LL(27):
  258. andi. r0, N, 1
  259. beq LL(29)
  260. LFXDUX A2, X, INCX2
  261. LFXDUX B2, Y, INCY2
  262. fsmr A1, A2
  263. fsmr B1, B2
  264. STFPDUX A1, Y2, INCY2
  265. fpmr A1, A2
  266. STFPDUX B1, X2, INCX2
  267. fpmr B1, B2
  268. .align 4
  /* Finish: load the last outstanding Y double into B1, store the pending */
  /* double from A1 into Y, and store the completed B1 pair into X. */
  269. LL(29):
  270. LFSDX B1, Y, INCY2
  271. STFDX A1, Y2, INCY2
  272. STFPDX B1, X2, INCX2
  273. b LL(999)
  /* Unit-stride path entered when X has address bits set under 2*SIZE-1. */
  /* If Y has them too, control moves on to LL(40). Otherwise this is the */
  /* mirror image of LL(20): the same instruction sequence with the roles */
  /* of X and Y (and of X2 and Y2) exchanged. */
  /* NOTE(review): comments below assume the usual double-hummer meanings */
  /* (LFXDUX = crossed pair load, LFSDX/STFSDX = secondary-half load/store, */
  /* fsmr/fxmr/fpmr = secondary/crossed/parallel register move) - confirm */
  /* against the ISA reference. */
  274. .align 4
  275. LL(30): /* X : unaligned Y : aligned */
  276. andi. r0, Y, 2 * SIZE - 1
  277. bne LL(40)
  /* Peel: crossed pair load from Y, single load from X, single store of */
  /* one half of the Y pair into X. */
  278. LFXDUX A1, Y, INCY2
  279. LFDX B1, X, INCX2
  280. STFSDX A1, X2, INCX2
  /* Shift the X load and store pointers by one double. */
  281. add X, X, INCX
  282. add X2, X2, INCX
  /* One element is now partly handled; LL(39) completes it if none remain. */
  283. addi N, N, -1
  284. cmpwi cr0, N, 0
  285. ble LL(39)
  286. .align 4
  /* CTR = (N - 1) / 4 using the decremented N. */
  287. srawi. r0, N, 2
  288. mtspr CTR, r0
  289. beq- LL(35)
  /* Prime the pipeline; fsmr merges the halves carried over from the peel. */
  290. LFXDUX T1, Y, INCY2
  291. LFXDUX T2, X, INCX2
  292. LFXDUX T3, Y, INCY2
  293. LFXDUX T4, X, INCX2
  294. LFPDUX A4, Y, INCY2
  295. fsmr A1, T1
  296. LFPDUX B4, X, INCX2
  297. fsmr B1, T2
  298. LFPDUX A5, Y, INCY2
  299. fsmr T1, T3
  300. LFPDUX B5, X, INCX2
  301. fsmr T2, T4
  302. bdz LL(33)
  303. .align 4
  /* Steady state: four elements per iteration. Data from Y (A1, T1, T3, T5) */
  /* is stored through X2 and data from X (B1, T2, T4, T6) through Y2. */
  304. LL(32):
  305. fxmr T5, A4
  306. STFPDUX A1, X2, INCX2
  307. fxmr T6, B4
  308. STFPDUX B1, Y2, INCY2
  309. fxmr A1, A5
  310. STFPDUX T1, X2, INCX2
  311. fxmr B1, B5
  312. STFPDUX T2, Y2, INCY2
  313. fsmr T3, T5
  314. LFPDUX A2, Y, INCY2
  315. fsmr T4, T6
  316. LFPDUX B2, X, INCX2
  317. fsmr T5, A1
  318. LFPDUX A3, Y, INCY2
  319. fsmr T6, B1
  320. LFPDUX B3, X, INCX2
  321. fxmr T1, A2
  322. STFPDUX T3, X2, INCX2
  323. fxmr T2, B2
  324. STFPDUX T4, Y2, INCY2
  325. fxmr T3, A3
  326. STFPDUX T5, X2, INCX2
  327. fxmr T4, B3
  328. STFPDUX T6, Y2, INCY2
  329. fsmr A1, T1
  330. LFPDUX A4, Y, INCY2
  331. fsmr B1, T2
  332. LFPDUX B4, X, INCX2
  333. fsmr T1, T3
  334. LFPDUX A5, Y, INCY2
  335. fsmr T2, T4
  336. LFPDUX B5, X, INCX2
  337. bdnz LL(32)
  338. .align 4
  /* Drain: same shuffles and stores as the first half of LL(32), without */
  /* issuing further loads. A1/B1 are left holding the carried halves. */
  339. LL(33):
  340. fxmr T5, A4
  341. STFPDUX A1, X2, INCX2
  342. fxmr T6, B4
  343. STFPDUX B1, Y2, INCY2
  344. fxmr A1, A5
  345. STFPDUX T1, X2, INCX2
  346. fxmr B1, B5
  347. STFPDUX T2, Y2, INCY2
  348. fsmr T3, T5
  349. fsmr T4, T6
  350. fsmr T5, A1
  351. fsmr T6, B1
  352. STFPDUX T3, X2, INCX2
  353. STFPDUX T4, Y2, INCY2
  354. STFPDUX T5, X2, INCX2
  355. STFPDUX T6, Y2, INCY2
  356. .align 4
  /* Remainder: (decremented N) & 3, as an optional 2 then an optional 1. */
  357. LL(35):
  358. andi. r0, N, 3
  359. beq LL(39)
  360. andi. r0, N, 2
  361. beq LL(37)
  362. LFXDUX A2, Y, INCY2
  363. LFXDUX B2, X, INCX2
  364. LFXDUX A3, Y, INCY2
  365. LFXDUX B3, X, INCX2
  366. fsmr A1, A2
  367. fsmr B1, B2
  368. fsmr A2, A3
  369. fsmr B2, B3
  370. STFPDUX A1, X2, INCX2
  371. STFPDUX B1, Y2, INCY2
  372. STFPDUX A2, X2, INCX2
  /* Carry the most recently loaded pairs forward in A1/B1. */
  373. fpmr A1, A3
  374. STFPDUX B2, Y2, INCY2
  375. fpmr B1, B3
  376. .align 4
  377. LL(37):
  378. andi. r0, N, 1
  379. beq LL(39)
  380. LFXDUX A2, Y, INCY2
  381. LFXDUX B2, X, INCX2
  382. fsmr A1, A2
  383. fsmr B1, B2
  384. STFPDUX A1, X2, INCX2
  385. fpmr A1, A2
  386. STFPDUX B1, Y2, INCY2
  387. fpmr B1, B2
  388. .align 4
  /* Finish: load the last outstanding X double into B1, store the pending */
  /* double from A1 into X, and store the completed B1 pair into Y. */
  389. LL(39):
  390. LFSDX B1, X, INCX2
  391. STFDX A1, X2, INCX2
  392. STFPDX B1, Y2, INCY2
  393. b LL(999)
  /* Unit-stride path where both X and Y have address bits set under */
  /* 2*SIZE-1. One double is swapped on its own first, all four pointers */
  /* are shifted by one double, the middle is swapped with paired accesses */
  /* exactly as in LL(10), and LL(49) swaps the final single double. */
  /* Stores into X go through X2 with INCX2 and stores into Y through Y2 */
  /* with INCY2, matching the peel and LL(49) code in this same path. This */
  /* path is reached only when INCX == INCY == SIZE, so INCX2 and INCY2 */
  /* hold the same value here. */
  394. .align 4
  395. LL(40): /* X : unaligned Y : unaligned */
  /* Peel: load one double from each vector. */
  396. LFDX A1, Y, INCY2
  397. LFDX B1, X, INCX2
  398. add X, X, INCX
  399. add Y, Y, INCY
  400. addi N, N, -1
  /* cr0 set here is consumed by the ble below; the stores and adds in */
  /* between are non-recording forms. */
  401. cmpwi cr0, N, 0
  402. STFDX A1, X2, INCX2
  403. STFDX B1, Y2, INCY2
  404. add X2, X2, INCX
  405. add Y2, Y2, INCY
  406. ble LL(49)
  /* CTR = (N - 1) / 4 using the decremented N. */
  407. srawi. r0, N, 2
  408. mtspr CTR, r0
  409. beq- LL(45)
  /* Prime the pipeline: A1-A4 <- X, B1-B4 <- Y. */
  410. LFPDUX A1, X, INCX2
  411. LFPDUX B1, Y, INCY2
  412. LFPDUX A2, X, INCX2
  413. LFPDUX B2, Y, INCY2
  414. LFPDUX A3, X, INCX2
  415. LFPDUX B3, Y, INCY2
  416. LFPDUX A4, X, INCX2
  417. LFPDUX B4, Y, INCY2
  418. bdz LL(43)
  419. .align 4
  /* Steady state: each store is followed by the load that refills it. */
  420. LL(42):
  421. STFPDUX B1, X2, INCX2
  422. LFPDUX B1, Y, INCY2
  423. STFPDUX A1, Y2, INCY2
  424. LFPDUX A1, X, INCX2
  425. STFPDUX B2, X2, INCX2
  426. LFPDUX B2, Y, INCY2
  427. STFPDUX A2, Y2, INCY2
  428. LFPDUX A2, X, INCX2
  429. STFPDUX B3, X2, INCX2
  430. LFPDUX B3, Y, INCY2
  431. STFPDUX A3, Y2, INCY2
  432. LFPDUX A3, X, INCX2
  433. STFPDUX B4, X2, INCX2
  434. LFPDUX B4, Y, INCY2
  435. STFPDUX A4, Y2, INCY2
  436. LFPDUX A4, X, INCX2
  437. bdnz LL(42)
  438. .align 4
  /* Drain: store the four pairs still held in registers. */
  439. LL(43):
  440. STFPDUX B1, X2, INCX2
  441. STFPDUX A1, Y2, INCY2
  442. STFPDUX B2, X2, INCX2
  443. STFPDUX A2, Y2, INCY2
  444. STFPDUX B3, X2, INCX2
  445. STFPDUX A3, Y2, INCY2
  446. STFPDUX B4, X2, INCX2
  447. STFPDUX A4, Y2, INCY2
  448. .align 4
  /* Remainder: (decremented N) & 3, as an optional 2 then an optional 1. */
  449. LL(45):
  450. andi. r0, N, 3
  451. beq LL(49)
  452. andi. r0, N, 2
  453. beq LL(46)
  454. LFPDUX A1, X, INCX2
  455. LFPDUX B1, Y, INCY2
  456. LFPDUX A2, X, INCX2
  457. LFPDUX B2, Y, INCY2
  458. STFPDUX B1, X2, INCX2
  459. STFPDUX A1, Y2, INCY2
  460. STFPDUX B2, X2, INCX2
  461. STFPDUX A2, Y2, INCY2
  462. .align 4
  463. LL(46):
  464. andi. r0, N, 1
  465. beq LL(49)
  466. LFPDUX A1, X, INCX2
  467. LFPDUX B1, Y, INCY2
  468. STFPDUX B1, X2, INCX2
  469. STFPDUX A1, Y2, INCY2
  470. .align 4
  /* Finish: swap the one remaining double from each vector. */
  471. LL(49):
  472. LFDX A1, Y, INCY2
  473. LFDX B1, X, INCX2
  474. STFDX A1, X2, INCX2
  475. STFDX B1, Y2, INCY2
  476. b LL(999)
  /* General path, taken when the scaled INCX or INCY differs from SIZE. */
  /* Every element is moved as two single-double accesses: the first uses */
  /* the large step INCX2/INCY2, the second uses the step INCX/INCY, which */
  /* is reset to SIZE below. */
  477. .align 4
  478. LL(100):
  /* Large step = (2 * scaled stride) - SIZE, so that a large step followed */
  /* by a SIZE step advances by exactly 2 * scaled stride. */
  479. subi INCX2, INCX2, SIZE
  480. subi INCY2, INCY2, SIZE
  481. li INCX, SIZE
  482. li INCY, SIZE
  /* Back the pointers up by one large step for the update-form accesses. */
  483. sub X, X, INCX2
  484. sub Y, Y, INCY2
  485. mr X2, X
  486. mr Y2, Y
  /* CTR = N / 2: two elements (four doubles per vector) per iteration. */
  487. srawi. r0, N, 1
  488. mtspr CTR, r0
  489. beq- LL(115)
  /* Prime the pipeline: A1-A4 <- X, B1-B4 <- Y. */
  490. LFDUX A1, X, INCX2
  491. LFDUX B1, Y, INCY2
  492. LFDUX A2, X, INCX
  493. LFDUX B2, Y, INCY
  494. LFDUX A3, X, INCX2
  495. LFDUX B3, Y, INCY2
  496. LFDUX A4, X, INCX
  497. LFDUX B4, Y, INCY
  498. bdz LL(113)
  499. .align 4
  /* Steady state: each store is followed by the load that refills it. */
  500. LL(112):
  501. STFDUX B1, X2, INCX2
  502. LFDUX B1, Y, INCY2
  503. STFDUX A1, Y2, INCY2
  504. LFDUX A1, X, INCX2
  505. STFDUX B2, X2, INCX
  506. LFDUX B2, Y, INCY
  507. STFDUX A2, Y2, INCY
  508. LFDUX A2, X, INCX
  509. STFDUX B3, X2, INCX2
  510. LFDUX B3, Y, INCY2
  511. STFDUX A3, Y2, INCY2
  512. LFDUX A3, X, INCX2
  513. STFDUX B4, X2, INCX
  514. LFDUX B4, Y, INCY
  515. STFDUX A4, Y2, INCY
  516. LFDUX A4, X, INCX
  517. bdnz LL(112)
  518. .align 4
  /* Drain: store the eight doubles still held in registers. */
  519. LL(113):
  520. STFDUX B1, X2, INCX2
  521. STFDUX A1, Y2, INCY2
  522. STFDUX B2, X2, INCX
  523. STFDUX A2, Y2, INCY
  524. STFDUX B3, X2, INCX2
  525. STFDUX A3, Y2, INCY2
  526. STFDUX B4, X2, INCX
  527. STFDUX A4, Y2, INCY
  528. .align 4
  /* Remainder: one more element when N is odd. */
  529. LL(115):
  530. andi. r0, N, 1
  531. beq LL(999)
  532. LFDUX A1, X, INCX2
  533. LFDUX A2, X, INCX
  534. LFDUX B1, Y, INCY2
  535. LFDUX B2, Y, INCY
  536. STFDUX B1, X2, INCX2
  537. STFDUX B2, X2, INCX
  538. STFDUX A1, Y2, INCY2
  539. STFDUX A2, Y2, INCY
  /* Common exit: reload f16, f15, f14 in the reverse of the order they */
  /* were pushed at entry, put SP back, and return. */
  540. .align 4
  541. LL(999):
  542. li r10, 16
  /* Step SP back by 16 so the first update-form load (+16) lands on the */
  /* slot where f16 was stored. */
  543. addi SP, SP, -16
  544. lfpdux f16, SP, r10
  545. lfpdux f15, SP, r10
  546. lfpdux f14, SP, r10
  /* SP now points at the f14 slot; +16 undoes the net -48 from entry. */
  547. addi SP, SP, 16
  548. blr
  549. EPILOGUE