You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

zswap.S 9.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build-time configuration and register aliases used by the swap kernel below. */
  38. #define ASSEMBLER /* defined before common.h is included; presumably selects its assembler-side definitions -- confirm in common.h */
  39. #include "common.h"
  /* PREFETCH_SIZE is multiplied by SIZE and added to X and Y to form the initial prefetch pointers. */
  /* Its value depends on which precision macro (XDOUBLE / DOUBLE / neither) is defined. */
  40. #ifdef XDOUBLE
  41. #define PREFETCH_SIZE ( 8 * 16)
  42. #elif defined(DOUBLE)
  43. #define PREFETCH_SIZE (16 * 16)
  44. #else
  45. #define PREFETCH_SIZE (32 * 16)
  46. #endif
  47. #define SP r12 /* base register for the stack-passed arguments read at entry (r12 is the IA-64 stack pointer) */
  /* Argument registers. In the XDOUBLE build X, INCX, Y and INCY are all loaded from SP+16..SP+40, */
  /* each into the same register that first holds its address. Otherwise X, INCX and Y are used as */
  /* they arrive in r37-r39 and only INCY is loaded from SP+16. */
  48. #ifdef XDOUBLE
  49. #define N r32 /* element count */
  50. #define X r14 /* load pointer into the first vector */
  51. #define INCX r15 /* stride of X; shifted left by ZBASE_SHIFT at entry */
  52. #define Y r16 /* load pointer into the second vector */
  53. #define INCY r17 /* stride of Y; shifted left by ZBASE_SHIFT at entry */
  54. #else
  55. #define N r32 /* element count */
  56. #define X r37 /* load pointer into the first vector */
  57. #define INCX r38 /* stride of X; shifted left by ZBASE_SHIFT at entry */
  58. #define Y r39 /* load pointer into the second vector */
  59. #define INCY r36 /* stride of Y; loaded from the stack, then shifted left by ZBASE_SHIFT */
  60. #endif
  /* Scratch registers. */
  61. #define PRE1 r2 /* prefetch pointer running ahead of X */
  62. #define PRE2 r3 /* prefetch pointer running ahead of Y */
  63. #define I r18 /* (N >> 3) - 1, the value written to ar.lc */
  64. #define J r19 /* N & 7, the element count left for the remainder code */
  65. #define YY r20 /* store pointer into the second vector (starts equal to Y) */
  66. #define XX r21 /* store pointer into the first vector (starts equal to X) */
  67. #define INCXM1 r22 /* INCX - SIZE: step from the second value of one element to the next element of X */
  68. #define INCYM1 r23 /* INCY - SIZE: same for Y */
  69. #define INCX8 r24 /* 8 * INCX: prefetch step for one pass of the main loop */
  70. #define INCY8 r25 /* 8 * INCY: same for Y */
  71. #define PR r30 /* copy of the predicate registers taken at entry */
  72. #define ARLC r31 /* copy of ar.lc taken at entry */
  /*
   * Entry and loop setup.
   * Fetches the stack-passed arguments, converts the strides with ZBASE_SHIFT,
   * splits N into full groups of 8 elements (I) and a remainder (J), and primes
   * the loop-control state (ar.lc, ar.ec, p16) for the pipelined loop at .L52.
   * PROLOGUE / PROFCODE are macros that are not defined in this file
   * (presumably they come from common.h).
   */
  73. PROLOGUE
  74. .prologue
  75. PROFCODE
  76. { .mmi
  77. adds r14 = 16, SP /* r14..r17 = addresses SP+16, SP+24, SP+32, SP+40 of stack-passed arguments */
  78. adds r15 = 24, SP
  79. adds r16 = 32, SP
  80. }
  81. { .mmb
  82. adds r17 = 40, SP
  83. cmp.gt p15, p0 = r0, N /* p15 = (0 > N) */
  84. (p15) br.ret.sptk.many b0 /* negative N: return without touching anything */
  85. }
  86. ;;
  87. #ifdef XDOUBLE
  88. { .mmi
  89. ld8 X = [r14] /* X aliases r14 here, so the address is replaced by the loaded value */
  90. ld8 INCX = [r15]
  91. nop __LINE__ /* filler for the unused slot */
  92. }
  93. { .mmi
  94. ld8 Y = [r16]
  95. ld8 INCY = [r17]
  96. nop __LINE__
  97. }
  98. ;;
  99. #else
  100. { .mmi
  101. ld8 INCY = [r14] /* only INCY comes from the stack (SP+16) in this build */
  102. nop __LINE__
  103. nop __LINE__
  104. }
  105. ;;
  106. #endif
  107. { .mii
  108. .save ar.lc, ARLC /* unwind annotation: ar.lc is preserved in ARLC */
  109. mov ARLC = ar.lc /* saved because ar.lc is overwritten below and restored at .L55 */
  110. shl INCX = INCX, ZBASE_SHIFT /* scale the X stride; presumably elements -> bytes, ZBASE_SHIFT is defined elsewhere */
  111. }
  112. ;;
  113. .body
  114. { .mii
  115. and J = 7, N /* J = N mod 8: elements left over after the main loop */
  116. mov PR = pr /* save predicates; partially restored at .L55 */
  117. shl INCY = INCY, ZBASE_SHIFT /* scale the Y stride the same way */
  118. }
  119. ;;
  120. { .mmi
  121. mov XX = X /* store pointers start at the same place as the load pointers */
  122. mov YY = Y
  123. shr I = N, 3 /* I = number of full 8-element groups */
  124. }
  125. ;;
  126. { .mmi
  127. adds I = -1, I /* I = groups - 1; becomes -1 when there is no full group */
  128. cmp.eq p9, p0 = r0, J /* p9 = (J == 0): no remainder, used for the early return at .L55 */
  129. mov pr.rot = 0 /* clear the rotating predicates before starting the pipeline */
  130. }
  131. ;;
  132. { .mmi
  133. shladd INCX8 = INCX, 3, r0 /* INCX8 = 8 * INCX */
  134. shladd INCY8 = INCY, 3, r0 /* INCY8 = 8 * INCY */
  135. mov ar.ec= 3 /* epilog count 3: the loop uses stage predicates p16 (loads) and p18 (stores) */
  136. }
  137. { .mmi
  138. adds INCXM1 = -SIZE, INCX /* INCXM1 = INCX - SIZE */
  139. adds INCYM1 = -SIZE, INCY /* INCYM1 = INCY - SIZE */
  140. cmp.eq p16, p0 = r0, r0 /* p16 = true: enable the load stage for the first pass */
  141. }
  142. ;;
  143. { .mmi
  144. adds PRE1 = PREFETCH_SIZE * SIZE, X /* prefetch pointers start PREFETCH_SIZE * SIZE past X and Y */
  145. adds PRE2 = PREFETCH_SIZE * SIZE, Y
  146. mov ar.lc = I /* loop count for br.ctop */
  147. }
  148. { .mib
  149. cmp.eq p8 ,p0 = -1, I /* p8 = (I == -1): fewer than 8 elements */
  150. tbit.z p0, p12 = J, 2 /* p12 = bit 2 of J set: a group of 4 remains */
  151. (p8) br.cond.dpnt .L55 /* skip the main loop and go straight to the remainder code */
  152. }
  153. ;;
  /*
   * .L52: main loop, 8 elements (two SIZE-wide values each) of both vectors per pass.
   * The loop is software-pipelined with br.ctop: loads are predicated on p16 and
   * stores on p18. The code relies on register rotation moving each name up by
   * one per pass, so a value loaded into fN under p16 is stored as fN+2 under
   * p18 two passes later:
   *   X value: LDFD f32 = [X]  ->  STFD [YY] = f34
   *   Y value: LDFD f35 = [Y]  ->  STFD [XX] = f37
   * X/Y are the load pointers; XX/YY are the store pointers that trail them.
   * Each element is handled in two instruction groups:
   *   first value : access with post-increment 1 * SIZE
   *   second value: loads post-increment by INCXM1/INCYM1 (register update);
   *                 stores use no post-increment and a separate add advances
   *                 XX/YY by the same amount.
   * The two values are presumably the real and imaginary parts of a complex
   * element -- confirm against the caller's data layout.
   */
  154. .align 32
  155. .L52:
  156. { .mmi
  157. (p18) STFD [XX] = f37, 1 * SIZE /* element 0, first value: Y data (loaded as f35) -> X */
  158. (p18) STFD [YY] = f34, 1 * SIZE /* X data (loaded as f32) -> Y */
  159. }
  160. { .mmi
  161. (p16) LDFD f32 = [X], 1 * SIZE
  162. (p16) LDFD f35 = [Y], 1 * SIZE
  163. }
  164. ;;
  165. { .mmi
  166. (p18) STFD [XX] = f43 /* element 0, second value */
  167. (p18) STFD [YY] = f40
  168. (p18) add XX = XX, INCXM1 /* advance the X store pointer to the next element */
  169. }
  170. { .mmi
  171. (p16) LDFD f38 = [X], INCXM1 /* load, then step to the next element */
  172. (p16) LDFD f41 = [Y], INCYM1
  173. (p18) add YY = YY, INCYM1 /* advance the Y store pointer to the next element */
  174. }
  175. ;;
  176. { .mmi
  177. (p18) STFD [XX] = f49, 1 * SIZE /* element 1, first value */
  178. (p18) STFD [YY] = f46, 1 * SIZE
  179. }
  180. { .mmi
  181. (p16) LDFD f44 = [X], 1 * SIZE
  182. (p16) LDFD f47 = [Y], 1 * SIZE
  183. }
  184. ;;
  185. { .mmi
  186. (p18) STFD [XX] = f55 /* element 1, second value */
  187. (p18) STFD [YY] = f52
  188. (p18) add XX = XX, INCXM1
  189. }
  190. { .mmi
  191. (p16) LDFD f50 = [X], INCXM1
  192. (p16) LDFD f53 = [Y], INCYM1
  193. (p18) add YY = YY, INCYM1
  194. }
  195. ;;
  196. { .mmi
  197. (p18) STFD [XX] = f61, 1 * SIZE /* element 2, first value */
  198. (p18) STFD [YY] = f58, 1 * SIZE
  199. }
  200. { .mmi
  201. (p16) LDFD f56 = [X], 1 * SIZE
  202. (p16) LDFD f59 = [Y], 1 * SIZE
  203. }
  204. ;;
  205. { .mmi
  206. (p18) STFD [XX] = f67 /* element 2, second value */
  207. (p18) STFD [YY] = f64
  208. (p18) add XX = XX, INCXM1
  209. }
  210. { .mmi
  211. (p16) LDFD f62 = [X], INCXM1
  212. (p16) LDFD f65 = [Y], INCYM1
  213. (p18) add YY = YY, INCYM1
  214. }
  215. ;;
  216. { .mmi
  217. (p18) STFD [XX] = f73, 1 * SIZE /* element 3, first value */
  218. (p18) STFD [YY] = f70, 1 * SIZE
  219. }
  220. { .mmi
  221. (p16) LDFD f68 = [X], 1 * SIZE
  222. (p16) LDFD f71 = [Y], 1 * SIZE
  223. }
  224. ;;
  225. { .mmi
  226. (p18) STFD [XX] = f79 /* element 3, second value */
  227. (p18) STFD [YY] = f76
  228. (p18) add XX = XX, INCXM1
  229. }
  230. { .mmi
  231. (p16) LDFD f74 = [X], INCXM1
  232. (p16) LDFD f77 = [Y], INCYM1
  233. (p18) add YY = YY, INCYM1
  234. }
  235. ;;
  236. { .mmi
  237. (p18) STFD [XX] = f85, 1 * SIZE /* element 4, first value */
  238. (p18) STFD [YY] = f82, 1 * SIZE
  239. }
  240. { .mmi
  241. (p16) LDFD f80 = [X], 1 * SIZE
  242. (p16) LDFD f83 = [Y], 1 * SIZE
  243. }
  244. ;;
  245. { .mmi
  246. (p18) STFD [XX] = f91 /* element 4, second value */
  247. (p18) STFD [YY] = f88
  248. (p18) add XX = XX, INCXM1
  249. }
  250. { .mmi
  251. (p16) LDFD f86 = [X], INCXM1
  252. (p16) LDFD f89 = [Y], INCYM1
  253. (p18) add YY = YY, INCYM1
  254. }
  255. ;;
  256. { .mmi
  257. (p18) STFD [XX] = f97, 1 * SIZE /* element 5, first value */
  258. (p18) STFD [YY] = f94, 1 * SIZE
  259. }
  260. { .mmi
  261. (p16) LDFD f92 = [X], 1 * SIZE
  262. (p16) LDFD f95 = [Y], 1 * SIZE
  263. }
  264. ;;
  265. { .mmi
  266. (p18) STFD [XX] = f103 /* element 5, second value */
  267. (p18) STFD [YY] = f100
  268. (p18) add XX = XX, INCXM1
  269. }
  270. { .mmi
  271. (p16) LDFD f98 = [X], INCXM1
  272. (p16) LDFD f101 = [Y], INCYM1
  273. (p18) add YY = YY, INCYM1
  274. }
  275. ;;
  276. { .mmi
  277. (p18) STFD [XX] = f109, 1 * SIZE /* element 6, first value */
  278. (p18) STFD [YY] = f106, 1 * SIZE
  279. }
  280. { .mmi
  281. (p16) LDFD f104 = [X], 1 * SIZE
  282. (p16) LDFD f107 = [Y], 1 * SIZE
  283. }
  284. ;;
  285. { .mmi
  286. (p18) STFD [XX] = f115 /* element 6, second value */
  287. (p18) STFD [YY] = f112
  288. (p18) add XX = XX, INCXM1
  289. }
  290. { .mmi
  291. (p16) LDFD f110 = [X], INCXM1
  292. (p16) LDFD f113 = [Y], INCYM1
  293. (p18) add YY = YY, INCYM1
  294. }
  295. ;;
  296. { .mmi
  297. (p18) STFD [XX] = f121, 1 * SIZE /* element 7, first value */
  298. (p18) STFD [YY] = f118, 1 * SIZE
  299. }
  300. { .mmi
  301. (p16) LDFD f116 = [X], 1 * SIZE
  302. (p16) LDFD f119 = [Y], 1 * SIZE
  303. }
  304. ;;
  305. { .mmi
  306. (p18) STFD [XX] = f127 /* element 7, second value */
  307. (p18) STFD [YY] = f124
  308. (p18) add XX = XX, INCXM1
  309. }
  310. { .mmi
  311. (p16) LDFD f122 = [X], INCXM1
  312. (p16) LDFD f125 = [Y], INCYM1
  313. (p18) add YY = YY, INCYM1
  314. }
  315. { .mmb
  316. (p16) lfetch.excl.nt1 [PRE1], INCX8 /* prefetch ahead in X, then step the prefetch pointer by 8 elements */
  317. (p16) lfetch.excl.nt1 [PRE2], INCY8 /* same for Y */
  318. br.ctop.sptk.few .L52 /* counted loop branch driven by ar.lc / ar.ec; advances the pipeline stages */
  319. }
  320. ;;
  /*
   * .L55: remainder handling for the last N mod 8 elements, reached either by
   * falling out of the main loop or directly when N < 8.
   *   p12 (bit 2 of J) : swap 4 elements, X -> f32..f39, Y -> f80..f87
   *   p13 (bit 1 of N) : swap 2 elements, X -> f40..f43, Y -> f88..f91
   *   p14 (bit 0 of N) : swap 1 element,  X -> f44..f45, Y -> f92..f93
   * Loads go through X/Y, stores through XX/YY; values read from Y are written
   * to XX and values read from X are written to YY. Loads for later groups are
   * interleaved with the stores of earlier groups.
   * ar.lc and the predicates saved at entry are restored here as well.
   */
  321. .align 32
  322. .L55:
  323. { .mmi
  324. (p12) LDFD f32 = [X], 1 * SIZE
  325. (p12) LDFD f80 = [Y], 1 * SIZE
  326. mov ar.lc = ARLC /* restore the caller's loop counter */
  327. }
  328. ;;
  329. { .mmi
  330. (p12) LDFD f33 = [X], INCXM1
  331. (p12) LDFD f81 = [Y], INCYM1
  332. mov pr = PR, -65474 /* restore saved predicates; the low 16 mask bits are 0x003E, so p6-p15 (p9, p12-p14 used here) keep their current values */
  333. }
  334. ;;
  335. { .mmb
  336. (p12) LDFD f34 = [X], 1 * SIZE
  337. (p12) LDFD f82 = [Y], 1 * SIZE
  338. (p9) br.ret.sptk.many b0 /* J == 0: nothing left to swap (p12 is then clear, so the loads above did not execute) */
  339. }
  340. ;;
  341. { .mmi
  342. (p12) LDFD f35 = [X], INCXM1
  343. (p12) LDFD f83 = [Y], INCYM1
  344. tbit.z p0, p13 = N, 1 /* p13 = bit 1 of N set: a group of 2 remains */
  345. }
  346. ;;
  347. { .mmi
  348. (p12) LDFD f36 = [X], 1 * SIZE
  349. (p12) LDFD f84 = [Y], 1 * SIZE
  350. tbit.z p0, p14 = N, 0 /* p14 = bit 0 of N set: a single element remains */
  351. }
  352. ;;
  353. { .mmi
  354. (p12) LDFD f37 = [X], INCXM1
  355. (p12) LDFD f85 = [Y], INCYM1
  356. }
  357. ;;
  358. { .mmi
  359. (p12) STFD [XX] = f80, 1 * SIZE /* group of 4: begin writing back, Y data -> X */
  360. (p12) STFD [YY] = f32, 1 * SIZE /* X data -> Y */
  361. }
  362. { .mmi
  363. (p12) LDFD f38 = [X], 1 * SIZE
  364. (p12) LDFD f86 = [Y], 1 * SIZE
  365. }
  366. ;;
  367. { .mmi
  368. (p12) STFD [XX] = f81
  369. (p12) STFD [YY] = f33
  370. (p12) add XX = XX, INCXM1 /* step the X store pointer to the next element */
  371. }
  372. { .mmi
  373. (p12) LDFD f39 = [X], INCXM1 /* last load of the group of 4 */
  374. (p12) LDFD f87 = [Y], INCYM1
  375. (p12) add YY = YY, INCYM1 /* step the Y store pointer to the next element */
  376. }
  377. ;;
  378. { .mmi
  379. (p12) STFD [XX] = f82, 1 * SIZE
  380. (p12) STFD [YY] = f34, 1 * SIZE
  381. }
  382. { .mmi
  383. (p13) LDFD f40 = [X], 1 * SIZE /* group of 2: loads start while the group of 4 is still being stored */
  384. (p13) LDFD f88 = [Y], 1 * SIZE
  385. }
  386. ;;
  387. { .mmi
  388. (p12) STFD [XX] = f83
  389. (p12) STFD [YY] = f35
  390. (p12) add XX = XX, INCXM1
  391. }
  392. { .mmi
  393. (p13) LDFD f41 = [X], INCXM1
  394. (p13) LDFD f89 = [Y], INCYM1
  395. (p12) add YY = YY, INCYM1
  396. }
  397. ;;
  398. { .mmi
  399. (p12) STFD [XX] = f84, 1 * SIZE
  400. (p12) STFD [YY] = f36, 1 * SIZE
  401. }
  402. { .mmi
  403. (p13) LDFD f42 = [X], 1 * SIZE
  404. (p13) LDFD f90 = [Y], 1 * SIZE
  405. }
  406. ;;
  407. { .mmi
  408. (p12) STFD [XX] = f85
  409. (p12) STFD [YY] = f37
  410. (p12) add XX = XX, INCXM1
  411. }
  412. { .mmi
  413. (p13) LDFD f43 = [X], INCXM1 /* last load of the group of 2 */
  414. (p13) LDFD f91 = [Y], INCYM1
  415. (p12) add YY = YY, INCYM1
  416. }
  417. ;;
  418. { .mmi
  419. (p12) STFD [XX] = f86, 1 * SIZE
  420. (p12) STFD [YY] = f38, 1 * SIZE
  421. }
  422. { .mmi
  423. (p14) LDFD f44 = [X], 1 * SIZE /* single trailing element: first value */
  424. (p14) LDFD f92 = [Y], 1 * SIZE
  425. }
  426. ;;
  427. { .mmi
  428. (p12) STFD [XX] = f87 /* last store of the group of 4 */
  429. (p12) STFD [YY] = f39
  430. (p12) add XX = XX, INCXM1
  431. }
  432. { .mmi
  433. (p14) LDFD f45 = [X] /* final loads: no pointer update, nothing is read after this */
  434. (p14) LDFD f93 = [Y]
  435. (p12) add YY = YY, INCYM1
  436. }
  437. ;;
  438. { .mmi
  439. (p13) STFD [XX] = f88, 1 * SIZE /* group of 2: write back */
  440. (p13) STFD [YY] = f40, 1 * SIZE
  441. }
  442. ;;
  /* From here on the instructions are written without explicit bundle braces. */
  443. (p13) STFD [XX] = f89
  444. (p13) add XX = XX, INCXM1
  445. (p13) STFD [YY] = f41
  446. (p13) add YY = YY, INCYM1
  447. ;;
  448. (p13) STFD [XX] = f90, 1 * SIZE
  449. (p13) STFD [YY] = f42, 1 * SIZE
  450. ;;
  451. (p13) STFD [XX] = f91 /* last store of the group of 2 */
  452. (p13) add XX = XX, INCXM1
  453. (p13) STFD [YY] = f43
  454. (p13) add YY = YY, INCYM1
  455. ;;
  456. (p14) STFD [XX] = f92, 1 * SIZE /* single trailing element: write back */
  457. (p14) STFD [YY] = f44, 1 * SIZE
  458. ;;
  459. (p14) STFD [XX] = f93
  460. (p14) STFD [YY] = f45
  461. br.ret.sptk.many b0 /* return to the caller */
  462. ;;
  463. EPILOGUE