You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

zrot_ppc440.S 7.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build setup and symbolic register names used by the routine that follows.
     ASSEMBLER is defined before common.h is included (presumably to select the
     assembler-side definitions in that header -- TODO confirm in common.h). */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* Integer registers. N, X, INCX, Y and INCY are read by the routine without
     being set first, so they are inputs supplied by the caller. */
  40. #define N r3 /* count: tested against 0, then split into N >> 2 and N & 3 loop counts */
  41. #define X r4 /* load cursor for the X vector */
  42. #define INCX r5 /* X stride; shifted left by ZBASE_SHIFT on entry */
  43. #define Y r6 /* load cursor for the Y vector */
  44. #define INCY r7 /* Y stride; shifted left by ZBASE_SHIFT on entry */
  45. #define PRE r8 /* offset handed to the dcbt/dcbtst cache hints (set to 2 * 16 * SIZE) */
  46. #define XX r9 /* store cursor for X (copy of X taken before the loads advance it) */
  47. #define YY r10 /* store cursor for Y (copy of Y taken before the loads advance it) */
  48. #define INCXM1 r11 /* INCX - SIZE, computed after the shift */
  49. #define INCYM1 r12 /* INCY - SIZE, computed after the shift */
  /* Floating-point inputs: C is the FMUL multiplier, S the FMADD/FNMSUB
     multiplier (presumably the cosine and sine of the rotation -- TODO confirm). */
  50. #define C f1
  51. #define S f2
  /* Stack frame size: four 8-byte slots, one each for f14, f15, f16 and f17. */
  52. #define STACKSIZE 32
  /*
   * Rotation kernel over two strided vectors X and Y.
   *
   * One "element" is two consecutive SIZE-wide values at X and two at Y (the
   * file name suggests these are the real and imaginary parts of a complex
   * number -- TODO confirm). For every value pair (x, y) the routine stores
   *     C*x + S*y   back to X
   *     C*y - S*x   back to Y
   * assuming FMUL/FMADD/FNMSUB from common.h expand to the PowerPC
   * fmul/fmadd/fnmsub forms (TODO confirm in common.h).
   *
   * Structure:
   *   - entry:    save f14-f17, scale strides, bail out if N <= 0
   *   - LL(110):  software-pipelined loop, four elements per pass (N >> 2 groups)
   *   - LL(111):  pipeline drain for the last group of four
   *   - LL(150)/LL(160): one-element-per-pass loop for the N & 3 leftovers
   *   - LL(999):  restore f14-f17, release the stack area, return
   *
   * Addressing: LFDX/STFDX access base + (stride - SIZE) without touching the
   * base; LFDUX/STFDUX access base + stride and -- assuming they are the
   * update forms -- leave the base at the address just accessed.
   */
  53. PROLOGUE
  54. PROFCODE
  /* Reserve STACKSIZE bytes and spill f14-f17, which the unrolled loop uses as
     temporaries; they are reloaded at LL(999). */
  55. addi SP, SP, -STACKSIZE
  56. li r0, 0 /* NOTE(review): r0 is rewritten by srawi. below before any visible read; looks dead -- confirm */
  57. stfd f14, 0(SP)
  58. stfd f15, 8(SP)
  59. stfd f16, 16(SP)
  60. stfd f17, 24(SP)
  /* Scale both strides by ZBASE_SHIFT (from common.h; presumably converts an
     element stride into a byte stride -- TODO confirm), then form stride - SIZE. */
  61. slwi INCX, INCX, ZBASE_SHIFT
  62. slwi INCY, INCY, ZBASE_SHIFT
  63. subi INCXM1, INCX, SIZE
  64. subi INCYM1, INCY, SIZE
  65. li PRE, 2 * 16 * SIZE /* distance used by the dcbt/dcbtst hints in LL(110) */
  /* N <= 0: nothing to rotate, go straight to the register restore. */
  66. cmpwi cr0, N, 0
  67. ble- LL(999)
  /* Bias the cursors back by (stride - SIZE) so that the first LFDX lands on
     the caller-supplied address and the first LFDUX on the value SIZE after it. */
  68. sub X, X, INCXM1
  69. sub Y, Y, INCYM1
  /* XX/YY are the store cursors; X/Y run ahead of them because loads are
     issued several elements early. */
  70. mr XX, X
  71. mr YY, Y
  /* CTR = N >> 2 = number of four-element groups; if zero, only the remainder
     loop runs. */
  72. srawi. r0, N, 2
  73. mtspr CTR, r0
  74. beq- LL(150)
  /* Pipeline fill: load element 0 into f0/f3 (first values of X, Y) and f4/f5
     (second values), compute its results in f10-f13, load element 1 into
     f6-f9, and start loading element 2 (f0, f3, f4; its f5 is loaded later). */
  75. LFDX f0, X, INCXM1
  76. LFDX f3, Y, INCYM1
  77. LFDUX f4, X, INCX
  78. FMUL f10, C, f0
  79. LFDUX f5, Y, INCY
  80. FMUL f11, C, f3
  81. LFDX f6, X, INCXM1
  82. FMUL f12, C, f4
  83. LFDX f7, Y, INCYM1
  84. FMUL f13, C, f5
  85. LFDUX f8, X, INCX
  86. FMADD f10, S, f3, f10
  87. LFDUX f9, Y, INCY
  88. FNMSUB f11, S, f0, f11
  89. LFDX f0, X, INCXM1
  90. FMADD f12, S, f5, f12
  91. LFDX f3, Y, INCYM1
  92. FNMSUB f13, S, f4, f13
  93. LFDUX f4, X, INCX
  /* Decrement CTR; if this was the only group, skip the steady-state loop and
     drain the pipeline in LL(111). */
  94. bdz LL(111)
  95. .align 4
  /* Steady state. On entry (k = first element of the current group):
       f10-f13 hold finished results for element k,
       f6-f9   hold the loaded values of element k+1,
       f0, f3, f4 hold three of the four values of element k+2.
     Each pass stores elements k..k+3 and re-establishes the same state for
     k+4. Two register sets alternate: (f0,f3,f4,f5 -> f10-f13) and
     (f6-f9 -> f14-f17). */
  96. LL(110):
  /* Stage 1: multiply element k+1, finish loading k+2 (f5), store element k,
     load element k+3 into f6-f9. */
  97. FMUL f14, C, f6
  98. LFDUX f5, Y, INCY
  99. FMUL f15, C, f7
  100. STFDX f10, XX, INCXM1
  101. FMUL f16, C, f8
  102. STFDX f11, YY, INCYM1
  103. FMUL f17, C, f9
  104. STFDUX f12, XX, INCX
  /* Cache touch-for-store hint at PRE past the X load cursor (PPCG4 builds only). */
  105. #ifdef PPCG4
  106. dcbtst X, PRE
  107. #endif
  108. FMADD f14, S, f7, f14
  109. STFDUX f13, YY, INCY
  110. FNMSUB f15, S, f6, f15
  111. LFDX f6, X, INCXM1
  112. FMADD f16, S, f9, f16
  113. LFDX f7, Y, INCYM1
  114. FNMSUB f17, S, f8, f17
  115. LFDUX f8, X, INCX
  /* Stage 2: multiply element k+2, finish loading k+3 (f9), store element k+1,
     start loading element k+4 (f0, f3, f4). */
  116. FMUL f10, C, f0
  117. LFDUX f9, Y, INCY
  118. FMUL f11, C, f3
  119. STFDX f14, XX, INCXM1
  120. FMUL f12, C, f4
  121. STFDX f15, YY, INCYM1
  122. FMUL f13, C, f5
  123. STFDUX f16, XX, INCX
  /* Same hint for the Y load cursor (PPCG4 builds only). */
  124. #ifdef PPCG4
  125. dcbtst Y, PRE
  126. #endif
  127. FMADD f10, S, f3, f10
  128. STFDUX f17, YY, INCY
  129. FNMSUB f11, S, f0, f11
  130. LFDX f0, X, INCXM1
  131. FMADD f12, S, f5, f12
  132. LFDX f3, Y, INCYM1
  133. FNMSUB f13, S, f4, f13
  134. LFDUX f4, X, INCX
  /* Stage 3: multiply element k+3, finish loading k+4 (f5), store element k+2,
     start loading element k+5 (f6, f7, f8). */
  135. FMUL f14, C, f6
  136. LFDUX f5, Y, INCY
  137. FMUL f15, C, f7
  138. STFDX f10, XX, INCXM1
  139. FMUL f16, C, f8
  140. STFDX f11, YY, INCYM1
  141. FMUL f17, C, f9
  142. STFDUX f12, XX, INCX
  /* Second X hint per pass, emitted only when both PPCG4 and DOUBLE are
     defined; note this one is dcbt (touch for load), not dcbtst. */
  143. #if defined(PPCG4) && defined(DOUBLE)
  144. dcbt X, PRE
  145. #endif
  146. FMADD f14, S, f7, f14
  147. STFDUX f13, YY, INCY
  148. FNMSUB f15, S, f6, f15
  149. LFDX f6, X, INCXM1
  150. FMADD f16, S, f9, f16
  151. LFDX f7, Y, INCYM1
  152. FNMSUB f17, S, f8, f17
  153. LFDUX f8, X, INCX
  /* Stage 4: multiply element k+4 (results become the next pass's f10-f13),
     store element k+3, finish loading k+5 (f9), start loading k+6 (f0, f3, f4).
     The f9 load sits after the stores here; the old f9 was last read by the
     FMADD into f16 above. */
  154. FMUL f10, C, f0
  155. STFDX f14, XX, INCXM1
  156. FMUL f11, C, f3
  157. STFDX f15, YY, INCYM1
  158. FMUL f12, C, f4
  159. STFDUX f16, XX, INCX
  160. FMUL f13, C, f5
  161. STFDUX f17, YY, INCY
  /* Second Y hint per pass (PPCG4 and DOUBLE builds only). */
  162. #if defined(PPCG4) && defined(DOUBLE)
  163. dcbtst Y, PRE
  164. #endif
  165. FMADD f10, S, f3, f10
  166. LFDUX f9, Y, INCY
  167. FNMSUB f11, S, f0, f11
  168. LFDX f0, X, INCXM1
  169. FMADD f12, S, f5, f12
  170. LFDX f3, Y, INCYM1
  171. FNMSUB f13, S, f4, f13
  172. LFDUX f4, X, INCX
  173. bdnz LL(110) /* decrement CTR; loop while groups remain beyond the one in flight */
  174. .align 4
  /* Pipeline drain for the final group of four. Follows the same schedule as
     stages 1-3 above but stops issuing loads once the fourth element of the
     group is in registers, so no element past the last full group is read. */
  175. LL(111):
  /* Multiply element k+1, finish loading k+2 (f5), store element k, load k+3. */
  176. FMUL f14, C, f6
  177. LFDUX f5, Y, INCY
  178. FMUL f15, C, f7
  179. STFDX f10, XX, INCXM1
  180. FMUL f16, C, f8
  181. STFDX f11, YY, INCYM1
  182. FMUL f17, C, f9
  183. STFDUX f12, XX, INCX
  184. FMADD f14, S, f7, f14
  185. STFDUX f13, YY, INCY
  186. FNMSUB f15, S, f6, f15
  187. LFDX f6, X, INCXM1
  188. FMADD f16, S, f9, f16
  189. LFDX f7, Y, INCYM1
  190. FNMSUB f17, S, f8, f17
  191. LFDUX f8, X, INCX
  /* Multiply element k+2, store element k+1; the f9 load below is the last
     load of the group. */
  192. FMUL f10, C, f0
  193. LFDUX f9, Y, INCY
  194. FMUL f11, C, f3
  195. STFDX f14, XX, INCXM1
  196. FMUL f12, C, f4
  197. STFDX f15, YY, INCYM1
  198. FMUL f13, C, f5
  199. STFDUX f16, XX, INCX
  200. FMADD f10, S, f3, f10
  201. STFDUX f17, YY, INCY
  202. FNMSUB f11, S, f0, f11
  203. FMADD f12, S, f5, f12
  204. FNMSUB f13, S, f4, f13
  /* Multiply element k+3, store element k+2. */
  205. FMUL f14, C, f6
  206. STFDX f10, XX, INCXM1
  207. FMUL f15, C, f7
  208. STFDX f11, YY, INCYM1
  209. FMUL f16, C, f8
  210. STFDUX f12, XX, INCX
  211. FMUL f17, C, f9
  212. STFDUX f13, YY, INCY
  213. FMADD f14, S, f7, f14
  214. FNMSUB f15, S, f6, f15
  215. FMADD f16, S, f9, f16
  216. FNMSUB f17, S, f8, f17
  /* Store element k+3; X/XX and Y/YY now both sit at the end of the last group. */
  217. STFDX f14, XX, INCXM1
  218. STFDX f15, YY, INCYM1
  219. STFDUX f16, XX, INCX
  220. STFDUX f17, YY, INCY
  221. .align 4
  /* Remainder: CTR = N & 3 elements left over after the groups of four; if
     none, go to the exit. Also reached directly when N < 4. */
  222. LL(150):
  223. andi. r0, N, 3
  224. mtspr CTR, r0
  225. beq LL(999)
  226. .align 4
  /* One element per pass, no pipelining: load two values from X and two from
     Y, rotate each (x, y) pair, store both results back through XX/YY. */
  227. LL(160):
  228. LFDX f0, X, INCXM1
  229. LFDX f3, Y, INCYM1
  230. LFDUX f4, X, INCX
  231. LFDUX f5, Y, INCY
  232. FMUL f10, C, f0
  233. FMUL f11, C, f3
  234. FMUL f12, C, f4
  235. FMUL f13, C, f5
  236. FMADD f10, S, f3, f10
  237. FNMSUB f11, S, f0, f11
  238. FMADD f12, S, f5, f12
  239. FNMSUB f13, S, f4, f13
  240. STFDX f10, XX, INCXM1
  241. STFDX f11, YY, INCYM1
  242. STFDUX f12, XX, INCX
  243. STFDUX f13, YY, INCY
  244. bdnz LL(160)
  245. .align 4
  /* Common exit: reload the f14-f17 values spilled on entry, release the
     STACKSIZE bytes and return. */
  246. LL(999):
  247. lfd f14, 0(SP)
  248. lfd f15, 8(SP)
  249. lfd f16, 16(SP)
  250. lfd f17, 24(SP)
  251. addi SP, SP, STACKSIZE
  252. blr
  253. EPILOGUE