You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

rot_ppc440.S 6.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER
  39. #include "common.h"
  /* Register roles, as used by the routine below.                        */
  /* r3-r7 and f1/f2 are presumably the incoming argument registers;      */
  /* NOTE(review): confirm against the calling convention in common.h.    */
  /* N: element count (tested against 0, split into N >> 3 and N & 7).    */
  40. #define N r3
  /* X: load pointer for the first vector; advanced by the update-form    */
  /* loads.                                                               */
  41. #define X r4
  /* INCX: stride of X; shifted left by BASE_SHIFT before use.            */
  42. #define INCX r5
  /* Y: load pointer for the second vector.                               */
  43. #define Y r6
  /* INCY: stride of Y; shifted left by BASE_SHIFT before use.            */
  44. #define INCY r7
  /* PRE: offset (2 * 16 * SIZE) added to X / Y by the cache-touch hints. */
  45. #define PRE r8
  /* XX, YY: store pointers; copied from X and Y before the loops and     */
  /* advanced by the update-form stores.                                  */
  46. #define XX r9
  47. #define YY r10
  /* C, S: the two scalar multipliers of the rotation.                    */
  48. #define C f1
  49. #define S f2
  /* STACKSIZE: bytes reserved on the stack; holds f14-f17 at offsets     */
  /* 0, 8, 16 and 24.                                                     */
  50. #define STACKSIZE 32
  /*
   * Plane rotation applied in place to two strided vectors:
   *
   *     x[i] <- C * x[i] + S * y[i]
   *     y[i] <- C * y[i] - S * x[i]          for i = 0 .. N-1
   *
   * This reads FMUL / FMADD / FNMSUB as the PowerPC fmul / fmadd / fnmsub
   * forms.  Those macros, together with LFDUX, STFDUX, SIZE, BASE_SHIFT,
   * LL, PROLOGUE, PROFCODE and EPILOGUE, are defined outside this file
   * (presumably in common.h) - NOTE(review): confirm there.
   *
   * Layout of the routine:
   *   - entry      : save f14-f17, scale strides, bail out if N <= 0
   *   - LL(110)    : software-pipelined loop, 8 elements per iteration
   *   - LL(111)    : drain of the pipeline for the last full group of 8
   *   - LL(160)    : one element per iteration for the N & 7 remainder
   *   - LL(999)    : restore f14-f17, release the stack area, return
   *
   * X / Y are the load pointers and run ahead of XX / YY, the store
   * pointers.  f0, f3-f9 hold raw inputs; f10-f17 hold results.
   */
  51. PROLOGUE
  52. PROFCODE
  /* Reserve STACKSIZE bytes and save f14-f17, which the unrolled loop    */
  /* uses as result registers; they are reloaded at LL(999).              */
  53. addi SP, SP, -STACKSIZE
  /* NOTE(review): r0 is overwritten by "srawi." below with no visible    */
  /* read in between; this load of 0 looks redundant - confirm.           */
  54. li r0, 0
  55. stfd f14, 0(SP)
  56. stfd f15, 8(SP)
  57. stfd f16, 16(SP)
  58. stfd f17, 24(SP)
  /* Scale both strides by 2^BASE_SHIFT (presumably elements -> bytes).   */
  59. slwi INCX, INCX, BASE_SHIFT
  60. slwi INCY, INCY, BASE_SHIFT
  /* Offset used by the cache-touch hints inside the main loop.           */
  61. li PRE, 2 * 16 * SIZE
  /* Nothing to do for N <= 0; the exit path still restores f14-f17.      */
  62. cmpwi cr0, N, 0
  63. ble- LL(999)
  /* Step both pointers back by one stride: every access below uses an    */
  /* update form that adds the stride to the pointer as part of the       */
  /* access.                                                              */
  64. sub X, X, INCX
  65. sub Y, Y, INCY
  /* Separate store pointers, so that loads can run ahead of stores.      */
  66. mr XX, X
  67. mr YY, Y
  /* CTR = N >> 3 = number of full groups of 8; none -> remainder loop.   */
  68. srawi. r0, N, 3
  69. mtspr CTR, r0
  70. beq- LL(150)
  /* Pipeline fill.  Loads elements 0-3 of the first group, computes the  */
  /* results for elements 0 and 1 into f10-f13, and starts loading        */
  /* elements 4 and 5 (x4 -> f0, y4 -> f3, x5 -> f4).                     */
  71. LFDUX f0, X, INCX
  72. LFDUX f3, Y, INCY
  73. LFDUX f4, X, INCX
  74. FMUL f10, C, f0
  75. LFDUX f5, Y, INCY
  76. FMUL f11, C, f3
  77. LFDUX f6, X, INCX
  78. FMUL f12, C, f4
  79. LFDUX f7, Y, INCY
  80. FMUL f13, C, f5
  81. LFDUX f8, X, INCX
  82. FMADD f10, S, f3, f10
  83. LFDUX f9, Y, INCY
  84. FNMSUB f11, S, f0, f11
  85. LFDUX f0, X, INCX
  86. FMADD f12, S, f5, f12
  87. LFDUX f3, Y, INCY
  88. FNMSUB f13, S, f4, f13
  89. LFDUX f4, X, INCX
  /* Exactly one full group: skip the steady-state loop and drain.        */
  90. bdz LL(111)
  91. .align 4
  /* Steady state, 8 elements per iteration.  On entry:                   */
  /*   f10-f13 = finished results for elements 0, 1 of this group         */
  /*   f6-f9   = raw x/y for elements 2, 3                                */
  /*   f0, f3, f4 = x4, y4, x5 (y5 is loaded first thing below)           */
  /* On exit the same state holds for the next group.                     */
  92. LL(110):
  /* Start elements 2, 3 while storing elements 0, 1.                     */
  93. FMUL f14, C, f6
  94. LFDUX f5, Y, INCY
  95. FMUL f15, C, f7
  96. STFDUX f10, XX, INCX
  97. FMUL f16, C, f8
  98. STFDUX f11, YY, INCY
  99. FMUL f17, C, f9
  100. STFDUX f12, XX, INCX
  /* Cache-touch hint at X + PRE; only assembled when PPCG4 is defined.   */
  101. #ifdef PPCG4
  102. dcbtst X, PRE
  103. #endif
  /* Finish elements 2, 3; refill f6-f9 with elements 6, 7.               */
  104. FMADD f14, S, f7, f14
  105. STFDUX f13, YY, INCY
  106. FNMSUB f15, S, f6, f15
  107. LFDUX f6, X, INCX
  108. FMADD f16, S, f9, f16
  109. LFDUX f7, Y, INCY
  110. FNMSUB f17, S, f8, f17
  111. LFDUX f8, X, INCX
  /* Start elements 4, 5 while storing elements 2, 3.                     */
  112. FMUL f10, C, f0
  113. LFDUX f9, Y, INCY
  114. FMUL f11, C, f3
  115. STFDUX f14, XX, INCX
  116. FMUL f12, C, f4
  117. STFDUX f15, YY, INCY
  118. FMUL f13, C, f5
  119. STFDUX f16, XX, INCX
  /* Cache-touch hint at Y + PRE; only assembled when PPCG4 is defined.   */
  120. #ifdef PPCG4
  121. dcbtst Y, PRE
  122. #endif
  /* Finish elements 4, 5; begin loading the next group (x0, y0, x1).     */
  123. FMADD f10, S, f3, f10
  124. STFDUX f17, YY, INCY
  125. FNMSUB f11, S, f0, f11
  126. LFDUX f0, X, INCX
  127. FMADD f12, S, f5, f12
  128. LFDUX f3, Y, INCY
  129. FNMSUB f13, S, f4, f13
  130. LFDUX f4, X, INCX
  /* Second half of the iteration: same pattern, shifted by 4 elements.   */
  /* Start elements 6, 7 while storing elements 4, 5.                     */
  131. FMUL f14, C, f6
  132. LFDUX f5, Y, INCY
  133. FMUL f15, C, f7
  134. STFDUX f10, XX, INCX
  135. FMUL f16, C, f8
  136. STFDUX f11, YY, INCY
  137. FMUL f17, C, f9
  138. STFDUX f12, XX, INCX
  /* Extra hint at X + PRE, assembled only for PPCG4 with DOUBLE.         */
  /* NOTE(review): this one is dcbt while the other three hints in this   */
  /* loop are dcbtst; it only affects cache hinting, not results, but the */
  /* inconsistency may be unintentional - confirm.                        */
  139. #if defined(PPCG4) && defined(DOUBLE)
  140. dcbt X, PRE
  141. #endif
  /* Finish elements 6, 7; refill f6-f9 with next group's elements 2, 3.  */
  142. FMADD f14, S, f7, f14
  143. STFDUX f13, YY, INCY
  144. FNMSUB f15, S, f6, f15
  145. LFDUX f6, X, INCX
  146. FMADD f16, S, f9, f16
  147. LFDUX f7, Y, INCY
  148. FNMSUB f17, S, f8, f17
  149. LFDUX f8, X, INCX
  /* Start next group's elements 0, 1 while storing elements 6, 7.        */
  150. FMUL f10, C, f0
  151. LFDUX f9, Y, INCY
  152. FMUL f11, C, f3
  153. STFDUX f14, XX, INCX
  154. FMUL f12, C, f4
  155. STFDUX f15, YY, INCY
  156. FMUL f13, C, f5
  157. STFDUX f16, XX, INCX
  /* Extra hint at Y + PRE, assembled only for PPCG4 with DOUBLE.         */
  158. #if defined(PPCG4) && defined(DOUBLE)
  159. dcbtst Y, PRE
  160. #endif
  /* Finish next group's elements 0, 1 and load its x4, y4, x5, which     */
  /* re-establishes the loop-entry state.                                 */
  161. FMADD f10, S, f3, f10
  162. STFDUX f17, YY, INCY
  163. FNMSUB f11, S, f0, f11
  164. LFDUX f0, X, INCX
  165. FMADD f12, S, f5, f12
  166. LFDUX f3, Y, INCY
  167. FNMSUB f13, S, f4, f13
  168. LFDUX f4, X, INCX
  /* Loop while more than one full group is left; the last group is       */
  /* handled by the drain code so that no load reads past it.             */
  169. bdnz LL(110)
  170. .align 4
  /* Drain.  Same entry state as LL(110).  Completes the final group of 8 */
  /* and loads only what that group still needs (y5 and elements 6, 7).   */
  171. LL(111):
  /* Start elements 2, 3 while storing elements 0, 1.                     */
  172. FMUL f14, C, f6
  173. LFDUX f5, Y, INCY
  174. FMUL f15, C, f7
  175. STFDUX f10, XX, INCX
  176. FMUL f16, C, f8
  177. STFDUX f11, YY, INCY
  178. FMUL f17, C, f9
  179. STFDUX f12, XX, INCX
  /* Finish elements 2, 3; load the last two elements (6, 7).             */
  180. FMADD f14, S, f7, f14
  181. STFDUX f13, YY, INCY
  182. FNMSUB f15, S, f6, f15
  183. LFDUX f6, X, INCX
  184. FMADD f16, S, f9, f16
  185. LFDUX f7, Y, INCY
  186. FNMSUB f17, S, f8, f17
  187. LFDUX f8, X, INCX
  /* Start elements 4, 5 while storing elements 2, 3.                     */
  188. FMUL f10, C, f0
  189. LFDUX f9, Y, INCY
  190. FMUL f11, C, f3
  191. STFDUX f14, XX, INCX
  192. FMUL f12, C, f4
  193. STFDUX f15, YY, INCY
  194. FMUL f13, C, f5
  195. STFDUX f16, XX, INCX
  /* No more loads: compute elements 4-7 and store them.                  */
  196. FMUL f14, C, f6
  197. STFDUX f17, YY, INCY
  198. FMUL f15, C, f7
  199. FMUL f16, C, f8
  200. FMUL f17, C, f9
  201. FMADD f10, S, f3, f10
  202. FNMSUB f11, S, f0, f11
  203. FMADD f12, S, f5, f12
  204. FNMSUB f13, S, f4, f13
  205. FMADD f14, S, f7, f14
  206. STFDUX f10, XX, INCX
  207. FNMSUB f15, S, f6, f15
  208. STFDUX f11, YY, INCY
  209. FMADD f16, S, f9, f16
  210. STFDUX f12, XX, INCX
  211. FNMSUB f17, S, f8, f17
  212. STFDUX f13, YY, INCY
  213. STFDUX f14, XX, INCX
  214. STFDUX f15, YY, INCY
  215. STFDUX f16, XX, INCX
  216. STFDUX f17, YY, INCY
  217. .align 4
  /* Remainder: CTR = N & 7; skip straight to the exit when it is zero.   */
  218. LL(150):
  219. andi. r0, N, 7
  220. mtspr CTR, r0
  221. beq LL(999)
  222. .align 4
  /* One element per iteration: load x, y; rotate; store both back.       */
  223. LL(160):
  224. LFDUX f0, X, INCX
  225. LFDUX f3, Y, INCY
  226. FMUL f10, C, f0
  227. FMUL f11, C, f3
  228. FMADD f10, S, f3, f10
  229. FNMSUB f11, S, f0, f11
  230. STFDUX f10, XX, INCX
  231. STFDUX f11, YY, INCY
  232. bdnz LL(160)
  233. .align 4
  /* Common exit: reload the saved f14-f17, release the stack area and    */
  /* return to the caller.                                                */
  234. LL(999):
  235. lfd f14, 0(SP)
  236. lfd f15, 8(SP)
  237. lfd f16, 16(SP)
  238. lfd f17, 24(SP)
  239. addi SP, SP, STACKSIZE
  240. blr
  241. EPILOGUE