You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

asum_ppc440.S 6.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build setup and symbolic register names for the routine below. */
  /* NOTE(review): ASSEMBLER is presumably tested inside common.h -- confirm. */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* N (r3): element count. Under F_INTERFACE it first holds an address */
  /* from which the count is loaded. */
  40. #define N r3
  /* X (r4): base register of the data; used as the base operand of every */
  /* LFDUX in the routine. */
  41. #define X r4
  /* INCX (r5): stride. Under F_INTERFACE it is loaded through an address, */
  /* then shifted left by BASE_SHIFT before use. */
  42. #define INCX r5
  /* PREX (r6): offset 3 * 16 * SIZE, used only by the dcbt instructions */
  /* that are assembled when PPCG4 is defined. */
  43. #define PREX r6
  /* ATTR (r7): not referenced by any instruction visible in this file. */
  44. #define ATTR r7
  /* FZERO (f0): loaded with 0.0 on entry and copied into f1-f7; f0 itself */
  /* is then used as the first of the eight partial sums. */
  45. #define FZERO f0
  /* STACKSIZE: frame size in bytes. Offsets 0..143 hold the saved f14-f31 */
  /* and offset 144 holds a zero word used to create 0.0. */
  46. #define STACKSIZE 160
  /* Sum of absolute values: adds |x| for N elements reached from X with */
  /* stride INCX, using eight partial sums (f0-f7) that are combined at the */
  /* end. */
  /* Inputs: N = r3, X = r4, INCX = r5 (see the #defines above). With */
  /* F_INTERFACE, N and INCX arrive as addresses and are loaded first. */
  /* Result: left in f1 by the last FADD before the restores (presumably */
  /* the floating-point return register of the ABI in use -- confirm). */
  /* N <= 0 or INCX <= 0 skips all loads and yields 0. */
  /* PROLOGUE, PROFCODE, EPILOGUE, LL(), LFDUX, FADD, LDINT, SP, BASE_SHIFT */
  /* and SIZE are not defined in this file (presumably common.h -- confirm). */
  /* NOTE(review): LFDUX looks like an update-form indexed load (advance X */
  /* by INCX, then load); the pre-bias of X by -INCX below relies on that. */
  47. PROLOGUE
  48. PROFCODE
  /* Open a STACKSIZE-byte frame and save f14-f31 at offsets 0..136; they */
  /* are used as temporaries below and restored before returning. */
  49. addi SP, SP, -STACKSIZE
  /* r0 = 0, stored further down to build a floating-point zero. */
  50. li r0, 0
  51. stfd f14, 0(SP)
  52. stfd f15, 8(SP)
  53. stfd f16, 16(SP)
  54. stfd f17, 24(SP)
  55. stfd f18, 32(SP)
  56. stfd f19, 40(SP)
  57. stfd f20, 48(SP)
  58. stfd f21, 56(SP)
  59. stfd f22, 64(SP)
  60. stfd f23, 72(SP)
  61. stfd f24, 80(SP)
  62. stfd f25, 88(SP)
  63. stfd f26, 96(SP)
  64. stfd f27, 104(SP)
  65. stfd f28, 112(SP)
  66. stfd f29, 120(SP)
  67. stfd f30, 128(SP)
  68. stfd f31, 136(SP)
  /* Write a zero word just past the saved registers and read it back as a */
  /* single-precision float, so FZERO (f0) = 0.0. */
  69. stw r0, 144(SP)
  70. lfs FZERO,144(SP)
  71. #ifdef F_INTERFACE
  /* N and INCX hold addresses here; replace them with the values stored */
  /* at those addresses. */
  72. LDINT N, 0(N)
  73. LDINT INCX, 0(INCX)
  74. #endif
  /* Scale the stride by 2^BASE_SHIFT (presumably elements -> bytes -- */
  /* confirm), clear partial sums f1-f7, set the prefetch offset, and */
  /* pre-bias X by one stride. These steps are interleaved with the */
  /* argument checks. */
  75. slwi INCX, INCX, BASE_SHIFT
  76. fmr f1, FZERO
  77. li PREX, 3 * 16 * SIZE
  78. fmr f2, FZERO
  79. sub X, X, INCX
  80. fmr f3, FZERO
  81. fmr f4, FZERO
  82. fmr f5, FZERO
  83. fmr f6, FZERO
  /* N <= 0: go straight to the final reduction (all sums are 0). The fmr */
  /* between the compare and the branch does not touch cr0. */
  84. cmpwi cr0, N, 0
  85. fmr f7, FZERO
  86. ble- LL(999)
  /* INCX <= 0 (tested after scaling): also return 0. */
  87. cmpwi cr0, INCX, 0
  88. ble- LL(999)
  /* CTR = N >> 4, the number of 16-element blocks; srawi. also sets cr0. */
  /* With no full block, only the scalar tail at LL(150) runs. */
  89. srawi. r0, N, 4
  90. mtspr CTR, r0
  91. beq- LL(150)
  /* Pipeline fill: load the first block of 16 into f8-f15 and f24-f31, */
  /* and compute |f8..f15| into f16-f23 ahead of the first accumulation. */
  92. LFDUX f8, X, INCX
  93. LFDUX f9, X, INCX
  94. LFDUX f10, X, INCX
  95. LFDUX f11, X, INCX
  96. LFDUX f12, X, INCX
  97. LFDUX f13, X, INCX
  98. LFDUX f14, X, INCX
  99. LFDUX f15, X, INCX
  100. fabs f16, f8
  101. LFDUX f24, X, INCX
  102. fabs f17, f9
  103. LFDUX f25, X, INCX
  104. fabs f18, f10
  105. LFDUX f26, X, INCX
  106. fabs f19, f11
  107. LFDUX f27, X, INCX
  108. fabs f20, f12
  109. LFDUX f28, X, INCX
  110. fabs f21, f13
  111. LFDUX f29, X, INCX
  112. fabs f22, f14
  113. LFDUX f30, X, INCX
  114. fabs f23, f15
  115. LFDUX f31, X, INCX
  /* Exactly one block: skip the steady-state loop and drain at LL(120). */
  116. bdz LL(120)
  /* Steady state: each pass loads the next 16 elements and accumulates */
  /* the 16 loaded previously. */
  /* First half: load new f8-f15, add the pending |x| in f16-f23 into */
  /* f0-f7, and refill f16-f23 with |f24..f31|. */
  /* NOTE(review): the nops look like instruction-scheduling padding; some */
  /* are replaced by dcbt (cache touch at X + PREX) when PPCG4, or PPCG4 */
  /* and DOUBLE, are defined -- confirm the intent. */
  117. .align 4
  118. LL(110):
  119. LFDUX f8, X, INCX
  120. FADD f0, f0, f16
  121. #ifdef PPCG4
  122. dcbt X, PREX
  123. #else
  124. nop
  125. #endif
  126. fabs f16, f24
  127. LFDUX f9, X, INCX
  128. FADD f1, f1, f17
  129. nop
  130. fabs f17, f25
  131. LFDUX f10, X, INCX
  132. FADD f2, f2, f18
  133. nop
  134. fabs f18, f26
  135. LFDUX f11, X, INCX
  136. FADD f3, f3, f19
  137. nop
  138. fabs f19, f27
  139. LFDUX f12, X, INCX
  140. FADD f4, f4, f20
  141. #if defined(PPCG4) && defined(DOUBLE)
  142. dcbt X, PREX
  143. #else
  144. nop
  145. #endif
  146. fabs f20, f28
  147. LFDUX f13, X, INCX
  148. FADD f5, f5, f21
  149. nop
  150. fabs f21, f29
  151. LFDUX f14, X, INCX
  152. FADD f6, f6, f22
  153. nop
  154. fabs f22, f30
  155. LFDUX f15, X, INCX
  156. FADD f7, f7, f23
  157. nop
  158. fabs f23, f31
  /* Second half: load new f24-f31, add the pending |x| in f16-f23, and */
  /* refill f16-f23 with the absolute values of the f8-f15 just loaded. */
  159. LFDUX f24, X, INCX
  160. FADD f0, f0, f16
  161. #ifdef PPCG4
  162. dcbt X, PREX
  163. #else
  164. nop
  165. #endif
  166. fabs f16, f8
  167. LFDUX f25, X, INCX
  168. FADD f1, f1, f17
  169. nop
  170. fabs f17, f9
  171. LFDUX f26, X, INCX
  172. FADD f2, f2, f18
  173. nop
  174. fabs f18, f10
  175. LFDUX f27, X, INCX
  176. FADD f3, f3, f19
  177. nop
  178. fabs f19, f11
  179. LFDUX f28, X, INCX
  180. FADD f4, f4, f20
  181. #if defined(PPCG4) && defined(DOUBLE)
  182. dcbt X, PREX
  183. #else
  184. nop
  185. #endif
  186. fabs f20, f12
  187. LFDUX f29, X, INCX
  188. FADD f5, f5, f21
  189. nop
  190. fabs f21, f13
  191. LFDUX f30, X, INCX
  192. FADD f6, f6, f22
  193. nop
  194. fabs f22, f14
  195. LFDUX f31, X, INCX
  196. FADD f7, f7, f23
  197. fabs f23, f15
  /* Decrement CTR and repeat while blocks remain. */
  198. bdnz LL(110)
  /* Drain: no further loads. Add the |x| pending in f16-f23, take the */
  /* absolute values of f24-f31 from the last block, and add those too. */
  199. .align 4
  200. LL(120):
  201. FADD f0, f0, f16
  202. fabs f16, f24
  203. FADD f1, f1, f17
  204. fabs f17, f25
  205. FADD f2, f2, f18
  206. fabs f18, f26
  207. FADD f3, f3, f19
  208. fabs f19, f27
  209. FADD f4, f4, f20
  210. fabs f20, f28
  211. FADD f5, f5, f21
  212. fabs f21, f29
  213. FADD f6, f6, f22
  214. fabs f22, f30
  215. FADD f7, f7, f23
  216. fabs f23, f31
  217. FADD f0, f0, f16
  218. FADD f1, f1, f17
  219. FADD f2, f2, f18
  220. FADD f3, f3, f19
  221. FADD f4, f4, f20
  222. FADD f5, f5, f21
  223. FADD f6, f6, f22
  224. FADD f7, f7, f23
  /* Tail: CTR = N & 15 leftover elements; andi. sets cr0, and a zero */
  /* count goes straight to the final reduction. */
  225. .align 4
  226. LL(150):
  227. andi. r0, N, 15
  228. mtspr CTR, r0
  229. beq LL(999)
  /* One element per pass, accumulated into f0 only. */
  230. .align 4
  231. LL(160):
  232. LFDUX f8, X, INCX
  233. fabs f8, f8
  234. FADD f0, f0, f8
  235. bdnz LL(160)
  /* Final reduction: combine the eight partial sums pairwise; the total */
  /* is written to f1. */
  236. .align 4
  237. LL(999):
  238. FADD f0, f0, f1
  239. FADD f2, f2, f3
  240. FADD f4, f4, f5
  241. FADD f6, f6, f7
  242. FADD f0, f0, f2
  243. FADD f4, f4, f6
  244. FADD f1, f0, f4
  /* Restore f14-f31 from the frame, release the frame, and return. */
  245. lfd f14, 0(SP)
  246. lfd f15, 8(SP)
  247. lfd f16, 16(SP)
  248. lfd f17, 24(SP)
  249. lfd f18, 32(SP)
  250. lfd f19, 40(SP)
  251. lfd f20, 48(SP)
  252. lfd f21, 56(SP)
  253. lfd f22, 64(SP)
  254. lfd f23, 72(SP)
  255. lfd f24, 80(SP)
  256. lfd f25, 88(SP)
  257. lfd f26, 96(SP)
  258. lfd f27, 104(SP)
  259. lfd f28, 112(SP)
  260. lfd f29, 120(SP)
  261. lfd f30, 128(SP)
  262. lfd f31, 136(SP)
  263. addi SP, SP, STACKSIZE
  264. blr
  265. EPILOGUE