You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

scal_ppc440.S 5.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
  /*
   * Build configuration and register aliases for the SCAL kernel in
   * this file.
   *
   * ASSEMBLER is defined before common.h is included; presumably the
   * header uses it to expose only its assembler-side macros -- confirm
   * in common.h.
   */
#define ASSEMBLER
#include "common.h"

/* Integer registers with a fixed role on every platform. */
#define N	r3	/* element count */
#define XX	r4	/* store pointer on the alpha != 0 path (copy of X) */
#define PRE	r5	/* byte offset handed to dcbtst as prefetch distance */
/*
 * NOTE(review): r4 and r5 are overwritten by the kernel, so whatever
 * the caller passes in them is presumably unused -- confirm against the
 * C prototype of the scal kernel.
 */

/*
 * X (vector pointer) and INCX (stride) arrive in different registers
 * depending on OS and word size.  Presumably this tracks how each ABI
 * lets the floating-point ALPHA argument consume integer argument
 * registers -- TODO confirm against the platform ABI documents.
 */
#if defined(linux) || defined(__FreeBSD__)
#ifndef __64BIT__
#define X	r6
#define INCX	r7
#else
#define X	r7
#define INCX	r8
#endif
#endif

#if defined(_AIX) || defined(__APPLE__)
#if !defined(__64BIT__) && defined(DOUBLE)
#define X	r8
#define INCX	r9
#else
#define X	r7
#define INCX	r8
#endif
#endif

/*
 * Fail early with a readable message when no platform branch above
 * matched; otherwise X and INCX would reach the assembler as undefined
 * symbols where registers are required.
 */
#if !defined(X) || !defined(INCX)
#error "scal_ppc440.S: unsupported OS/ABI, X and INCX argument registers are not defined"
#endif

/* Floating-point registers. */
#define FZERO	f0	/* holds 0.0 once the kernel has loaded it */
#define ALPHA	f1	/* scale factor */
  /*
   * SCAL kernel: x[i] = ALPHA * x[i] for N strided elements.
   *
   * Inputs (register aliases are defined earlier in this file):
   *   N     - element count; the routine returns at once when N <= 0
   *   ALPHA - scale factor
   *   X     - address of the first element
   *   INCX  - stride in elements; shifted left by BASE_SHIFT on entry
   *           (presumably log2 of the element size, making it a byte
   *           stride -- confirm in common.h)
   *
   * Scratch: r0, XX, PRE, CTR, cr0, FZERO, f2-f9 and, briefly, 8 bytes
   * of stack just below the incoming SP.
   *
   * Two paths:
   *   ALPHA == 0 : stores zeros, 16 elements per unrolled iteration plus
   *                an N & 15 remainder loop.  x is never read on this
   *                path, so NaN/Inf values in x are overwritten with 0
   *                rather than propagated.
   *   ALPHA != 0 : load / multiply / store, software-pipelined over
   *                8 elements per iteration plus an N & 7 remainder loop.
   *
   * LFDUX, STFDUX and FMUL are macros from common.h; they are presumably
   * the update-form indexed load/store and the multiply for the selected
   * precision -- confirm in common.h.  Because the update forms add INCX
   * to the pointer before each access, X is pre-decremented by one
   * stride.  PROLOGUE, PROFCODE and EPILOGUE also come from common.h.
   */
	PROLOGUE
	PROFCODE

	/* Build 0.0 in FZERO: store an integer zero word in a temporary */
	/* stack slot and reload it as a single-precision float.         */
	addi	SP, SP, -8
	li	r0, 0
	stw	r0, 0(SP)
	lfs	FZERO, 0(SP)
	addi	SP, SP, 8

	slwi	INCX, INCX, BASE_SHIFT	/* scale stride by 2^BASE_SHIFT */
	li	PRE, 3 * 16 * SIZE	/* prefetch offset used by dcbtst */

	cmpwi	cr0, N, 0
	blelr-	cr0			/* nothing to do for N <= 0 */

	sub	X, X, INCX		/* bias pointer for update-form accesses */

	fcmpu	cr0, FZERO, ALPHA
	bne-	cr0, LL(A1I1)		/* ALPHA != 0 (or NaN): multiply path */

	/* ---- ALPHA == 0: fill with zeros, 16 elements per iteration ---- */
	srawi.	r0, N, 4		/* r0 = N / 16, cr0 reflects the result */
	mtspr	CTR, r0
	beq-	cr0, LL(A0I1_Remain)	/* fewer than 16 elements */
	.align 4

LL(A0I1_kernel):
#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	bdnz	LL(A0I1_kernel)
	.align 4

LL(A0I1_Remain):
	andi.	r0, N, 15		/* leftover elements after the blocks of 16 */
	mtspr	CTR, r0
	beqlr+				/* none left: return */
	.align 4

LL(A0I1_RemainKernel):
	STFDUX	FZERO, X, INCX
	bdnz	LL(A0I1_RemainKernel)
	blr
	.align 4

	/* ---- ALPHA != 0: scale in place, 8 elements per iteration ---- */
LL(A1I1):
	mr	XX, X			/* XX trails X as the store pointer */

	srawi.	r0, N, 3		/* r0 = N / 8, cr0 reflects the result */
	mtspr	CTR, r0
	beq+	LL(A1I1_Remain)		/* fewer than 8 elements */

	/* Pipeline prologue: preload the first four elements. */
	LFDUX	f2, X, INCX
	LFDUX	f3, X, INCX
	LFDUX	f4, X, INCX
	LFDUX	f5, X, INCX
	bdz	LL(12)			/* only one group of 8: go to the tail */
	.align 4

	/* Steady state: f2-f5 and f6-f9 alternate, so the loads of one */
	/* half overlap the multiplies and stores of the other half.    */
LL(11):
	LFDUX	f6, X, INCX
	FMUL	f2, ALPHA, f2
	LFDUX	f7, X, INCX
	FMUL	f3, ALPHA, f3
	LFDUX	f8, X, INCX
	FMUL	f4, ALPHA, f4
	LFDUX	f9, X, INCX
	FMUL	f5, ALPHA, f5

#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	f2, XX, INCX
	STFDUX	f3, XX, INCX
	STFDUX	f4, XX, INCX
	STFDUX	f5, XX, INCX

	/* Reload f2-f5 with the first half of the next group of 8. */
	LFDUX	f2, X, INCX
	FMUL	f6, ALPHA, f6
	LFDUX	f3, X, INCX
	FMUL	f7, ALPHA, f7
	LFDUX	f4, X, INCX
	FMUL	f8, ALPHA, f8
	LFDUX	f5, X, INCX
	FMUL	f9, ALPHA, f9

	STFDUX	f6, XX, INCX
	STFDUX	f7, XX, INCX
	STFDUX	f8, XX, INCX
	STFDUX	f9, XX, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif
	bdnz	LL(11)
	.align 4

	/* Pipeline tail: finish the last group of 8; it loads only that */
	/* group's second half and reads nothing beyond it.              */
LL(12):
	LFDUX	f6, X, INCX
	FMUL	f2, ALPHA, f2
	LFDUX	f7, X, INCX
	FMUL	f3, ALPHA, f3
	LFDUX	f8, X, INCX
	FMUL	f4, ALPHA, f4
	LFDUX	f9, X, INCX
	FMUL	f5, ALPHA, f5

	STFDUX	f2, XX, INCX
	FMUL	f6, ALPHA, f6
	STFDUX	f3, XX, INCX
	FMUL	f7, ALPHA, f7
	STFDUX	f4, XX, INCX
	FMUL	f8, ALPHA, f8
	STFDUX	f5, XX, INCX
	FMUL	f9, ALPHA, f9

	STFDUX	f6, XX, INCX
	STFDUX	f7, XX, INCX
	STFDUX	f8, XX, INCX
	STFDUX	f9, XX, INCX
	.align 4

LL(A1I1_Remain):
	andi.	r0, N, 7		/* leftover elements after the blocks of 8 */
	mtspr	CTR, r0
	beqlr+				/* none left: return */
	.align 4

LL(A1I1_RemainKernel):
	LFDUX	f2, X, INCX
	FMUL	f2, ALPHA, f2
	STFDUX	f2, XX, INCX
	bdnz	LL(A1I1_RemainKernel)
	blr
	.align 4

	EPILOGUE