You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

zamax.S 5.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /*
   * ASSEMBLER is defined before common.h is included; presumably it makes
   * common.h expose its assembler-side macros (PROLOGUE, FLD, SIZE, ...)
   * -- confirm against common.h.
   */
  38. #define ASSEMBLER
  39. #include "common.h"
  /*
   * Stack layout used to fetch the arguments.
   *
   * STACK is the 8 bytes occupied by the two 4-byte register pushes
   * (pushl %esi / pushl %ebx) that the routine performs before it reads
   * its arguments. ARGS is an additional adjustment, 0 in this file.
   */
  40. #define STACK 8
  41. #define ARGS 0
  /*
   * Each macro expands textually to a displacement off %esp, e.g.
   * STACK_M becomes 4 + 8 + 0(%esp). The leading 4/8/12 skips one
   * 4-byte slot above the saved registers (presumably the return
   * address of a 32-bit stack-argument call -- confirm) and then selects
   * the first, second and third argument: M, X and INCX.
   */
  42. #define STACK_M 4 + STACK + ARGS(%esp)
  43. #define STACK_X 8 + STACK + ARGS(%esp)
  44. #define STACK_INCX 12 + STACK + ARGS(%esp)
  /*
   * zamax kernel for 32-bit x86 (AT&T syntax, x87 FPU).
   *
   * Walks M elements starting at X. Every element is two consecutive
   * SIZE-wide values (presumably the real and imaginary part of a complex
   * number -- confirm), and the quantity reduced is |v0| + |v1|:
   *   - USE_MIN undefined: the largest  such sum is kept;
   *   - USE_MIN defined:   the smallest such sum is kept.
   *
   * Arguments (read from the stack): M = element count, X = data pointer,
   * INCX = stride in elements. When F_INTERFACE is defined, M and INCX
   * arrive as pointers and are dereferenced.
   *
   * Result: left in %st(0) when the routine returns. 0.0 is returned when
   * M or the scaled INCX is not positive.
   *
   * Registers: %ebx and %esi are saved on entry and restored at .L999;
   * %ecx and %edx are used without being saved.
   */
  45. PROLOGUE
  /* Register roles for the whole routine. */
  /* M: number of elements still to be processed. */
  46. #define M %ebx
  /* INCX: distance between elements; a byte stride once scaled below. */
  47. #define INCX %esi
  /* X: address of the element currently being read. */
  48. #define X %ecx
  /* I: trip counter for the unrolled loops and their remainders. */
  49. #define I %edx
  /*
   * FMOV always follows "fcomi %st(1), %st", where %st(0) holds the new
   * candidate and %st(1) the running result. It overwrites the candidate
   * with the running result whenever the candidate must lose:
   *   fcmovbe  : candidate <= running  -> maximum search (default)
   *   fcmovnbe : candidate >  running  -> minimum search (USE_MIN)
   * The "fstp %st(1)" that follows then pops, leaving the winner as the
   * single live x87 value.
   */
  50. #ifndef USE_MIN
  51. #define FMOV fcmovbe
  52. #else
  53. #define FMOV fcmovnbe
  54. #endif
  /*
   * NOTE(review): presumably supplies the PREFETCH / PREFETCHSIZE /
   * PREOFFSET tuning macros used in .L10 -- confirm against l1param.h.
   */
  55. #include "l1param.h"
  /*
   * Save the two registers that serve as INCX and M. These two 4-byte
   * pushes are exactly what STACK (8) accounts for in the STACK_* offsets.
   */
  56. pushl %esi
  57. pushl %ebx
  /* PROFCODE is a macro defined outside this file (likely a profiling hook). */
  58. PROFCODE
  /* Fetch the three stack arguments. */
  59. movl STACK_M, M
  60. movl STACK_INCX, INCX
  61. movl STACK_X, X
  /* Fortran-style interface: M and INCX are passed by reference. */
  62. #ifdef F_INTERFACE
  63. movl (M), M
  64. movl (INCX), INCX
  65. #endif
  /*
   * NOTE(review): EMMS is a macro from outside this file; presumably it
   * resets MMX state so the x87 stack is usable under these Fortran
   * compilers -- confirm.
   */
  66. #if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
  67. EMMS
  68. #endif
  /*
   * Scale INCX by ZBASE_SHIFT (defined elsewhere). The later comparison
   * against 2 * SIZE shows the scaled value is treated as a byte stride.
   */
  69. sall $ZBASE_SHIFT, INCX
  /* Preload 0.0 so the early exits below return zero. */
  70. fldz
  71. testl M, M
  72. jle .L999
  73. testl INCX, INCX
  74. jle .L999
  /*
   * Valid input: drop the 0.0 and seed the running result with
   * |v0| + |v1| of the first element.
   */
  75. fstp %st(0)
  76. FLD 0 * SIZE(X)
  77. fabs
  78. FLD 1 * SIZE(X)
  79. fabs
  80. faddp %st, %st(1)
  /* Step past the first element; M now counts the remaining ones. */
  81. addl INCX, X
  82. decl M
  /* Only one element: the seed is already the answer. */
  83. jle .L999
  /* Contiguous data (byte stride == 2 * SIZE) uses .L10, otherwise .L40. */
  84. cmpl $2 * SIZE, INCX
  85. jne .L40
  /* I = M / 4: number of 4-element unrolled iterations. */
  86. movl M, I
  87. sarl $2, I
  88. jle .L20
  89. ALIGN_4
  /* ---- Contiguous path: 4 elements (8 values) per iteration. ---- */
  90. .L10:
  91. #ifdef PREFETCH
  92. PREFETCH (PREFETCHSIZE + 0) - PREOFFSET(X)
  93. #endif
  /* Element 0: candidate = |v0| + |v1|, then merge into the running result. */
  94. FLD 0 * SIZE(X)
  95. fabs
  96. FLD 1 * SIZE(X)
  97. fabs
  98. faddp %st, %st(1)
  99. fcomi %st(1), %st
  100. FMOV %st(1), %st(0)
  101. fstp %st(1)
  /* Element 1. */
  102. FLD 2 * SIZE(X)
  103. fabs
  104. FLD 3 * SIZE(X)
  105. fabs
  106. faddp %st, %st(1)
  107. fcomi %st(1), %st
  108. FMOV %st(1), %st(0)
  109. fstp %st(1)
  /* Element 2. */
  110. FLD 4 * SIZE(X)
  111. fabs
  112. FLD 5 * SIZE(X)
  113. fabs
  114. faddp %st, %st(1)
  115. fcomi %st(1), %st
  116. FMOV %st(1), %st(0)
  117. fstp %st(1)
  /* Element 3. */
  118. FLD 6 * SIZE(X)
  119. fabs
  120. FLD 7 * SIZE(X)
  121. fabs
  122. faddp %st, %st(1)
  123. fcomi %st(1), %st
  124. FMOV %st(1), %st(0)
  125. fstp %st(1)
  /* Advance past the four elements just consumed. */
  126. addl $8 * SIZE, X
  127. decl I
  128. jg .L10
  129. ALIGN_4
  /* ---- Contiguous path: remaining M % 4 elements, one at a time. ---- */
  130. .L20:
  131. movl M, I
  132. andl $3, I
  /* The masked value is 0..3, so this branch is taken only when it is 0. */
  133. jle .L999
  134. ALIGN_4
  135. .L21:
  136. FLD 0 * SIZE(X)
  137. fabs
  138. FLD 1 * SIZE(X)
  139. fabs
  140. faddp %st, %st(1)
  141. fcomi %st(1), %st
  142. FMOV %st(1), %st(0)
  143. fstp %st(1)
  144. addl $2 * SIZE, X
  145. decl I
  146. jg .L21
  /* Done; jump over the strided path. */
  147. jmp .L999
  148. ALIGN_4
  /* ---- Strided path: same 4-way unroll, X advanced by INCX per element. ---- */
  149. .L40:
  150. movl M, I
  151. sarl $2, I
  152. jle .L60
  153. ALIGN_4
  154. .L50:
  /*
   * Element 0. X is bumped between the loads and the add; the integer
   * addl does not touch the x87 stack.
   */
  155. FLD 0 * SIZE(X)
  156. fabs
  157. FLD 1 * SIZE(X)
  158. fabs
  159. addl INCX, X
  160. faddp %st, %st(1)
  161. fcomi %st(1), %st
  162. FMOV %st(1), %st(0)
  163. fstp %st(1)
  /* Element 1. */
  164. FLD 0 * SIZE(X)
  165. fabs
  166. FLD 1 * SIZE(X)
  167. fabs
  168. addl INCX, X
  169. faddp %st, %st(1)
  170. fcomi %st(1), %st
  171. FMOV %st(1), %st(0)
  172. fstp %st(1)
  /* Element 2. */
  173. FLD 0 * SIZE(X)
  174. fabs
  175. FLD 1 * SIZE(X)
  176. fabs
  177. addl INCX, X
  178. faddp %st, %st(1)
  179. fcomi %st(1), %st
  180. FMOV %st(1), %st(0)
  181. fstp %st(1)
  /* Element 3. */
  182. FLD 0 * SIZE(X)
  183. fabs
  184. FLD 1 * SIZE(X)
  185. fabs
  186. addl INCX, X
  187. faddp %st, %st(1)
  188. fcomi %st(1), %st
  189. FMOV %st(1), %st(0)
  190. fstp %st(1)
  191. decl I
  192. jg .L50
  193. ALIGN_4
  /* ---- Strided path: remaining M % 4 elements, one at a time. ---- */
  194. .L60:
  195. movl M, I
  196. andl $3, I
  197. jle .L999
  198. ALIGN_4
  199. .L61:
  200. FLD 0 * SIZE(X)
  201. fabs
  202. FLD 1 * SIZE(X)
  203. fabs
  204. faddp %st, %st(1)
  205. fcomi %st(1), %st
  206. FMOV %st(1), %st(0)
  207. fstp %st(1)
  208. addl INCX, X
  209. decl I
  210. jg .L61
  /* Falls through into the common exit. */
  211. ALIGN_4
  /*
   * Common exit. The result (or the 0.0 loaded for invalid input) is the
   * only value left on the x87 stack. Restore the saved registers in the
   * reverse order of the entry pushes.
   */
  212. .L999:
  213. popl %ebx
  214. popl %esi
  215. ret
  /* EPILOGUE is a macro defined outside this file; it pairs with PROLOGUE. */
  216. EPILOGUE