You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zdot.S 6.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER
  39. #include "common.h"
  40. #define STACK 12
  41. #define ARGS 0
  42. #if defined(DOUBLE) || defined(XDOUBLE)
  43. #define RESULT 4 + STACK + ARGS(%esp)
  44. #define STACK_N 8 + STACK + ARGS(%esp)
  45. #define STACK_X 12 + STACK + ARGS(%esp)
  46. #define STACK_INCX 16 + STACK + ARGS(%esp)
  47. #define STACK_Y 20 + STACK + ARGS(%esp)
  48. #define STACK_INCY 24 + STACK + ARGS(%esp)
  49. #else
  50. #define STACK_N 4 + STACK + ARGS(%esp)
  51. #define STACK_X 8 + STACK + ARGS(%esp)
  52. #define STACK_INCX 12 + STACK + ARGS(%esp)
  53. #define STACK_Y 16 + STACK + ARGS(%esp)
  54. #define STACK_INCY 20 + STACK + ARGS(%esp)
  55. #endif
  56. #define N %ebx
  57. #define X %esi
  58. #define INCX %ecx
  59. #define Y %edi
  60. #define INCY %edx
  61. #include "l1param.h"
  62. PROLOGUE
  63. PROFCODE
  64. pushl %edi
  65. pushl %esi
  66. pushl %ebx
  67. #if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
  68. EMMS
  69. #endif
  70. movl STACK_N, N
  71. movl STACK_X, X
  72. movl STACK_INCX, INCX
  73. movl STACK_Y, Y
  74. movl STACK_INCY, INCY
  75. #ifdef F_INTERFACE
  76. movl (N),N
  77. movl (INCX),INCX
  78. movl (INCY),INCY
  79. #endif
  80. #if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
  81. EMMS
  82. #endif
  83. testl N, N
  84. jle .L88
  85. addl INCX, INCX
  86. fldz
  87. addl INCY, INCY
  88. fldz
  89. leal (, INCX, SIZE), INCX
  90. fldz
  91. leal (, INCY, SIZE), INCY
  92. fldz
  93. cmpl $2 * SIZE, INCX
  94. jne .L14
  95. cmpl $2 * SIZE, INCY
  96. jne .L14
  97. movl N, %eax
  98. sarl $1, %eax
  99. jle .L15
  100. ALIGN_3
  101. .L16:
  102. FLD 0 * SIZE(X)
  103. FLD 0 * SIZE(Y)
  104. fmul %st(1), %st
  105. faddp %st, %st(2)
  106. FMUL 1 * SIZE(Y)
  107. faddp %st, %st(2)
  108. FLD 1 * SIZE(X)
  109. FLD 0 * SIZE(Y)
  110. fmul %st(1), %st
  111. faddp %st, %st(4)
  112. FMUL 1 * SIZE(Y)
  113. faddp %st, %st(4)
  114. FLD 2 * SIZE(X)
  115. FLD 2 * SIZE(Y)
  116. fmul %st(1), %st
  117. faddp %st, %st(2)
  118. FMUL 3 * SIZE(Y)
  119. faddp %st, %st(2)
  120. FLD 3 * SIZE(X)
  121. FLD 2 * SIZE(Y)
  122. fmul %st(1), %st
  123. faddp %st, %st(4)
  124. FMUL 3 * SIZE(Y)
  125. faddp %st, %st(4)
  126. addl $4 * SIZE, X
  127. addl $4 * SIZE, Y
  128. decl %eax
  129. jg .L16
  130. ALIGN_3
  131. .L15:
  132. movl N, %eax
  133. andl $1, %eax
  134. jle .L27
  135. ALIGN_3
  136. .L22:
  137. FLD 0 * SIZE(X)
  138. FLD 0 * SIZE(Y)
  139. fmul %st(1), %st
  140. faddp %st, %st(2)
  141. FMUL 1 * SIZE(Y)
  142. faddp %st, %st(2)
  143. FLD 1 * SIZE(X)
  144. FLD 0 * SIZE(Y)
  145. fmul %st(1), %st
  146. faddp %st, %st(4)
  147. FMUL 1 * SIZE(Y)
  148. faddp %st, %st(4)
  149. jmp .L27
  150. ALIGN_3
  151. .L14:
  152. movl N, %eax
  153. sarl $1, %eax
  154. jle .L30
  155. ALIGN_3
  156. .L31:
  157. FLD 0 * SIZE(X)
  158. FLD 0 * SIZE(Y)
  159. fmul %st(1), %st
  160. faddp %st, %st(2)
  161. FMUL 1 * SIZE(Y)
  162. faddp %st, %st(2)
  163. FLD 1 * SIZE(X)
  164. FLD 0 * SIZE(Y)
  165. fmul %st(1), %st
  166. faddp %st, %st(4)
  167. FMUL 1 * SIZE(Y)
  168. faddp %st, %st(4)
  169. addl INCX, X
  170. FLD 0 * SIZE(X)
  171. addl INCY, Y
  172. FLD 0 * SIZE(Y)
  173. fmul %st(1), %st
  174. faddp %st, %st(2)
  175. FMUL 1 * SIZE(Y)
  176. faddp %st, %st(2)
  177. FLD 1 * SIZE(X)
  178. FLD 0 * SIZE(Y)
  179. fmul %st(1), %st
  180. faddp %st, %st(4)
  181. FMUL 1 * SIZE(Y)
  182. faddp %st, %st(4)
  183. addl INCX, X
  184. addl INCY, Y
  185. decl %eax
  186. jg .L31
  187. ALIGN_3
  188. .L30:
  189. movl N, %eax
  190. andl $1, %eax
  191. jle .L27
  192. ALIGN_3
  193. .L37:
  194. FLD 0 * SIZE(X)
  195. FLD 0 * SIZE(Y)
  196. fmul %st(1), %st
  197. faddp %st, %st(2)
  198. FMUL 1 * SIZE(Y)
  199. faddp %st, %st(2)
  200. FLD 1 * SIZE(X)
  201. FLD 0 * SIZE(Y)
  202. fmul %st(1), %st
  203. faddp %st, %st(4)
  204. FMUL 1 * SIZE(Y)
  205. faddp %st, %st(4)
  206. ALIGN_3
  207. .L27:
  208. #if defined(DOUBLE) || defined(XDOUBLE)
  209. movl RESULT, %eax
  210. #endif
  211. #ifndef CONJ
  212. fsubp %st, %st(3)
  213. faddp %st, %st(1)
  214. #else
  215. faddp %st, %st(3)
  216. fsubp %st, %st(1)
  217. #endif
  218. #if !defined(DOUBLE) && !defined(XDOUBLE)
  219. subl $2 * SIZE, %esp
  220. FST 1 * SIZE(%esp)
  221. FST 0 * SIZE(%esp)
  222. movl 0 * SIZE(%esp), %eax
  223. movl 1 * SIZE(%esp), %edx
  224. addl $2 * SIZE, %esp
  225. #else
  226. FST 1 * SIZE(%eax)
  227. FST 0 * SIZE(%eax)
  228. #endif
  229. popl %ebx
  230. popl %esi
  231. popl %edi
  232. #if defined(DOUBLE) || defined(XDOUBLE)
  233. ret $0x4
  234. #else
  235. ret
  236. #endif
  237. ALIGN_3
  238. .L88:
  239. #if defined(DOUBLE) || defined(XDOUBLE)
  240. movl RESULT, %eax
  241. #endif
  242. fldz
  243. fldz
  244. #if !defined(DOUBLE) && !defined(XDOUBLE)
  245. xor %eax, %eax
  246. xor %edx, %edx
  247. #else
  248. FST 1 * SIZE(%eax)
  249. FST 0 * SIZE(%eax)
  250. #endif
  251. popl %ebx
  252. popl %esi
  253. popl %edi
  254. #if defined(DOUBLE) || defined(XDOUBLE)
  255. ret $0x4
  256. #else
  257. ret
  258. #endif
  259. EPILOGUE