You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

copy.S 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER
  39. #include "common.h"
  /* Integer registers used by the kernel body. */
#define N	%i0	/* number of elements to copy               */
#define X	%i1	/* source pointer (read with LDF)           */
#define INCX	%i2	/* amount added to X between elements       */
#define Y	%i3	/* destination pointer (written with STF)   */
#define INCY	%i4	/* amount added to Y between elements       */
#define I	%i5	/* loop counter                             */

/*
 * Floating-point staging registers a1..a16.
 *
 * With DOUBLE defined the aliases map to the even-numbered registers
 * %f0, %f2, ..., %f30; otherwise they map to the consecutive registers
 * %f0 .. %f15.
 * NOTE(review): presumably the even numbering is because a SPARC
 * double-precision value occupies an even/odd register pair -- confirm
 * against the architecture manual.
 */
#if defined(DOUBLE)

#define a1	%f0
#define a2	%f2
#define a3	%f4
#define a4	%f6
#define a5	%f8
#define a6	%f10
#define a7	%f12
#define a8	%f14
#define a9	%f16
#define a10	%f18
#define a11	%f20
#define a12	%f22
#define a13	%f24
#define a14	%f26
#define a15	%f28
#define a16	%f30

#else

#define a1	%f0
#define a2	%f1
#define a3	%f2
#define a4	%f3
#define a5	%f4
#define a6	%f5
#define a7	%f6
#define a8	%f7
#define a9	%f8
#define a10	%f9
#define a11	%f10
#define a12	%f11
#define a13	%f12
#define a14	%f13
#define a15	%f14
#define a16	%f15

#endif
  /*
   * COPY kernel: store N elements read from X into Y.
   *
   *   N           element count
   *   X, INCX     source pointer and its stride
   *   Y, INCY     destination pointer and its stride
   *   I           loop counter (scratch)
   *   a1 .. a8    floating-point staging registers
   *
   * INCX and INCY are shifted left by BASE_SHIFT on entry and are used
   * as address increments from then on.
   * NOTE(review): BASE_SHIFT is presumably log2(SIZE), i.e. the shift
   * turns an element stride into a byte stride -- confirm in common.h.
   *
   * Two code paths:
   *   - INCX == SIZE and INCY == SIZE : contiguous copy, 8 elements per
   *     iteration with prefetch, then a one-element tail loop.
   *   - anything else (.LL50)         : strided copy, 8 elements per
   *     iteration, then a one-element tail loop.
   *
   * Every exit clears %o0 in the delay slot of `return`, so the routine
   * returns 0 on all paths.  N <= 0 copies nothing.
   *
   * PROLOGUE, SAVESP, EPILOGUE, LDF, STF, SIZE and BASE_SHIFT are not
   * defined in this file; they are expected to come from common.h.
   */
	PROLOGUE
	SAVESP

	/* Nothing to do for N <= 0.  Without this guard a negative N that
	   is not a multiple of 8 would skip the unrolled loop but still run
	   the tail loop, because (N & 7) is positive. */
	cmp	N, 0
	ble,pn	%icc, .LL59
	nop

	sll	INCX, BASE_SHIFT, INCX
	sll	INCY, BASE_SHIFT, INCY

	/* Take the strided path unless both increments equal SIZE. */
	cmp	INCX, SIZE
	bne	.LL50
	nop

	cmp	INCY, SIZE
	bne	.LL50
	nop

	/* ---- contiguous path ------------------------------------------ */
	sra	N, 3, I			/* I = N / 8 full blocks */
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

	/* Prefetch distance, in elements, ahead of the current X / Y. */
#define PREFETCHSIZE 32

.LL11:
	LDF	[X +  0 * SIZE], a1
	prefetch [X + PREFETCHSIZE * SIZE], 0

	LDF	[X +  1 * SIZE], a2
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4
	LDF	[X +  4 * SIZE], a5
	LDF	[X +  5 * SIZE], a6
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	STF	a1, [Y +  0 * SIZE]
	prefetch [Y + PREFETCHSIZE * SIZE], 0

	STF	a2, [Y +  1 * SIZE]
	STF	a3, [Y +  2 * SIZE]
	STF	a4, [Y +  3 * SIZE]
	STF	a5, [Y +  4 * SIZE]
	STF	a6, [Y +  5 * SIZE]
	STF	a7, [Y +  6 * SIZE]
	STF	a8, [Y +  7 * SIZE]

	add	I, -1, I
	cmp	I, 0			/* plain `add` below leaves %icc intact */
	add	Y, 8 * SIZE, Y
	add	X, 8 * SIZE, X

	bg,pt	%icc, .LL11
	nop

.LL15:
	and	N, 7, I			/* I = leftover elements (0..7) */
	cmp	I, 0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	add	X, 1 * SIZE, X
	STF	a1, [Y +  0 * SIZE]
	bg,pt	%icc, .LL16
	add	Y, 1 * SIZE, Y		/* delay slot */

.LL19:
	return	%i7 + 8
	clr	%o0			/* delay slot: return value 0; was
					   `clr %g0`, a no-op that left the
					   return value unset on this path */

	/* ---- strided path --------------------------------------------- */
.LL50:
	sra	N, 3, I			/* I = N / 8 full blocks */
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

.LL51:
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a7
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a8
	add	X, INCX, X

	STF	a1, [Y +  0 * SIZE]
	add	Y, INCY, Y
	add	I, -1, I
	STF	a2, [Y +  0 * SIZE]
	add	Y, INCY, Y
	cmp	I, 0			/* nothing below alters %icc before bg */
	STF	a3, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a4, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a5, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a6, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a7, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a8, [Y +  0 * SIZE]

	bg,pt	%icc, .LL51
	add	Y, INCY, Y		/* delay slot */

.LL55:
	and	N, 7, I			/* I = leftover elements (0..7) */
	cmp	I, 0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	add	X, INCX, X
	STF	a1, [Y +  0 * SIZE]
	bg,pt	%icc, .LL56
	add	Y, INCY, Y		/* delay slot */

.LL59:
	return	%i7 + 8
	clr	%o0			/* delay slot: return value 0 */

	EPILOGUE