You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

gemm_beta.S 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /*
   * CNAME: scale a column-major block of C by beta, in place.
   *
   * For each of the j columns, the m elements starting at c are either
   * multiplied by beta (beta != 0) or overwritten with zero without being
   * read (beta == 0).  After each column c is advanced with SXADDQ by ldc
   * (see the "c += ldc" step below).
   *
   * Operands as used by the code below:
   *   $16       m, element count of one column
   *   $17       column count (outer loop counter j)
   *   $f19      beta
   *   16($sp)   c, loaded into $18
   *   24($sp)   ldc, loaded into $19
   *
   * Result:     $0 = 0 on every exit path, including the early exits
   *             taken when m <= 0 or the column count is <= 0.
   * Scratch:    $1, $2, $17, $18, $19,
   *             $f11, $f12, $f14-$f18, $f21-$f30
   *             (plus $28 and $gp when PROFILE is defined).
   *
   * LD, ST, MUL, SXADDQ, SIZE, CNAME and VERSION are not defined in this
   * file; they are expected to come from common.h or the build.
   *
   * NOTE(review): ldc is fetched with ldl, i.e. only the low 32 bits of
   * the stack slot are used (sign-extended).  Confirm that callers never
   * pass a leading dimension outside that range.
   */
#define ASSEMBLER
#include "common.h"

        .set    noat
        .set    noreorder
        .text
        .align  5
        .globl  CNAME
        .ent    CNAME
CNAME:
        .frame  $sp, 0, $26, 0

#ifdef PROFILE
        ldgp    $gp, 0($27)
        lda     $28, _mcount
        jsr     $28, ($28), _mcount
#endif

        /* Fetch the stack operands, interleaved with the empty-size checks. */
        ldq     $18, 16($sp)            # c
        ble     $16, $End               # m <= 0, nothing to do
        ldl     $19, 24($sp)            # ldc
        ble     $17, $End               # no columns, nothing to do

#ifndef PROFILE
        .prologue 0
#else
        .prologue 1
#endif

        fbeq    $f19, $BETA_EQ_ZERO     # if (beta == ZERO)
        .align 4

        /* ------------------------------------------------------------ */
        /* beta != 0 : c[i] = beta * c[i]                               */
        /* ------------------------------------------------------------ */
$BETA_NE_ZERO:
        sra     $16, 3, $2              # i = (m >> 3)
        mov     $18, $1                 # c_offset = c
        lda     $17, -1($17)            # j --
        ble     $2, $L52
        .align 4

        /* Main loop: eight elements per iteration. */
$L51:
        /* Load targets $f31, so the value is discarded; this only touches
           the memory 64 bytes past c_offset ahead of its use. */
        lds     $f31, 64($1)
        lda     $2, -1($2)

        LD      $f14, 0*SIZE($1)
        LD      $f15, 1*SIZE($1)
        LD      $f16, 2*SIZE($1)
        LD      $f17, 3*SIZE($1)
        LD      $f18, 4*SIZE($1)
        LD      $f11, 5*SIZE($1)
        LD      $f21, 6*SIZE($1)
        LD      $f22, 7*SIZE($1)

        MUL     $f19, $f14, $f23
        MUL     $f19, $f15, $f24
        MUL     $f19, $f16, $f25
        MUL     $f19, $f17, $f26
        MUL     $f19, $f18, $f27
        MUL     $f19, $f11, $f28
        MUL     $f19, $f21, $f29
        MUL     $f19, $f22, $f30

        ST      $f23, 0*SIZE($1)
        ST      $f24, 1*SIZE($1)
        ST      $f25, 2*SIZE($1)
        ST      $f26, 3*SIZE($1)
        ST      $f27, 4*SIZE($1)
        ST      $f28, 5*SIZE($1)
        ST      $f29, 6*SIZE($1)
        ST      $f30, 7*SIZE($1)

        lda     $1, 8*SIZE($1)
        bgt     $2, $L51
        .align 4

        /* Tail: the remaining (m & 7) elements, one at a time. */
$L52:
        and     $16, 7, $2
        ble     $2, $L54
        .align 4

$L53:
        LD      $f12, 0($1)
        lda     $2, -1($2)
        MUL     $f19, $f12, $f23
        ST      $f23, 0($1)
        lda     $1, SIZE($1)
        bgt     $2, $L53
        .align 4

        /* Next column; j was already decremented at the top of the pass. */
$L54:
        SXADDQ  $19, $18, $18           # c += ldc
        bgt     $17, $BETA_NE_ZERO
        clr     $0
        ret
        .align 4

        /* ------------------------------------------------------------ */
        /* beta == 0 : c[i] = 0, C is never read                        */
        /* ------------------------------------------------------------ */
$BETA_EQ_ZERO:
        sra     $16, 3, $2              # i = (m >> 3)
        mov     $18, $1                 # c_offset = c
        lda     $17, -1($17)            # j --
        ble     $2, $L42
        .align 4

        /* Main loop: eight zero stores per iteration ($f31 reads as zero). */
$L41:
        ST      $f31, 0*SIZE($1)
        ST      $f31, 1*SIZE($1)
        ST      $f31, 2*SIZE($1)
        ST      $f31, 3*SIZE($1)
        ST      $f31, 4*SIZE($1)
        ST      $f31, 5*SIZE($1)
        ST      $f31, 6*SIZE($1)
        ST      $f31, 7*SIZE($1)
        lda     $2, -1($2)

        lda     $1, 8*SIZE($1)
        bgt     $2, $L41
        .align 4

        /* Tail: the remaining (m & 7) elements, one at a time. */
$L42:
        and     $16, 7, $2
        ble     $2, $L44
        .align 4

$L43:
        lda     $2, -1($2)
        ST      $f31, 0($1)
        lda     $1, SIZE($1)
        bgt     $2, $L43
        .align 4

$L44:
        SXADDQ  $19, $18, $18           # c += ldc
        bgt     $17, $BETA_EQ_ZERO
        .align 4

        /* Shared exit: also reached directly by the early size checks, so
           the return value is cleared here rather than before the label. */
$End:
        clr     $0
        ret
        .ident  VERSION
        .end    CNAME