You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

amax.S 5.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  /*******************************************************************************
  Copyright (c) 2015, The OpenBLAS Project
  All rights reserved.
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:
  1. Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
  3. Neither the name of the OpenBLAS project nor the names of
  its contributors may be used to endorse or promote products
  derived from this software without specific prior written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *******************************************************************************/
  /* ASSEMBLER is defined before pulling in common.h; presumably the header
   * uses it to select its assembly-only definitions - TODO confirm there. */
  #define ASSEMBLER
  #include "common.h"

  /* Register roles used by every macro and label in this file. */
  #define N       x0      /* vector length */
  #define X       x1      /* X vector address */
  #define INC_X   x2      /* X stride */
  #define I       x5      /* loop variable */
  /*******************************************************************************
  * Macro definitions
  *******************************************************************************/

  /* COND is the fcsel condition under which the running result MAXF is kept
   * after "fcmp MAXF, TMPF": "le" when a minimum is wanted (USE_MIN),
   * "ge" otherwise. */
  #if defined(USE_MIN)
  #define COND    le
  #else
  #define COND    ge
  #endif

  /* Precision-dependent names:
   *   REG0   zero register moved into MAXF on the early-exit path
   *   MAXF   scalar register holding the running result
   *   TMPF   scalar register holding the candidate being compared
   *   TMPVF  lane 0 of v1 (the vector view of TMPF), used for strided loads
   *   SZ     element size in bytes
   */
  #if !defined(DOUBLE)
  #define REG0    wzr
  #define MAXF    s0
  #define TMPF    s1
  #define TMPVF   {v1.s}[0]
  #define SZ      4
  #else
  #define REG0    xzr
  #define MAXF    d0
  #define TMPF    d1
  #define TMPVF   {v1.d}[0]
  #define SZ      8
  #endif
  /******************************************************************************/

  /*
   * INIT_F1 - seed the running result from one contiguous element.
   * Loads the element at [X] into MAXF and post-increments X by SZ bytes;
   * with USE_ABS the value is replaced by its magnitude.
   */
  .macro INIT_F1
          ldr     MAXF, [X], #SZ
  #if defined(USE_ABS)
          fabs    MAXF, MAXF
  #endif
  .endm
  /*
   * KERNEL_F1 - fold one contiguous element into the running result.
   * Loads the element at [X] into TMPF (post-incrementing X by SZ bytes),
   * optionally takes its magnitude, then keeps MAXF when COND holds for
   * "MAXF compared with TMPF" and takes TMPF otherwise.
   * Writes TMPF, MAXF, X and the condition flags.
   */
  .macro KERNEL_F1
          ldr     TMPF, [X], #SZ
  #if defined(USE_ABS)
          fabs    TMPF, TMPF
  #endif
          fcmp    MAXF, TMPF
          fcsel   MAXF, MAXF, TMPF, COND
  .endm
  /*
   * INIT_F4 - seed the running result from the first four contiguous elements.
   *
   * Single precision: one 16-byte load into v0, optional fabs, then an
   * across-vector fminv/fmaxv reduction into MAXF.
   *
   * Double precision: a 32-byte ld2 fills v0 and v1 (ld2 de-interleaves, so
   * v0 receives elements 0 and 2 and v1 elements 1 and 3), optional fabs, an
   * element-wise fmin/fmax of the two vectors, and a pairwise fminp/fmaxp
   * into MAXF.  The grouping does not matter because all four elements end
   * up in the same reduction.
   *
   * X is post-incremented past the four elements.  The double-precision
   * variant also overwrites v1.
   */
  .macro INIT_F4
  #if !defined(DOUBLE)
          ld1     {v0.4s}, [X], #16
  #if defined(USE_ABS)
          fabs    v0.4s, v0.4s
  #endif
  #if defined(USE_MIN)
          fminv   MAXF, v0.4s
  #else
          fmaxv   MAXF, v0.4s
  #endif
  #else /* DOUBLE */
          ld2     {v0.2d,v1.2d}, [X], #32
  #if defined(USE_ABS)
          fabs    v0.2d, v0.2d
          fabs    v1.2d, v1.2d
  #endif
  #if defined(USE_MIN)
          fmin    v0.2d, v0.2d, v1.2d
          fminp   MAXF, v0.2d
  #else
          fmax    v0.2d, v0.2d, v1.2d
          fmaxp   MAXF, v0.2d
  #endif
  #endif /* !DOUBLE */
  .endm
  /*
   * KERNEL_F4 - fold the next four contiguous elements into the running result.
   *
   * The four elements are first reduced to a single candidate in TMPF, using
   * the same sequence as INIT_F4 but in v1 (and v2 for double precision).
   * The candidate is then merged with MAXF: MAXF is kept when COND holds for
   * "MAXF compared with TMPF", otherwise TMPF replaces it.
   *
   * X is post-incremented past the four elements.
   * Writes v1 (and v2 when DOUBLE), MAXF, X and the condition flags.
   */
  .macro KERNEL_F4
  #if !defined(DOUBLE)
          ld1     {v1.4s}, [X], #16
  #if defined(USE_ABS)
          fabs    v1.4s, v1.4s
  #endif
  #if defined(USE_MIN)
          fminv   TMPF, v1.4s
  #else
          fmaxv   TMPF, v1.4s
  #endif
  #else /* DOUBLE */
          ld2     {v1.2d,v2.2d}, [X], #32
  #if defined(USE_ABS)
          fabs    v1.2d, v1.2d
          fabs    v2.2d, v2.2d
  #endif
  #if defined(USE_MIN)
          fmin    v1.2d, v1.2d, v2.2d
          fminp   TMPF, v1.2d
  #else
          fmax    v1.2d, v1.2d, v2.2d
          fmaxp   TMPF, v1.2d
  #endif
  #endif /* !DOUBLE */
          fcmp    MAXF, TMPF
          fcsel   MAXF, MAXF, TMPF, COND
  .endm
  /*
   * INIT_S - set up the strided path and seed the running result.
   *
   * INC_X is rewritten in place from an element count to a byte stride
   * (shift by 2 for 4-byte elements, by 3 for 8-byte elements); the later
   * strided loads use it directly as their post-index register, so this
   * macro must run exactly once before any KERNEL_S1.
   *
   * The first element is then loaded into lane 0 of v0 (the register MAXF
   * names), X is advanced by one stride, and with USE_ABS the magnitude is
   * taken.
   */
  .macro INIT_S
  #if !defined(DOUBLE)
          lsl     INC_X, INC_X, #2
          ld1     {v0.s}[0], [X], INC_X
  #else
          lsl     INC_X, INC_X, #3
          ld1     {v0.d}[0], [X], INC_X
  #endif
  #if defined(USE_ABS)
          fabs    MAXF, MAXF
  #endif
  .endm
  /*
   * KERNEL_S1 - fold one strided element into the running result.
   * Loads the element at [X] into lane 0 of v1 (TMPF) and advances X by
   * INC_X, which INIT_S has already converted to bytes.  The magnitude is
   * optionally taken, then MAXF is kept when COND holds for "MAXF compared
   * with TMPF" and replaced by TMPF otherwise.
   * Writes v1, MAXF, X and the condition flags.
   */
  .macro KERNEL_S1
          ld1     TMPVF, [X], INC_X
  #if defined(USE_ABS)
          fabs    TMPF, TMPF
  #endif
          fcmp    MAXF, TMPF
          fcsel   MAXF, MAXF, TMPF, COND
  .endm
  /*******************************************************************************
  * End of macro definitions
  *******************************************************************************/

  /*
   * Kernel body: reduce the vector at X to a single min/max (optionally of
   * magnitudes, depending on USE_MIN / USE_ABS).
   *
   * In:   N     (x0)  vector length
   *       X     (x1)  X vector address
   *       INC_X (x2)  X stride, in elements
   * Out:  MAXF  (s0, or d0 when DOUBLE)
   *
   * Returns zero in MAXF when N <= 0 or INC_X <= 0.
   * Overwrites I (x5), v0-v2, the condition flags, X, and on some paths N
   * and INC_X.
   *
   * PROLOGUE / EPILOGUE are not defined in this file; presumably they come
   * from common.h and emit the symbol boilerplate - TODO confirm.
   */
          PROLOGUE

          cmp     N, xzr
          ble     .Lamax_kernel_zero              /* nothing to scan */
          cmp     INC_X, xzr
          ble     .Lamax_kernel_zero              /* non-positive stride */

          cmp     INC_X, #1
          bne     .Lamax_kernel_S_BEGIN           /* non-unit stride */

  /* ---- unit stride: groups of four, then a scalar tail ------------------- */
  .Lamax_kernel_F_BEGIN:
          asr     I, N, #2                        /* I = number of 4-element groups */
          cbz     I, .Lamax_kernel_F1_INIT        /* fewer than four elements */

          INIT_F4                                 /* first group seeds MAXF */
          subs    I, I, #1
          beq     .Lamax_kernel_F1

  .Lamax_kernel_F4:
          KERNEL_F4
          subs    I, I, #1
          bne     .Lamax_kernel_F4

  .Lamax_kernel_F1:
          /* ands clears V and the result is 0..3, so "zero" is the only
           * case in which there is no tail to process. */
          ands    I, N, #3                        /* I = leftover elements */
          beq     .Lamax_kernel_L999

  .Lamax_kernel_F10:
          KERNEL_F1
          subs    I, I, #1
          bne     .Lamax_kernel_F10
          ret

  .Lamax_kernel_F1_INIT:
          /* N is 1..3 here.  One element seeds MAXF, so drop it from N; the
           * tail code then sees N & 3 == remaining element count. */
          INIT_F1
          sub     N, N, #1
          b       .Lamax_kernel_F1

  /* ---- general stride: one element at a time, unrolled by four ---------- */
  .Lamax_kernel_S_BEGIN:
          INIT_S                                  /* scales INC_X, seeds MAXF */
          subs    N, N, #1                        /* N = elements still to fold */
          ble     .Lamax_kernel_L999

          asr     I, N, #2                        /* N > 0 here, so I >= 0 */
          cbz     I, .Lamax_kernel_S1

  .Lamax_kernel_S4:
          KERNEL_S1
          KERNEL_S1
          KERNEL_S1
          KERNEL_S1
          subs    I, I, #1
          bne     .Lamax_kernel_S4

  .Lamax_kernel_S1:
          ands    I, N, #3                        /* I = leftover elements */
          beq     .Lamax_kernel_L999

  .Lamax_kernel_S10:
          KERNEL_S1
          subs    I, I, #1
          bne     .Lamax_kernel_S10

  .Lamax_kernel_L999:
          ret

  .Lamax_kernel_zero:
          fmov    MAXF, REG0                      /* result = 0 */
          ret

          EPILOGUE