Browse Source

Merge branch 'jeromerobert-bug736' into develop

tags/v0.2.16.rc1
Zhang Xianyi 10 years ago
parent
commit
96b486acee
33 changed files with 243 additions and 284 deletions
  1. +2
    -2
      common_stackalloc.h
  2. +4
    -4
      kernel/arm/asum_vfp.S
  3. +4
    -4
      kernel/arm/cdot_vfp.S
  4. +4
    -4
      kernel/arm/cgemm_kernel_2x2_vfp.S
  5. +4
    -6
      kernel/arm/cgemm_kernel_2x2_vfpv3.S
  6. +4
    -4
      kernel/arm/cgemv_n_vfp.S
  7. +12
    -12
      kernel/arm/cgemv_t_vfp.S
  8. +22
    -22
      kernel/arm/ctrmm_kernel_2x2_vfp.S
  9. +4
    -6
      kernel/arm/ctrmm_kernel_2x2_vfpv3.S
  10. +2
    -2
      kernel/arm/ddot_vfp.S
  11. +6
    -6
      kernel/arm/dgemm_kernel_4x2_vfp.S
  12. +9
    -9
      kernel/arm/dgemm_kernel_4x4_vfpv3.S
  13. +6
    -6
      kernel/arm/dtrmm_kernel_4x2_vfp.S
  14. +9
    -9
      kernel/arm/dtrmm_kernel_4x4_vfpv3.S
  15. +8
    -8
      kernel/arm/gemv_n_vfp.S
  16. +8
    -8
      kernel/arm/gemv_n_vfpv3.S
  17. +12
    -12
      kernel/arm/gemv_t_vfp.S
  18. +12
    -12
      kernel/arm/gemv_t_vfpv3.S
  19. +2
    -2
      kernel/arm/iamax_vfp.S
  20. +4
    -4
      kernel/arm/nrm2_vfp.S
  21. +2
    -7
      kernel/arm/nrm2_vfpv3.S
  22. +4
    -4
      kernel/arm/sdot_vfp.S
  23. +6
    -6
      kernel/arm/sgemm_kernel_4x2_vfp.S
  24. +17
    -32
      kernel/arm/sgemm_kernel_4x4_vfpv3.S
  25. +6
    -6
      kernel/arm/strmm_kernel_4x2_vfp.S
  26. +16
    -33
      kernel/arm/strmm_kernel_4x4_vfpv3.S
  27. +4
    -4
      kernel/arm/zdot_vfp.S
  28. +4
    -4
      kernel/arm/zgemm_kernel_2x2_vfp.S
  29. +4
    -4
      kernel/arm/zgemm_kernel_2x2_vfpv3.S
  30. +4
    -4
      kernel/arm/zgemv_n_vfp.S
  31. +12
    -12
      kernel/arm/zgemv_t_vfp.S
  32. +22
    -22
      kernel/arm/ztrmm_kernel_2x2_vfp.S
  33. +4
    -4
      kernel/arm/ztrmm_kernel_2x2_vfpv3.S

+ 2
- 2
common_stackalloc.h View File

@@ -29,8 +29,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef STACK_ALLOC_PROTECT #ifdef STACK_ALLOC_PROTECT
// Try to detect stack smashing // Try to detect stack smashing
#include <assert.h> #include <assert.h>
#define STACK_ALLOC_PROTECT_SET volatile BLASLONG stack_check = 0x7ff8010203040506;
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7ff8010203040506);
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#else #else
#define STACK_ALLOC_PROTECT_SET #define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK #define STACK_ALLOC_PROTECT_CHECK


+ 4
- 4
kernel/arm/asum_vfp.S View File

@@ -368,11 +368,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 5 .align 5


#if defined(DOUBLE) #if defined(DOUBLE)
vldr.f64 d0 , =0
vldr.f64 d1 , =0
vsub.f64 d0 , d0 , d0
vsub.f64 d1 , d1 , d1
#else #else
vldr.f32 s0 , =0
vldr.f32 s1 , =0
vsub.f32 s0 , s0 , s0
vsub.f32 s1 , s1 , s1
#endif #endif


cmp N, #0 cmp N, #0


+ 4
- 4
kernel/arm/cdot_vfp.S View File

@@ -188,10 +188,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
mov Y, OLD_Y mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y ldr INC_Y, OLD_INC_Y


vldr.f32 s0 , =0
vldr.f32 s1 , =0
vldr.f32 s2 , =0
vldr.f32 s3 , =0
vsub.f32 s0 , s0 , s0
vsub.f32 s1 , s1 , s1
vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3


cmp N, #0 cmp N, #0
ble cdot_kernel_L999 ble cdot_kernel_L999


+ 4
- 4
kernel/arm/cgemm_kernel_2x2_vfp.S View File

@@ -138,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -340,7 +340,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s12, s8 vmov.f32 s12, s8
vmov.f32 s13, s8 vmov.f32 s13, s8
@@ -514,7 +514,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -681,7 +681,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8


.endm .endm


+ 4
- 6
kernel/arm/cgemm_kernel_2x2_vfpv3.S View File

@@ -147,6 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -367,6 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s21, s16 vmov.f32 s21, s16
@@ -548,6 +550,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -727,6 +730,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s24, s16 vmov.f32 s24, s16
vmov.f32 s25, s16 vmov.f32 s25, s16
@@ -1004,12 +1008,9 @@ cgemm_kernel_L2_M2_32:


b cgemm_kernel_L2_M2_44 b cgemm_kernel_L2_M2_44


cgemm_f32zero:
.word 0x00000000


cgemm_kernel_L2_M2_40: cgemm_kernel_L2_M2_40:


vldr.f32 s16 , cgemm_f32zero
INIT2x2 INIT2x2




@@ -1043,7 +1044,6 @@ cgemm_kernel_L2_M1_BEGIN:


cgemm_kernel_L2_M1_20: cgemm_kernel_L2_M1_20:


vldr.f32 s16 , cgemm_f32zero
INIT1x2 INIT1x2


mov BO, BC mov BO, BC
@@ -1219,7 +1219,6 @@ cgemm_kernel_L1_M2_32:


cgemm_kernel_L1_M2_40: cgemm_kernel_L1_M2_40:


vldr.f32 s16 , =0
INIT2x1 INIT2x1




@@ -1253,7 +1252,6 @@ cgemm_kernel_L1_M1_BEGIN:


cgemm_kernel_L1_M1_20: cgemm_kernel_L1_M1_20:


vldr.f32 s16 , =0
INIT1x1 INIT1x1


mov BO, BC mov BO, BC


+ 4
- 4
kernel/arm/cgemv_n_vfp.S View File

@@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F4 .macro INIT_F4


pld [ YO, #Y_PRE ] pld [ YO, #Y_PRE ]
vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8


.endm .endm
@@ -267,7 +267,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S4 .macro INIT_S4


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8


.endm .endm


+ 12
- 12
kernel/arm/cgemv_t_vfp.S View File

@@ -116,10 +116,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F2 .macro INIT_F2


vldr.f32 s12, =0
vldr.f32 s13, =0
vldr.f32 s14, =0
vldr.f32 s15, =0
vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13
vsub.f32 s14, s14, s14
vsub.f32 s15, s15, s15


.endm .endm


@@ -172,8 +172,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f32 s12, =0
vldr.f32 s13, =0
vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13


.endm .endm


@@ -215,10 +215,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S2 .macro INIT_S2


vldr.f32 s12, =0
vldr.f32 s13, =0
vldr.f32 s14, =0
vldr.f32 s15, =0
vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13
vsub.f32 s14, s14, s14
vsub.f32 s15, s15, s15


.endm .endm


@@ -281,8 +281,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f32 s12, =0
vldr.f32 s13, =0
vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13


.endm .endm




+ 22
- 22
kernel/arm/ctrmm_kernel_2x2_vfp.S View File

@@ -136,7 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -301,10 +301,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


vldr.f32 s4, =0
vldr.f32 s5, =0
vldr.f32 s6, =0
vldr.f32 s7, =0
vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
@@ -318,10 +318,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


fstmias CO1, { s4 - s7 } fstmias CO1, { s4 - s7 }


vldr.f32 s4, =0
vldr.f32 s5, =0
vldr.f32 s6, =0
vldr.f32 s7, =0
vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
@@ -343,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s12, s8 vmov.f32 s12, s8
vmov.f32 s13, s8 vmov.f32 s13, s8
@@ -490,8 +490,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


vldr.f32 s4, =0
vldr.f32 s5, =0
vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
@@ -500,8 +500,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


fstmias CO1, { s4 - s5 } fstmias CO1, { s4 - s5 }


vldr.f32 s4, =0
vldr.f32 s5, =0
vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5


FMAC_R1 s4 , s0 , s12 FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13 FMAC_I1 s5 , s0 , s13
@@ -519,7 +519,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -663,10 +663,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


vldr.f32 s4, =0
vldr.f32 s5, =0
vldr.f32 s6, =0
vldr.f32 s7, =0
vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9
@@ -689,7 +689,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8


.endm .endm
@@ -795,8 +795,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R flds s0, ALPHA_R
flds s1, ALPHA_I flds s1, ALPHA_I


vldr.f32 s4, =0
vldr.f32 s5, =0
vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5


FMAC_R1 s4 , s0 , s8 FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9 FMAC_I1 s5 , s0 , s9


+ 4
- 6
kernel/arm/ctrmm_kernel_2x2_vfpv3.S View File

@@ -134,6 +134,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -350,6 +351,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s21, s16 vmov.f32 s21, s16
@@ -527,6 +529,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -703,6 +706,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s24, s16 vmov.f32 s24, s16
vmov.f32 s25, s16 vmov.f32 s25, s16
@@ -1016,12 +1020,9 @@ _L2_M2_32:


b _L2_M2_44 b _L2_M2_44


ctrmm_f32zero:
.word 0x00000000


_L2_M2_40: _L2_M2_40:


vldr.f32 s16 , ctrmm_f32zero
INIT2x2 INIT2x2




@@ -1073,7 +1074,6 @@ _L2_M1_BEGIN:


_L2_M1_20: _L2_M1_20:


vldr.f32 s16 , ctrmm_f32zero
INIT1x2 INIT1x2


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1337,7 +1337,6 @@ _L1_M2_32:


_L1_M2_40: _L1_M2_40:


vldr.f32 s16 , =0
INIT2x1 INIT2x1




@@ -1390,7 +1389,6 @@ _L1_M1_BEGIN:


_L1_M1_20: _L1_M1_20:


vldr.f32 s16 , =0
INIT1x1 INIT1x1


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \


+ 2
- 2
kernel/arm/ddot_vfp.S View File

@@ -152,8 +152,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
mov Y, OLD_Y mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y ldr INC_Y, OLD_INC_Y


vldr.f64 d0 , =0
vldr.f64 d1 , =0
vsub.f64 d0 , d0 , d0
vsub.f64 d1 , d1 , d1


cmp N, #0 cmp N, #0
ble ddot_kernel_L999 ble ddot_kernel_L999


+ 6
- 6
kernel/arm/dgemm_kernel_4x2_vfp.S View File

@@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9, d8 vmov.f64 d9, d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -173,7 +173,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9, d8 vmov.f64 d9, d8
vmov.f64 d12, d8 vmov.f64 d12, d8
vmov.f64 d13, d8 vmov.f64 d13, d8
@@ -233,7 +233,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d12, d8 vmov.f64 d12, d8


.endm .endm
@@ -283,7 +283,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9, d8 vmov.f64 d9, d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -338,7 +338,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8


.endm .endm
@@ -380,7 +380,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8


.endm .endm




+ 9
- 9
kernel/arm/dgemm_kernel_4x4_vfpv3.S View File

@@ -102,7 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x4 .macro INIT4x4


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -376,7 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x4 .macro INIT2x4


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d21, d16 vmov.f64 d21, d16
@@ -470,7 +470,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x4 .macro INIT1x4


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d24, d16 vmov.f64 d24, d16
vmov.f64 d28, d16 vmov.f64 d28, d16
@@ -533,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -617,7 +617,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d21, d16 vmov.f64 d21, d16
@@ -678,7 +678,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d20, d16 vmov.f64 d20, d16


.endm .endm
@@ -723,7 +723,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -782,7 +782,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16


.endm .endm
@@ -826,7 +826,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16


.endm .endm




+ 6
- 6
kernel/arm/dtrmm_kernel_4x2_vfp.S View File

@@ -90,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9, d8 vmov.f64 d9, d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -165,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9, d8 vmov.f64 d9, d8
vmov.f64 d12, d8 vmov.f64 d12, d8
vmov.f64 d13, d8 vmov.f64 d13, d8
@@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d12, d8 vmov.f64 d12, d8


.endm .endm
@@ -268,7 +268,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9, d8 vmov.f64 d9, d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -318,7 +318,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8


.endm .endm
@@ -357,7 +357,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8


.endm .endm




+ 9
- 9
kernel/arm/dtrmm_kernel_4x4_vfpv3.S View File

@@ -89,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x4 .macro INIT4x4


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x4 .macro INIT2x4


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d21, d16 vmov.f64 d21, d16
@@ -468,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x4 .macro INIT1x4


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d24, d16 vmov.f64 d24, d16
vmov.f64 d28, d16 vmov.f64 d28, d16
@@ -527,7 +527,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -601,7 +601,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d21, d16 vmov.f64 d21, d16
@@ -656,7 +656,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d20, d16 vmov.f64 d20, d16


.endm .endm
@@ -699,7 +699,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -753,7 +753,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16


.endm .endm
@@ -794,7 +794,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16


.endm .endm




+ 8
- 8
kernel/arm/gemv_n_vfp.S View File

@@ -79,7 +79,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE ]
pld [ YO , #Y_PRE+32 ] pld [ YO , #Y_PRE+32 ]


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10 , d8 vmov.f64 d10 , d8
vmov.f64 d11 , d8 vmov.f64 d11 , d8
@@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f64 d12 , =0
vsub.f64 d12 , d12 , d12


.endm .endm


@@ -185,7 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S4 .macro INIT_S4


vldr.f64 d12 , =0
vsub.f64 d12 , d12 , d12
vmov.f64 d13 , d12 vmov.f64 d13 , d12
vmov.f64 d14 , d12 vmov.f64 d14 , d12
vmov.f64 d15 , d12 vmov.f64 d15 , d12
@@ -245,7 +245,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f64 d12 , =0
vsub.f64 d12 , d12 , d12


.endm .endm


@@ -279,7 +279,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE ]


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8
vmov.f32 s10 , s8 vmov.f32 s10 , s8
vmov.f32 s11 , s8 vmov.f32 s11 , s8
@@ -357,7 +357,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f32 s12 , =0
vsub.f32 s12 , s12 , s12


.endm .endm


@@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S4 .macro INIT_S4


vldr.f32 s12 , =0
vsub.f32 s12 , s12 , s12
vmov.f32 s13 , s12 vmov.f32 s13 , s12
vmov.f32 s14 , s12 vmov.f32 s14 , s12
vmov.f32 s15 , s12 vmov.f32 s15 , s12
@@ -445,7 +445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f32 s12 , =0
vsub.f32 s12 , s12 , s12


.endm .endm




+ 8
- 8
kernel/arm/gemv_n_vfpv3.S View File

@@ -79,7 +79,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE ]
pld [ YO , #Y_PRE+32 ] pld [ YO , #Y_PRE+32 ]


vldr.f64 d24 , =0
vsub.f64 d24 , d24 , d24
vmov.f64 d25 , d24 vmov.f64 d25 , d24
vmov.f64 d26 , d24 vmov.f64 d26 , d24
vmov.f64 d27 , d24 vmov.f64 d27 , d24
@@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f64 d24 , =0
vsub.f64 d24 , d24 , d24


.endm .endm


@@ -175,7 +175,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S8 .macro INIT_S8


vldr.f64 d24 , =0
vsub.f64 d24 , d24 , d24
vmov.f64 d25 , d24 vmov.f64 d25 , d24
vmov.f64 d26 , d24 vmov.f64 d26 , d24
vmov.f64 d27 , d24 vmov.f64 d27 , d24
@@ -269,7 +269,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f64 d24 , =0
vsub.f64 d24 , d24 , d24


.endm .endm


@@ -302,7 +302,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE ]


vldr.f32 s24 , =0
vsub.f32 s24 , s24 , s24
vmov.f32 s25 , s24 vmov.f32 s25 , s24
vmov.f32 s26 , s24 vmov.f32 s26 , s24
vmov.f32 s27 , s24 vmov.f32 s27 , s24
@@ -368,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f32 s24 , =0
vsub.f32 s24 , s24 , s24


.endm .endm


@@ -396,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S8 .macro INIT_S8


vldr.f32 s24 , =0
vsub.f32 s24 , s24 , s24
vmov.f32 s25 , s24 vmov.f32 s25 , s24
vmov.f32 s26 , s24 vmov.f32 s26 , s24
vmov.f32 s27 , s24 vmov.f32 s27 , s24
@@ -489,7 +489,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f32 s24 , =0
vsub.f32 s24 , s24 , s24


.endm .endm




+ 12
- 12
kernel/arm/gemv_t_vfp.S View File

@@ -75,8 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F2 .macro INIT_F2


vldr.f64 d2 , =0
vldr.f64 d3 , =0
vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3


.endm .endm


@@ -123,7 +123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f64 d2 , =0
vsub.f64 d2 , d2 , d2


.endm .endm


@@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S2 .macro INIT_S2


vldr.f64 d2 , =0
vldr.f64 d3 , =0
vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3


.endm .endm


@@ -224,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f64 d2 , =0
vsub.f64 d2 , d2 , d2


.endm .endm


@@ -276,8 +276,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F2 .macro INIT_F2


vldr.f32 s2 , =0
vldr.f32 s3 , =0
vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3


.endm .endm


@@ -321,7 +321,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f32 s2 , =0
vsub.f32 s2 , s2 , s2


.endm .endm


@@ -356,8 +356,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S2 .macro INIT_S2


vldr.f32 s2 , =0
vldr.f32 s3 , =0
vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3


.endm .endm


@@ -418,7 +418,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f32 s2 , =0
vsub.f32 s2 , s2 , s2


.endm .endm




+ 12
- 12
kernel/arm/gemv_t_vfpv3.S View File

@@ -75,8 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F2 .macro INIT_F2


vldr.f64 d4 , =0
vldr.f64 d5 , =0
vsub.f64 d4 , d4 , d4
vsub.f64 d5 , d5 , d5


.endm .endm


@@ -123,8 +123,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S2 .macro INIT_S2


vldr.f64 d4 , =0
vldr.f64 d5 , =0
vsub.f64 d4 , d4 , d4
vsub.f64 d5 , d5 , d5


.endm .endm


@@ -183,7 +183,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f64 d4 , =0
vsub.f64 d4 , d4 , d4


.endm .endm


@@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f64 d4 , =0
vsub.f64 d4 , d4 , d4


.endm .endm


@@ -268,8 +268,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F2 .macro INIT_F2


vldr.f32 s4 , =0
vldr.f32 s5 , =0
vsub.f32 s4 , s4 , s4
vsub.f32 s5 , s5 , s5


.endm .endm


@@ -313,8 +313,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S2 .macro INIT_S2


vldr.f32 s4 , =0
vldr.f32 s5 , =0
vsub.f32 s4 , s4 , s4
vsub.f32 s5 , s5 , s5


.endm .endm


@@ -371,7 +371,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f32 s4 , =0
vsub.f32 s4 , s4 , s4


.endm .endm


@@ -406,7 +406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f32 s4 , =0
vsub.f32 s4 , s4 , s4


.endm .endm




+ 2
- 2
kernel/arm/iamax_vfp.S View File

@@ -342,9 +342,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
push {r4} push {r4}


#if defined(DOUBLE) #if defined(DOUBLE)
vldr.f64 d0 , =0
vsub.f64 d0 , d0 , d0
#else #else
vldr.f32 s0 , =0
vsub.f32 s0 , s0 , s0
#endif #endif
mov INDEX, #0 mov INDEX, #0




+ 4
- 4
kernel/arm/nrm2_vfp.S View File

@@ -446,12 +446,12 @@ nrm2_begin:
#if defined(COMPLEX) #if defined(COMPLEX)


#if defined(DOUBLE) #if defined(DOUBLE)
vldr.f64 d0 , =0 // scale=0.0
vsub.f64 d0 , d0 , d0 // scale=0.0
vldr.64 d1 , znrm2_one // ssq=1.0 vldr.64 d1 , znrm2_one // ssq=1.0
vmov.f64 d7 , d1 // value 1.0 vmov.f64 d7 , d1 // value 1.0
vmov.f64 d6 , d0 // value 0.0 vmov.f64 d6 , d0 // value 0.0
#else #else
vldr.f32 s0 , =0 // scale=0.0
vsub.f32 s0 , s0 , s0 // scale=0.0
vldr.32 s1 , cnrm2_one // ssq=1.0 vldr.32 s1 , cnrm2_one // ssq=1.0
vmov.f32 s7 , s1 // value 1.0 vmov.f32 s7 , s1 // value 1.0
vmov.f32 s6 , s0 // value 0.0 vmov.f32 s6 , s0 // value 0.0
@@ -460,12 +460,12 @@ nrm2_begin:
#else #else


#if defined(DOUBLE) #if defined(DOUBLE)
vldr.f64 d0 , =0 // scale=0.0
vsub.f64 d0 , d0 , d0 // scale=0.0
vldr.64 d1 , dnrm2_one // ssq=1.0 vldr.64 d1 , dnrm2_one // ssq=1.0
vmov.f64 d7 , d1 // value 1.0 vmov.f64 d7 , d1 // value 1.0
vmov.f64 d6 , d0 // value 0.0 vmov.f64 d6 , d0 // value 0.0
#else #else
vldr.f32 s0 , =0 // scale=0.0
vsub.f32 s0 , s0 , s0 // scale=0.0
vldr.32 s1 , snrm2_one // ssq=1.0 vldr.32 s1 , snrm2_one // ssq=1.0
vmov.f32 s7 , s1 // value 1.0 vmov.f32 s7 , s1 // value 1.0
vmov.f32 s6 , s0 // value 0.0 vmov.f32 s6 , s0 // value 0.0


+ 2
- 7
kernel/arm/nrm2_vfpv3.S View File

@@ -400,22 +400,17 @@ KERNEL_S1_END_\@:
* End of macro definitions * End of macro definitions
**************************************************************************************/ **************************************************************************************/


nrm2_zeros:
.align 5
.word 0x00000000
.word 0x00000000

PROLOGUE PROLOGUE


.align 5 .align 5


#if defined(DOUBLE) #if defined(DOUBLE)
vldr.f64 d0 , nrm2_zeros // scale=0.0
vsub.f64 d0 , d0 , d0 // scale=0.0
vmov.f64 d1 , #1.0 // ssq=1.0 vmov.f64 d1 , #1.0 // ssq=1.0
vmov.f64 d7 , d1 // value 1.0 vmov.f64 d7 , d1 // value 1.0
vmov.f64 d6 , d0 // value 0.0 vmov.f64 d6 , d0 // value 0.0
#else #else
vldr.f32 s0 , nrm2_zeros // scale=0.0
vsub.f32 s0 , s0 , s0 // scale=0.0
vmov.f32 s1 , #1.0 // ssq=1.0 vmov.f32 s1 , #1.0 // ssq=1.0
vmov.f32 s7 , s1 // value 1.0 vmov.f32 s7 , s1 // value 1.0
vmov.f32 s6 , s0 // value 0.0 vmov.f32 s6 , s0 // value 0.0


+ 4
- 4
kernel/arm/sdot_vfp.S View File

@@ -242,13 +242,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#if defined(DSDOT) #if defined(DSDOT)


vldr.f64 d0 , =0
vldr.f64 d1 , =0
vsub.f64 d0 , d0 , d0
vsub.f64 d1 , d1 , d1


#else #else


vldr.f32 s0 , =0
vldr.f32 s1 , =0
vsub.f32 s0 , s0 , s0
vsub.f32 s1 , s1 , s1


#endif #endif




+ 6
- 6
kernel/arm/sgemm_kernel_4x2_vfp.S View File

@@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9, s8 vmov.f32 s9, s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9, s8 vmov.f32 s9, s8
vmov.f32 s12, s8 vmov.f32 s12, s8
vmov.f32 s13, s8 vmov.f32 s13, s8
@@ -221,7 +221,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s12, s8 vmov.f32 s12, s8


.endm .endm
@@ -271,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9, s8 vmov.f32 s9, s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -326,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8


.endm .endm
@@ -368,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8


.endm .endm




+ 17
- 32
kernel/arm/sgemm_kernel_4x4_vfpv3.S View File

@@ -102,6 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x4 .macro INIT4x4


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -348,6 +349,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x4 .macro INIT2x4


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s21, s16 vmov.f32 s21, s16
@@ -441,6 +443,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x4 .macro INIT1x4


vsub.f32 s16 , s16 , s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s24, s16 vmov.f32 s24, s16
vmov.f32 s28, s16 vmov.f32 s28, s16
@@ -503,6 +506,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -586,6 +590,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s21, s16 vmov.f32 s21, s16
@@ -646,6 +651,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vsub.f32 s16 , s16 , s16
vmov.f32 s20, s16 vmov.f32 s20, s16


.endm .endm
@@ -690,6 +696,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -748,6 +755,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16


.endm .endm
@@ -789,6 +797,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/******************************************************************************/ /******************************************************************************/


.macro INIT1x1

vsub.f32 s16 , s16 , s16

.endm




.macro KERNEL1x1_SUB .macro KERNEL1x1_SUB


@@ -915,24 +930,9 @@ sgemm_kernel_L4_M4_32:


b sgemm_kernel_L4_M4_44 b sgemm_kernel_L4_M4_44


/* Note on loading zero into a fp register
* vsub doesn't work since it cannot handle NaN and infinity
* vmov.Fn doesn't work with 0
* vmov.In and veor are neon
* vldr , =0 doesn't work since the function is larger than 2KB
* and the assembler is not able to insert constant pool inside
* the function body.
*
* Therefore, the best way I've found is to manually create multiple
* copies of the zero constant and `vldr` from different ones depending
* on where the instruction is.
*/
sgemm_f32zero:
.word 0x00000000


sgemm_kernel_L4_M4_40: sgemm_kernel_L4_M4_40:


vldr.f32 s16 , sgemm_f32zero
INIT4x4 INIT4x4




@@ -969,7 +969,6 @@ sgemm_kernel_L4_M2_BEGIN:


sgemm_kernel_L4_M2_20: sgemm_kernel_L4_M2_20:


vldr.f32 s16 , sgemm_f32zero
INIT2x4 INIT2x4


mov BO, BC mov BO, BC
@@ -1015,14 +1014,9 @@ sgemm_kernel_L4_M1_BEGIN:


tst I, #1 // I = I % 2 tst I, #1 // I = I % 2
ble sgemm_kernel_L4_END ble sgemm_kernel_L4_END
b sgemm_kernel_L4_M1_20

sgemm_f32zero4:
.word 0x00000000


sgemm_kernel_L4_M1_20: sgemm_kernel_L4_M1_20:


vldr.f32 s16 , sgemm_f32zero4
INIT1x4 INIT1x4


mov BO, BC mov BO, BC
@@ -1106,7 +1100,6 @@ sgemm_kernel_L2_M4_BEGIN:


sgemm_kernel_L2_M4_20: sgemm_kernel_L2_M4_20:


vldr.f32 s16 , sgemm_f32zero3
INIT4x2 INIT4x2


mov BO, BC mov BO, BC
@@ -1128,6 +1121,7 @@ sgemm_kernel_L2_M4_22:
subs L, L, #1 subs L, L, #1
bgt sgemm_kernel_L2_M4_22 bgt sgemm_kernel_L2_M4_22



sgemm_kernel_L2_M4_40: sgemm_kernel_L2_M4_40:


ands L , K1, #7 // L = L % 8 ands L , K1, #7 // L = L % 8
@@ -1148,10 +1142,7 @@ sgemm_kernel_L2_M4_END:


subs I, I, #1 subs I, I, #1
bgt sgemm_kernel_L2_M4_20 bgt sgemm_kernel_L2_M4_20
b sgemm_kernel_L2_M2_BEGIN


sgemm_f32zero3:
.word 0x00000000


sgemm_kernel_L2_M2_BEGIN: sgemm_kernel_L2_M2_BEGIN:


@@ -1164,7 +1155,6 @@ sgemm_kernel_L2_M2_BEGIN:


sgemm_kernel_L2_M2_20: sgemm_kernel_L2_M2_20:


vldr.f32 s16 , sgemm_f32zero3
INIT2x2 INIT2x2


mov BO, BC mov BO, BC
@@ -1213,7 +1203,6 @@ sgemm_kernel_L2_M1_BEGIN:


sgemm_kernel_L2_M1_20: sgemm_kernel_L2_M1_20:


vldr.f32 s16 , sgemm_f32zero3
INIT1x2 INIT1x2


mov BO, BC mov BO, BC
@@ -1289,7 +1278,6 @@ sgemm_kernel_L1_M4_BEGIN:


sgemm_kernel_L1_M4_20: sgemm_kernel_L1_M4_20:


vldr.f32 s16 , sgemm_f32zero3
INIT4x1 INIT4x1


mov BO, BC mov BO, BC
@@ -1345,7 +1333,6 @@ sgemm_kernel_L1_M2_BEGIN:


sgemm_kernel_L1_M2_20: sgemm_kernel_L1_M2_20:


vldr.f32 s16 , sgemm_f32zero2
INIT2x1 INIT2x1


mov BO, BC mov BO, BC
@@ -1394,7 +1381,7 @@ sgemm_kernel_L1_M1_BEGIN:


sgemm_kernel_L1_M1_20: sgemm_kernel_L1_M1_20:


vldr.f32 s16 , sgemm_f32zero2
INIT1x1


mov BO, BC mov BO, BC
asrs L , K1, #3 // L = L / 8 asrs L , K1, #3 // L = L / 8
@@ -1447,5 +1434,3 @@ sgemm_kernel_L999:


EPILOGUE EPILOGUE


sgemm_f32zero2:
.word 0x00000000

+ 6
- 6
kernel/arm/strmm_kernel_4x2_vfp.S View File

@@ -90,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9, s8 vmov.f32 s9, s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -156,7 +156,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9, s8 vmov.f32 s9, s8
vmov.f32 s12, s8 vmov.f32 s12, s8
vmov.f32 s13, s8 vmov.f32 s13, s8
@@ -211,7 +211,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s12, s8 vmov.f32 s12, s8


.endm .endm
@@ -259,7 +259,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9, s8 vmov.f32 s9, s8
vmov.f32 s10, s8 vmov.f32 s10, s8
vmov.f32 s11, s8 vmov.f32 s11, s8
@@ -309,7 +309,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8
vmov.f32 s9 , s8 vmov.f32 s9 , s8


.endm .endm
@@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f32 s8 , =0
vsub.f32 s8 , s8 , s8


.endm .endm




+ 16
- 33
kernel/arm/strmm_kernel_4x4_vfpv3.S View File

@@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x4 .macro INIT4x4


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -321,6 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x4 .macro INIT2x4


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s21, s16 vmov.f32 s21, s16
@@ -403,6 +405,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x4 .macro INIT1x4


vsub.f32 s16 , s16 , s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s24, s16 vmov.f32 s24, s16
vmov.f32 s28, s16 vmov.f32 s28, s16
@@ -461,6 +464,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x2 .macro INIT4x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -534,6 +538,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s20, s16 vmov.f32 s20, s16
vmov.f32 s21, s16 vmov.f32 s21, s16
@@ -588,6 +593,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vsub.f32 s16 , s16 , s16
vmov.f32 s20, s16 vmov.f32 s20, s16


.endm .endm
@@ -630,6 +636,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT4x1 .macro INIT4x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16
vmov.f32 s18, s16 vmov.f32 s18, s16
vmov.f32 s19, s16 vmov.f32 s19, s16
@@ -683,6 +690,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16 vmov.f32 s17, s16


.endm .endm
@@ -721,6 +729,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/******************************************************************************/ /******************************************************************************/


.macro INIT1x1

vsub.f32 s16 , s16 , s16

.endm




.macro KERNEL1x1_SUB .macro KERNEL1x1_SUB


@@ -940,24 +955,9 @@ _L4_M4_32:


b _L4_M4_44 b _L4_M4_44


/* Note on loading zero into a fp register
* vsub doesn't work since it cannot handle NaN and infinity
* vmov.Fn doesn't work with 0
* vmov.In and veor are neon
* vldr , =0 doesn't work since the function is larger than 2KB
* and the assembler is not able to insert constant pool inside
* the function body.
*
* Therefore, the best way I've found is to manually create multiple
* copies of the zero constant and `vldr` from different ones depending
* on where the instruction is.
*/
strmm_f32zero:
.word 0x00000000


_L4_M4_40: _L4_M4_40:


vldr.f32 s16 , strmm_f32zero
INIT4x4 INIT4x4




@@ -1014,7 +1014,6 @@ _L4_M2_BEGIN:


_L4_M2_20: _L4_M2_20:


vldr.f32 s16 , strmm_f32zero
INIT2x4 INIT2x4


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1113,7 +1112,6 @@ _L4_M1_BEGIN:


_L4_M1_20: _L4_M1_20:


vldr.f32 s16 , strmm_f32zero3
INIT1x4 INIT1x4


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1254,14 +1252,9 @@ _L2_M4_BEGIN:
ldr I, M ldr I, M
asrs I, I, #2 // I = I / 4 asrs I, I, #2 // I = I / 4
ble _L2_M2_BEGIN ble _L2_M2_BEGIN
b _L2_M4_20

strmm_f32zero3:
.word 0x00000000


_L2_M4_20: _L2_M4_20:


vldr.f32 s16 , strmm_f32zero3
INIT4x2 INIT4x2


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1367,7 +1360,6 @@ _L2_M2_BEGIN:


_L2_M2_20: _L2_M2_20:


vldr.f32 s16 , strmm_f32zero3
INIT2x2 INIT2x2


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1466,7 +1458,6 @@ _L2_M1_BEGIN:


_L2_M1_20: _L2_M1_20:


vldr.f32 s16 , strmm_f32zero4
INIT1x2 INIT1x2


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1600,14 +1591,9 @@ _L1_M4_BEGIN:
ldr I, M ldr I, M
asrs I, I, #2 // I = I / 4 asrs I, I, #2 // I = I / 4
ble _L1_M2_BEGIN ble _L1_M2_BEGIN
b _L1_M4_20

strmm_f32zero4:
.word 0x00000000


_L1_M4_20: _L1_M4_20:


vldr.f32 s16 , strmm_f32zero4
INIT4x1 INIT4x1


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1714,7 +1700,6 @@ _L1_M2_BEGIN:


_L1_M2_20: _L1_M2_20:


vldr.f32 s16 , strmm_f32zero2
INIT2x1 INIT2x1


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
@@ -1813,7 +1798,7 @@ _L1_M1_BEGIN:


_L1_M1_20: _L1_M1_20:


vldr.f32 s16 , strmm_f32zero2
INIT1x1


#if (defined(LEFT) && defined(TRANSA)) || \ #if (defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA)) (!defined(LEFT) && !defined(TRANSA))
@@ -1897,5 +1882,3 @@ _L999:


EPILOGUE EPILOGUE


strmm_f32zero2:
.word 0x00000000

+ 4
- 4
kernel/arm/zdot_vfp.S View File

@@ -190,10 +190,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
mov Y, OLD_Y mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y ldr INC_Y, OLD_INC_Y


vldr.f64 d0 , =0
vldr.f64 d1 , =0
vldr.f64 d2 , =0
vldr.f64 d3 , =0
vsub.f64 d0 , d0 , d0
vsub.f64 d1 , d1 , d1
vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3


cmp N, #0 cmp N, #0
ble zdot_kernel_L999 ble zdot_kernel_L999


+ 4
- 4
kernel/arm/zgemm_kernel_2x2_vfp.S View File

@@ -131,7 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -383,7 +383,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d12, d8 vmov.f64 d12, d8
vmov.f64 d13, d8 vmov.f64 d13, d8
@@ -557,7 +557,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -724,7 +724,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8


.endm .endm


+ 4
- 4
kernel/arm/zgemm_kernel_2x2_vfpv3.S View File

@@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -404,7 +404,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d21, d16 vmov.f64 d21, d16
@@ -586,7 +586,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -766,7 +766,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d24, d16 vmov.f64 d24, d16
vmov.f64 d25, d16 vmov.f64 d25, d16


+ 4
- 4
kernel/arm/zgemv_n_vfp.S View File

@@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F4 .macro INIT_F4


pld [ YO, #Y_PRE ] pld [ YO, #Y_PRE ]
vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -222,7 +222,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8


.endm .endm
@@ -269,7 +269,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S4 .macro INIT_S4


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8


.endm .endm


+ 12
- 12
kernel/arm/zgemv_t_vfp.S View File

@@ -117,10 +117,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F2 .macro INIT_F2


vldr.f64 d12, =0
vldr.f64 d13, =0
vldr.f64 d14, =0
vldr.f64 d15, =0
vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13
vsub.f64 d14, d14, d14
vsub.f64 d15, d15, d15


.endm .endm


@@ -173,8 +173,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_F1 .macro INIT_F1


vldr.f64 d12, =0
vldr.f64 d13, =0
vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13


.endm .endm


@@ -216,10 +216,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S2 .macro INIT_S2


vldr.f64 d12, =0
vldr.f64 d13, =0
vldr.f64 d14, =0
vldr.f64 d15, =0
vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13
vsub.f64 d14, d14, d14
vsub.f64 d15, d15, d15


.endm .endm


@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT_S1 .macro INIT_S1


vldr.f64 d12, =0
vldr.f64 d13, =0
vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13


.endm .endm




+ 22
- 22
kernel/arm/ztrmm_kernel_2x2_vfp.S View File

@@ -140,7 +140,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -356,10 +356,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


vldr.f64 d4, =0
vldr.f64 d5, =0
vldr.f64 d6, =0
vldr.f64 d7, =0
vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
@@ -373,10 +373,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


fstmiad CO1, { d4 - d7 } fstmiad CO1, { d4 - d7 }


vldr.f64 d4, =0
vldr.f64 d5, =0
vldr.f64 d6, =0
vldr.f64 d7, =0
vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
@@ -398,7 +398,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d12, d8 vmov.f64 d12, d8
vmov.f64 d13, d8 vmov.f64 d13, d8
@@ -545,8 +545,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


vldr.f64 d4, =0
vldr.f64 d5, =0
vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
@@ -555,8 +555,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


fstmiad CO1, { d4 - d5 } fstmiad CO1, { d4 - d5 }


vldr.f64 d4, =0
vldr.f64 d5, =0
vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5


FMAC_R1 d4 , d0 , d12 FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13 FMAC_I1 d5 , d0 , d13
@@ -574,7 +574,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8
vmov.f64 d10, d8 vmov.f64 d10, d8
vmov.f64 d11, d8 vmov.f64 d11, d8
@@ -718,10 +718,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


vldr.f64 d4, =0
vldr.f64 d5, =0
vldr.f64 d6, =0
vldr.f64 d7, =0
vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9
@@ -744,7 +744,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d8 , =0
vsub.f64 d8 , d8 , d8
vmov.f64 d9 , d8 vmov.f64 d9 , d8


.endm .endm
@@ -850,8 +850,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R fldd d0, ALPHA_R
fldd d1, ALPHA_I fldd d1, ALPHA_I


vldr.f64 d4, =0
vldr.f64 d5, =0
vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5


FMAC_R1 d4 , d0 , d8 FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9 FMAC_I1 d5 , d0 , d9


+ 4
- 4
kernel/arm/ztrmm_kernel_2x2_vfpv3.S View File

@@ -134,7 +134,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x2 .macro INIT2x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -388,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x2 .macro INIT1x2


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d20, d16 vmov.f64 d20, d16
vmov.f64 d21, d16 vmov.f64 d21, d16
@@ -566,7 +566,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT2x1 .macro INIT2x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d18, d16 vmov.f64 d18, d16
vmov.f64 d19, d16 vmov.f64 d19, d16
@@ -743,7 +743,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


.macro INIT1x1 .macro INIT1x1


vldr.f64 d16 , =0
vsub.f64 d16 , d16 , d16
vmov.f64 d17, d16 vmov.f64 d17, d16
vmov.f64 d24, d16 vmov.f64 d24, d16
vmov.f64 d25, d16 vmov.f64 d25, d16


Loading…
Cancel
Save