Browse Source

Merge pull request #761 from wernsaar/develop

Ref #740: all assembly codes now clear floating point register correctly
tags/v0.2.16.rc1
wernsaar 9 years ago
parent
commit
0b194426f8
28 changed files with 397 additions and 243 deletions
  1. +10
    -32
      kernel/arm/KERNEL.ARMV6
  2. +5
    -19
      kernel/arm/KERNEL.ARMV7
  3. +6
    -5
      kernel/arm/cdot_vfp.S
  4. +12
    -4
      kernel/arm/cgemm_kernel_2x2_vfp.S
  5. +12
    -4
      kernel/arm/cgemm_kernel_2x2_vfpv3.S
  6. +12
    -4
      kernel/arm/cgemv_n_vfp.S
  7. +20
    -12
      kernel/arm/cgemv_t_vfp.S
  8. +31
    -22
      kernel/arm/ctrmm_kernel_2x2_vfp.S
  9. +12
    -4
      kernel/arm/ctrmm_kernel_2x2_vfpv3.S
  10. +15
    -6
      kernel/arm/dgemm_kernel_4x2_vfp.S
  11. +14
    -6
      kernel/arm/dtrmm_kernel_4x2_vfp.S
  12. +18
    -9
      kernel/arm/dtrmm_kernel_4x4_vfpv3.S
  13. +16
    -8
      kernel/arm/gemv_n_vfp.S
  14. +16
    -8
      kernel/arm/gemv_n_vfpv3.S
  15. +23
    -12
      kernel/arm/gemv_t_vfp.S
  16. +5
    -2
      kernel/arm/nrm2_vfpv3.S
  17. +14
    -6
      kernel/arm/sgemm_kernel_4x2_vfp.S
  18. +18
    -10
      kernel/arm/sgemm_kernel_4x4_vfpv3.S
  19. +14
    -6
      kernel/arm/strmm_kernel_4x2_vfp.S
  20. +17
    -9
      kernel/arm/strmm_kernel_4x4_vfpv3.S
  21. +7
    -4
      kernel/arm/zdot_vfp.S
  22. +12
    -4
      kernel/arm/zgemm_kernel_2x2_vfp.S
  23. +12
    -4
      kernel/arm/zgemm_kernel_2x2_vfpv3.S
  24. +13
    -4
      kernel/arm/zgemv_n_vfp.S
  25. +20
    -12
      kernel/arm/zgemv_t_vfp.S
  26. +30
    -22
      kernel/arm/ztrmm_kernel_2x2_vfp.S
  27. +12
    -4
      kernel/arm/ztrmm_kernel_2x2_vfpv3.S
  28. +1
    -1
      lapack-netlib/TESTING/nep.in

+ 10
- 32
kernel/arm/KERNEL.ARMV6 View File

@@ -1,26 +1,4 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
CGEMVNKERNEL = ../arm/zgemv_n.c
CGEMVTKERNEL = ../arm/zgemv_t.c

DGEMVNKERNEL = ../arm/gemv_n.c
DGEMVTKERNEL = ../arm/gemv_t.c

CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c

#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c


#STRMMKERNEL = ../generic/trmmkernel_2x2.c
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
#SGEMMOTCOPY = ../generic/gemm_tcopy_2.c


###############################################################################
@@ -96,19 +74,19 @@ DSWAPKERNEL = swap_vfp.S
CSWAPKERNEL = swap_vfp.S
ZSWAPKERNEL = swap_vfp.S

# BAD SGEMVNKERNEL = gemv_n_vfp.S
# BAD DGEMVNKERNEL = gemv_n_vfp.S
# CGEMVNKERNEL = cgemv_n_vfp.S
SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S

# BAD SGEMVTKERNEL = gemv_t_vfp.S
# BAD DGEMVTKERNEL = gemv_t_vfp.S
# CGEMVTKERNEL = cgemv_t_vfp.S
SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S

SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
@@ -131,9 +109,9 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

#CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
#CGEMMONCOPY = cgemm_ncopy_2_vfp.S
#CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o



+ 5
- 19
kernel/arm/KERNEL.ARMV7 View File

@@ -1,8 +1,3 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
CGEMVNKERNEL = ../arm/zgemv_n.c
CGEMVTKERNEL = ../arm/zgemv_t.c


#################################################################################
SAMAXKERNEL = iamax_vfp.S
@@ -77,14 +72,14 @@ DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c

# BAD SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
#CGEMVNKERNEL = cgemv_n_vfp.S
SGEMVNKERNEL = gemv_n_vfpv3.S
DGEMVNKERNEL = gemv_n_vfpv3.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S

# BAD SGEMVTKERNEL = gemv_t_vfp.S
SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
#CGEMVTKERNEL = cgemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S

STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
@@ -92,24 +87,15 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S

#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o



+ 6
- 5
kernel/arm/cdot_vfp.S View File

@@ -185,14 +185,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r4, fp, #128
vstm r4, { s8 - s15} // store floating point registers

movs r4, #0 // clear floating point register
vmov s0, r4
vmov s1, s0
vmov s2, s0
vmov s3, s0

mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y

vsub.f32 s0 , s0 , s0
vsub.f32 s1 , s1 , s1
vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3

cmp N, #0
ble cdot_kernel_L999



+ 12
- 4
kernel/arm/cgemm_kernel_2x2_vfp.S View File

@@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -138,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -340,7 +344,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s12, s8
vmov.f32 s13, s8
@@ -514,7 +518,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -681,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8

.endm
@@ -822,6 +826,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 4 * 2
str r3, LDC


+ 12
- 4
kernel/arm/cgemm_kernel_2x2_vfpv3.S View File

@@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -550,7 +554,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -730,7 +734,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s24, s16
vmov.f32 s25, s16
@@ -879,6 +883,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s31} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 4 * 2
str r3, LDC


+ 12
- 4
kernel/arm/cgemv_n_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define I r12

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]

#define ALPHA_I [fp, #-236]
#define ALPHA_R [fp, #-244]

@@ -117,7 +121,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F4

pld [ YO, #Y_PRE ]
vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8

.endm
@@ -267,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S4

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8

.endm
@@ -448,6 +452,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s15 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp OLD_M, #0
ble cgemvn_kernel_L999



+ 20
- 12
kernel/arm/cgemv_t_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define I r12

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]

#define N [fp, #-252 ]
#define A [fp, #-256 ]

@@ -116,10 +120,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F2

vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13
vsub.f32 s14, s14, s14
vsub.f32 s15, s15, s15
flds s12, FP_ZERO
vmov.f32 s13, s12
vmov.f32 s14, s12
vmov.f32 s15, s12

.endm

@@ -172,8 +176,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13
flds s12, FP_ZERO
vmov.f32 s13, s12

.endm

@@ -215,10 +219,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S2

vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13
vsub.f32 s14, s14, s14
vsub.f32 s15, s15, s15
flds s12, FP_ZERO
vmov.f32 s13, s12
vmov.f32 s14, s12
vmov.f32 s15, s12

.endm

@@ -281,8 +285,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f32 s12, s12, s12
vsub.f32 s13, s13, s13
flds s12, FP_ZERO
vmov.f32 s13, s12

.endm

@@ -345,6 +349,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s15 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp M, #0
ble cgemvt_kernel_L999



+ 31
- 22
kernel/arm/ctrmm_kernel_2x2_vfp.S View File

@@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]


#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -136,7 +141,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -301,10 +306,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7
flds s4, FP_ZERO
vmov.f32 s5, s4
vmov.f32 s6, s4
vmov.f32 s7, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -318,10 +323,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmias CO1, { s4 - s7 }

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7
flds s4, FP_ZERO
vmov.f32 s5, s4
vmov.f32 s6, s4
vmov.f32 s7, s4

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -343,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s12, s8
vmov.f32 s13, s8
@@ -490,8 +495,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
flds s4, FP_ZERO
vmov.f32 s5, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -500,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmias CO1, { s4 - s5 }

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
flds s4, FP_ZERO
vmov.f32 s5, s4

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -519,7 +524,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -663,10 +668,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
vsub.f32 s6, s6, s6
vsub.f32 s7, s7, s7
flds s4, FP_ZERO
vmov.f32 s5, s4
vmov.f32 s6, s4
vmov.f32 s7, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -689,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8

.endm
@@ -795,8 +800,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

vsub.f32 s4, s4, s4
vsub.f32 s5, s5, s5
flds s4, FP_ZERO
vmov.f32 s5, s4

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -831,6 +836,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 4 * 2
str r3, LDC


+ 12
- 4
kernel/arm/ctrmm_kernel_2x2_vfpv3.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-236]
#define FP_ZERO_0 [fp, #-236]
#define FP_ZERO_1 [fp, #-232]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s16 , s16 , s16
flds s16 , FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -351,7 +355,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s16 , s16 , s16
flds s16 , FP_ZERO
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -529,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s16 , s16 , s16
flds s16 , FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -706,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s16 , s16 , s16
flds s16 , FP_ZERO
vmov.f32 s17, s16
vmov.f32 s24, s16
vmov.f32 s25, s16
@@ -852,6 +856,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s31} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 4 * 2
str r3, LDC


+ 15
- 6
kernel/arm/dgemm_kernel_4x2_vfp.S View File

@@ -56,8 +56,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA [fp, #-280]


#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
@@ -85,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9, d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -173,7 +178,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9, d8
vmov.f64 d12, d8
vmov.f64 d13, d8
@@ -233,7 +238,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d12, d8

.endm
@@ -283,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9, d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -338,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9 , d8

.endm
@@ -380,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO

.endm

@@ -433,6 +438,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 8
str r3, LDC


+ 14
- 6
kernel/arm/dtrmm_kernel_4x2_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]

#define ALPHA [fp, #-276 ]

#define B [fp, #4 ]
@@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9, d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -165,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9, d8
vmov.f64 d12, d8
vmov.f64 d13, d8
@@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d12, d8

.endm
@@ -268,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9, d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -318,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8

.endm
@@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO

.endm

@@ -409,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 8
str r3, LDC


+ 18
- 9
kernel/arm/dtrmm_kernel_4x4_vfpv3.S View File

@@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define FP_ZERO [fp, #-236]
#define FP_ZERO_0 [fp, #-236]
#define FP_ZERO_1 [fp, #-232]


#define ALPHA [fp, #-276 ]

#define B [fp, #4 ]
@@ -89,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x4

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x4

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d20, d16
vmov.f64 d21, d16
@@ -468,7 +473,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x4

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d20, d16
vmov.f64 d24, d16
vmov.f64 d28, d16
@@ -527,7 +532,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -601,7 +606,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d20, d16
vmov.f64 d21, d16
@@ -656,7 +661,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d20, d16

.endm
@@ -699,7 +704,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -753,7 +758,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16

.endm
@@ -794,7 +799,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO

.endm

@@ -850,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #3 // ldc = ldc * 8
str r3, LDC


+ 16
- 8
kernel/arm/gemv_n_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define I r12

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]

#define M [fp, #-252 ]
#define A [fp, #-256 ]

@@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ YO , #Y_PRE ]
pld [ YO , #Y_PRE+32 ]

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10 , d8
vmov.f64 d11 , d8
@@ -158,7 +162,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f64 d12 , d12 , d12
fldd d12 , FP_ZERO

.endm

@@ -185,7 +189,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S4

vsub.f64 d12 , d12 , d12
fldd d12 , FP_ZERO
vmov.f64 d13 , d12
vmov.f64 d14 , d12
vmov.f64 d15 , d12
@@ -245,7 +249,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f64 d12 , d12 , d12
fldd d12 , FP_ZERO

.endm

@@ -279,7 +283,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ YO , #Y_PRE ]

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8
vmov.f32 s10 , s8
vmov.f32 s11 , s8
@@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f32 s12 , s12 , s12
flds s12 , FP_ZERO

.endm

@@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S4

vsub.f32 s12 , s12 , s12
flds s12 , FP_ZERO
vmov.f32 s13 , s12
vmov.f32 s14 , s12
vmov.f32 s15 , s12
@@ -445,7 +449,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f32 s12 , s12 , s12
flds s12 , FP_ZERO

.endm

@@ -494,6 +498,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s15 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp OLD_M, #0
ble gemvn_kernel_L999



+ 16
- 8
kernel/arm/gemv_n_vfpv3.S View File

@@ -62,6 +62,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define M [fp, #-252 ]
#define A [fp, #-256 ]

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]


#define X_PRE 64
#define Y_PRE 0
@@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ YO , #Y_PRE ]
pld [ YO , #Y_PRE+32 ]

vsub.f64 d24 , d24 , d24
fldd d24 , FP_ZERO
vmov.f64 d25 , d24
vmov.f64 d26 , d24
vmov.f64 d27 , d24
@@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f64 d24 , d24 , d24
fldd d24 , FP_ZERO

.endm

@@ -175,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S8

vsub.f64 d24 , d24 , d24
fldd d24 , FP_ZERO
vmov.f64 d25 , d24
vmov.f64 d26 , d24
vmov.f64 d27 , d24
@@ -269,7 +273,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f64 d24 , d24 , d24
fldd d24 , FP_ZERO

.endm

@@ -302,7 +306,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ YO , #Y_PRE ]

vsub.f32 s24 , s24 , s24
flds s24 , FP_ZERO
vmov.f32 s25 , s24
vmov.f32 s26 , s24
vmov.f32 s27 , s24
@@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f32 s24 , s24 , s24
flds s24 , FP_ZERO

.endm

@@ -396,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S8

vsub.f32 s24 , s24 , s24
flds s24 , FP_ZERO
vmov.f32 s25 , s24
vmov.f32 s26 , s24
vmov.f32 s27 , s24
@@ -489,7 +493,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f32 s24 , s24 , s24
flds s24 , FP_ZERO

.endm

@@ -538,6 +542,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s31 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp OLD_M, #0
ble gemvn_kernel_L999



+ 23
- 12
kernel/arm/gemv_t_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define I r12

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]

#define N [fp, #-252 ]
#define A [fp, #-256 ]

@@ -75,8 +79,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F2

vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3
fldd d2, FP_ZERO
vmov.f64 d3 , d2

.endm

@@ -123,7 +127,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f64 d2 , d2 , d2
fldd d2, FP_ZERO
vmov.f64 d3 , d2

.endm

@@ -160,8 +165,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S2

vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3
fldd d2, FP_ZERO
vmov.f64 d3 , d2

.endm

@@ -224,7 +229,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f64 d2 , d2 , d2
fldd d2, FP_ZERO
vmov.f64 d3 , d2

.endm

@@ -276,8 +282,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F2

vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3
flds s2 , FP_ZERO
vmov.f32 s3 , s2


.endm

@@ -321,7 +328,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f32 s2 , s2 , s2
flds s2 , FP_ZERO

.endm

@@ -356,8 +363,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S2

vsub.f32 s2 , s2 , s2
vsub.f32 s3 , s3 , s3
flds s2 , FP_ZERO
vmov.f32 s3 , s2

.endm

@@ -418,7 +425,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f32 s2 , s2 , s2
flds s2 , FP_ZERO

.endm

@@ -488,6 +495,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s15 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp M, #0
ble gemvt_kernel_L999



+ 5
- 2
kernel/arm/nrm2_vfpv3.S View File

@@ -405,12 +405,15 @@ KERNEL_S1_END_\@:
.align 5

#if defined(DOUBLE)
vsub.f64 d0 , d0 , d0 // scale=0.0
movs r12 , #0
vmov.f32 s0 , r12 // scale=0.0
vcvt.f64.f32 d0, s0
vmov.f64 d1 , #1.0 // ssq=1.0
vmov.f64 d7 , d1 // value 1.0
vmov.f64 d6 , d0 // value 0.0
#else
vsub.f32 s0 , s0 , s0 // scale=0.0
movs r12 , #0
vmov.f32 s0 , r12 // scale=0.0
vmov.f32 s1 , #1.0 // ssq=1.0
vmov.f32 s7 , s1 // value 1.0
vmov.f32 s6 , s0 // value 0.0


+ 14
- 6
kernel/arm/sgemm_kernel_4x2_vfp.S View File

@@ -56,6 +56,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA [fp, #-280]

#define B [fp, #4 ]
@@ -85,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f32 s8 , s8 , s8
flds s8, FP_ZERO
vmov.f32 s9, s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -161,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s8 , s8 , s8
flds s8, FP_ZERO
vmov.f32 s9, s8
vmov.f32 s12, s8
vmov.f32 s13, s8
@@ -221,7 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s8 , s8 , s8
flds s8, FP_ZERO
vmov.f32 s12, s8

.endm
@@ -271,7 +275,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f32 s8 , s8 , s8
flds s8, FP_ZERO
vmov.f32 s9, s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -326,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s8 , s8 , s8
flds s8, FP_ZERO
vmov.f32 s9 , s8

.endm
@@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s8 , s8 , s8
flds s8, FP_ZERO

.endm

@@ -421,6 +425,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #2 // ldc = ldc * 4
str r3, LDC


+ 18
- 10
kernel/arm/sgemm_kernel_4x4_vfpv3.S View File

@@ -73,7 +73,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define ALPHA [fp, #-280]
#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, #-240]
#define FP_ZERO_1 [fp, #-236]

#define ALPHA [fp, #-280]

#define B [fp, #4 ]
#define C [fp, #8 ]
@@ -102,7 +106,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x4

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -349,7 +353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x4

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -443,7 +447,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x4

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s20, s16
vmov.f32 s24, s16
vmov.f32 s28, s16
@@ -506,7 +510,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -590,7 +594,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -651,7 +655,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s20, s16

.endm
@@ -696,7 +700,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -755,7 +759,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO
vmov.f32 s17, s16

.endm
@@ -799,7 +803,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s16 , s16 , s16
flds s16, FP_ZERO

.endm

@@ -856,6 +860,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s31} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #2 // ldc = ldc * 4
str r3, LDC


+ 14
- 6
kernel/arm/strmm_kernel_4x2_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]

#define ALPHA [fp, #-276 ]

#define B [fp, #4 ]
@@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9, s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -156,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9, s8
vmov.f32 s12, s8
vmov.f32 s13, s8
@@ -211,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s12, s8

.endm
@@ -259,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9, s8
vmov.f32 s10, s8
vmov.f32 s11, s8
@@ -309,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO
vmov.f32 s9 , s8

.endm
@@ -348,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s8 , s8 , s8
flds s8 , FP_ZERO

.endm

@@ -400,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #2 // ldc = ldc * 4
str r3, LDC


+ 17
- 9
kernel/arm/strmm_kernel_4x4_vfpv3.S View File

@@ -58,6 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define K [fp, #-264 ]
#define A [fp, #-268 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA [fp, #-280]

#define B [fp, #4 ]
@@ -88,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x4

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -322,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x4

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -405,7 +409,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x4

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s20, s16
vmov.f32 s24, s16
vmov.f32 s28, s16
@@ -464,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x2

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -538,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -593,7 +597,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s20, s16

.endm
@@ -636,7 +640,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT4x1

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -690,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO
vmov.f32 s17, s16

.endm
@@ -731,7 +735,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f32 s16 , s16 , s16
flds S16, FP_ZERO

.endm

@@ -787,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { s8 - s31} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #2 // ldc = ldc * 4
str r3, LDC


+ 7
- 4
kernel/arm/zdot_vfp.S View File

@@ -187,13 +187,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r4, fp, #128
vstm r4, { d8 - d15} // store floating point registers

movs r4, #0 // clear floating point register
vmov s0, r4
vcvt.f64.f32 d0, s0
vcvt.f64.f32 d1, s0
vcvt.f64.f32 d2, s0
vcvt.f64.f32 d3, s0

mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y

vsub.f64 d0 , d0 , d0
vsub.f64 d1 , d1 , d1
vsub.f64 d2 , d2 , d2
vsub.f64 d3 , d3 , d3

cmp N, #0
ble zdot_kernel_L999


+ 12
- 4
kernel/arm/zgemm_kernel_2x2_vfp.S View File

@@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -131,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -383,7 +387,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d12, d8
vmov.f64 d13, d8
@@ -557,7 +561,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -724,7 +728,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8

.endm
@@ -869,6 +873,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #4 // ldc = ldc * 8 * 2
str r3, LDC


+ 12
- 4
kernel/arm/zgemm_kernel_2x2_vfpv3.S View File

@@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, # -240]
#define FP_ZERO_1 [fp, # -236]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -404,7 +408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d20, d16
vmov.f64 d21, d16
@@ -586,7 +590,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -766,7 +770,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d16 , d16 , d16
fldd d16, FP_ZERO
vmov.f64 d17, d16
vmov.f64 d24, d16
vmov.f64 d25, d16
@@ -915,6 +919,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #4 // ldc = ldc * 8 * 2
str r3, LDC


+ 13
- 4
kernel/arm/zgemv_n_vfp.S View File

@@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define I r12

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]


#define ALPHA_I [fp, #-236]
#define ALPHA_R [fp, #-244]

@@ -117,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F4

pld [ YO, #Y_PRE ]
vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -222,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9 , d8

.endm
@@ -269,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S4

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f64 d8 , d8 , d8
fldd d8, FP_ZERO
vmov.f64 d9 , d8

.endm
@@ -450,6 +455,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s15 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp OLD_M, #0
ble zgemvn_kernel_L999



+ 20
- 12
kernel/arm/zgemv_t_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define I r12

#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]

#define N [fp, #-252 ]
#define A [fp, #-256 ]

@@ -117,10 +121,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F2

vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13
vsub.f64 d14, d14, d14
vsub.f64 d15, d15, d15
fldd d12, FP_ZERO
vmov.f64 d13, d12
vmov.f64 d14, d12
vmov.f64 d15, d12

.endm

@@ -173,8 +177,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F1

vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13
fldd d12, FP_ZERO
vmov.f64 d13, d12

.endm

@@ -216,10 +220,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S2

vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13
vsub.f64 d14, d14, d14
vsub.f64 d15, d15, d15
fldd d12, FP_ZERO
vmov.f64 d13, d12
vmov.f64 d14, d12
vmov.f64 d15, d12

.endm

@@ -282,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S1

vsub.f64 d12, d12, d12
vsub.f64 d13, d13, d13
fldd d12, FP_ZERO
vmov.f64 d13, d12

.endm

@@ -346,6 +350,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s15 } // store floating point registers
#endif

movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1

cmp M, #0
ble zgemvt_kernel_L999



+ 30
- 22
kernel/arm/ztrmm_kernel_2x2_vfp.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-232]
#define FP_ZERO_0 [fp, #-232]
#define FP_ZERO_1 [fp, #-228]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -140,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -356,10 +360,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7
fldd d4 , FP_ZERO
vmov.f64 d5 , d4
vmov.f64 d6 , d4
vmov.f64 d7 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -373,10 +377,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmiad CO1, { d4 - d7 }

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7
fldd d4 , FP_ZERO
vmov.f64 d5 , d4
vmov.f64 d6 , d4
vmov.f64 d7 , d4

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -398,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d12, d8
vmov.f64 d13, d8
@@ -545,8 +549,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
fldd d4 , FP_ZERO
vmov.f64 d5 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -555,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fstmiad CO1, { d4 - d5 }

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
fldd d4 , FP_ZERO
vmov.f64 d5 , d4

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -574,7 +578,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8
vmov.f64 d10, d8
vmov.f64 d11, d8
@@ -718,10 +722,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
vsub.f64 d6, d6 , d6
vsub.f64 d7, d7 , d7
fldd d4 , FP_ZERO
vmov.f64 d5 , d4
vmov.f64 d6 , d4
vmov.f64 d7 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -744,7 +748,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d8 , d8 , d8
fldd d8 , FP_ZERO
vmov.f64 d9 , d8

.endm
@@ -850,8 +854,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

vsub.f64 d4, d4 , d4
vsub.f64 d5, d5 , d5
fldd d4 , FP_ZERO
vmov.f64 d5 , d4

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -888,6 +892,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #4 // ldc = ldc * 8 * 2
str r3, LDC


+ 12
- 4
kernel/arm/ztrmm_kernel_2x2_vfpv3.S View File

@@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N [fp, #-260 ]
#define K [fp, #-264 ]

#define FP_ZERO [fp, #-236]
#define FP_ZERO_0 [fp, #-236]
#define FP_ZERO_1 [fp, #-232]

#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]

@@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x2

vsub.f64 d16 , d16 , d16
fldd d16 , FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -388,7 +392,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x2

vsub.f64 d16 , d16 , d16
fldd d16 , FP_ZERO
vmov.f64 d17, d16
vmov.f64 d20, d16
vmov.f64 d21, d16
@@ -566,7 +570,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT2x1

vsub.f64 d16 , d16 , d16
fldd d16 , FP_ZERO
vmov.f64 d17, d16
vmov.f64 d18, d16
vmov.f64 d19, d16
@@ -743,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT1x1

vsub.f64 d16 , d16 , d16
fldd d16 , FP_ZERO
vmov.f64 d17, d16
vmov.f64 d24, d16
vmov.f64 d25, d16
@@ -889,6 +893,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
sub r3, fp, #128
vstm r3, { d8 - d15} // store floating point registers

movs r4, #0
str r4, FP_ZERO
str r4, FP_ZERO_1

ldr r3, OLD_LDC
lsl r3, r3, #4 // ldc = ldc * 8 * 2
str r3, LDC


+ 1
- 1
lapack-netlib/TESTING/nep.in View File

@@ -10,7 +10,7 @@ NEP: Data file for testing Nonsymmetric Eigenvalue Problem routines
0 5 7 3 200 Values of INIBL (nibble crossover point)
1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts)
0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2)
20.0 Threshold value
30.0 Threshold value
T Put T to test the error exits
1 Code to interpret the seed
NEP 21

Loading…
Cancel
Save