Ref #740: all assembly codes now clear floating point register correctly
tags/v0.2.16.rc1
| @@ -1,26 +1,4 @@ | |||||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||||
| DGEMVNKERNEL = ../arm/gemv_n.c | |||||
| DGEMVTKERNEL = ../arm/gemv_t.c | |||||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| #ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| #ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| #ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| #ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| #STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| #SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| #SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
| #SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| ############################################################################### | ############################################################################### | ||||
| @@ -96,19 +74,19 @@ DSWAPKERNEL = swap_vfp.S | |||||
| CSWAPKERNEL = swap_vfp.S | CSWAPKERNEL = swap_vfp.S | ||||
| ZSWAPKERNEL = swap_vfp.S | ZSWAPKERNEL = swap_vfp.S | ||||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | |||||
| # BAD DGEMVNKERNEL = gemv_n_vfp.S | |||||
| # CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| SGEMVNKERNEL = gemv_n_vfp.S | |||||
| DGEMVNKERNEL = gemv_n_vfp.S | |||||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| ZGEMVNKERNEL = zgemv_n_vfp.S | ZGEMVNKERNEL = zgemv_n_vfp.S | ||||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | |||||
| # BAD DGEMVTKERNEL = gemv_t_vfp.S | |||||
| # CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| SGEMVTKERNEL = gemv_t_vfp.S | |||||
| DGEMVTKERNEL = gemv_t_vfp.S | |||||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| ZGEMVTKERNEL = zgemv_t_vfp.S | ZGEMVTKERNEL = zgemv_t_vfp.S | ||||
| STRMMKERNEL = strmm_kernel_4x2_vfp.S | STRMMKERNEL = strmm_kernel_4x2_vfp.S | ||||
| DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S | DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S | ||||
| #CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S | |||||
| CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S | |||||
| ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S | ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S | ||||
| SGEMMKERNEL = sgemm_kernel_4x2_vfp.S | SGEMMKERNEL = sgemm_kernel_4x2_vfp.S | ||||
| @@ -131,9 +109,9 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | DGEMMONCOPYOBJ = dgemm_oncopy.o | ||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | DGEMMOTCOPYOBJ = dgemm_otcopy.o | ||||
| #CGEMMKERNEL = cgemm_kernel_2x2_vfp.S | |||||
| #CGEMMONCOPY = cgemm_ncopy_2_vfp.S | |||||
| #CGEMMOTCOPY = cgemm_tcopy_2_vfp.S | |||||
| CGEMMKERNEL = cgemm_kernel_2x2_vfp.S | |||||
| CGEMMONCOPY = cgemm_ncopy_2_vfp.S | |||||
| CGEMMOTCOPY = cgemm_tcopy_2_vfp.S | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | CGEMMONCOPYOBJ = cgemm_oncopy.o | ||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | CGEMMOTCOPYOBJ = cgemm_otcopy.o | ||||
| @@ -1,8 +1,3 @@ | |||||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||||
| ################################################################################# | ################################################################################# | ||||
| SAMAXKERNEL = iamax_vfp.S | SAMAXKERNEL = iamax_vfp.S | ||||
| @@ -77,14 +72,14 @@ DSCALKERNEL = scal.c | |||||
| CSCALKERNEL = zscal.c | CSCALKERNEL = zscal.c | ||||
| ZSCALKERNEL = zscal.c | ZSCALKERNEL = zscal.c | ||||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | |||||
| DGEMVNKERNEL = gemv_n_vfp.S | |||||
| #CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| SGEMVNKERNEL = gemv_n_vfpv3.S | |||||
| DGEMVNKERNEL = gemv_n_vfpv3.S | |||||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||||
| ZGEMVNKERNEL = zgemv_n_vfp.S | ZGEMVNKERNEL = zgemv_n_vfp.S | ||||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | |||||
| SGEMVTKERNEL = gemv_t_vfp.S | |||||
| DGEMVTKERNEL = gemv_t_vfp.S | DGEMVTKERNEL = gemv_t_vfp.S | ||||
| #CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| ZGEMVTKERNEL = zgemv_t_vfp.S | ZGEMVTKERNEL = zgemv_t_vfp.S | ||||
| STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | ||||
| @@ -92,24 +87,15 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S | |||||
| CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S | CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S | ||||
| ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S | ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S | ||||
| #SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S | SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S | ||||
| SGEMMINCOPY = | |||||
| SGEMMITCOPY = | |||||
| SGEMMONCOPY = sgemm_ncopy_4_vfp.S | SGEMMONCOPY = sgemm_ncopy_4_vfp.S | ||||
| SGEMMOTCOPY = sgemm_tcopy_4_vfp.S | SGEMMOTCOPY = sgemm_tcopy_4_vfp.S | ||||
| SGEMMINCOPYOBJ = | |||||
| SGEMMITCOPYOBJ = | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | SGEMMONCOPYOBJ = sgemm_oncopy.o | ||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | SGEMMOTCOPYOBJ = sgemm_otcopy.o | ||||
| DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S | DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S | ||||
| DGEMMINCOPY = | |||||
| DGEMMITCOPY = | |||||
| DGEMMONCOPY = dgemm_ncopy_4_vfp.S | DGEMMONCOPY = dgemm_ncopy_4_vfp.S | ||||
| DGEMMOTCOPY = dgemm_tcopy_4_vfp.S | DGEMMOTCOPY = dgemm_tcopy_4_vfp.S | ||||
| DGEMMINCOPYOBJ = | |||||
| DGEMMITCOPYOBJ = | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | DGEMMONCOPYOBJ = dgemm_oncopy.o | ||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | DGEMMOTCOPYOBJ = dgemm_otcopy.o | ||||
| @@ -185,14 +185,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r4, fp, #128 | sub r4, fp, #128 | ||||
| vstm r4, { s8 - s15} // store floating point registers | vstm r4, { s8 - s15} // store floating point registers | ||||
| movs r4, #0 // clear floating point register | |||||
| vmov s0, r4 | |||||
| vmov s1, s0 | |||||
| vmov s2, s0 | |||||
| vmov s3, s0 | |||||
| mov Y, OLD_Y | mov Y, OLD_Y | ||||
| ldr INC_Y, OLD_INC_Y | ldr INC_Y, OLD_INC_Y | ||||
| vsub.f32 s0 , s0 , s0 | |||||
| vsub.f32 s1 , s1 , s1 | |||||
| vsub.f32 s2 , s2 , s2 | |||||
| vsub.f32 s3 , s3 , s3 | |||||
| cmp N, #0 | cmp N, #0 | ||||
| ble cdot_kernel_L999 | ble cdot_kernel_L999 | ||||
| @@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -138,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -340,7 +344,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s12, s8 | vmov.f32 s12, s8 | ||||
| vmov.f32 s13, s8 | vmov.f32 s13, s8 | ||||
| @@ -514,7 +518,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -681,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| .endm | .endm | ||||
| @@ -822,6 +826,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s15} // store floating point registers | vstm r3, { s8 - s15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 4 * 2 | lsl r3, r3, #3 // ldc = ldc * 4 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s21, s16 | vmov.f32 s21, s16 | ||||
| @@ -550,7 +554,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -730,7 +734,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s24, s16 | vmov.f32 s24, s16 | ||||
| vmov.f32 s25, s16 | vmov.f32 s25, s16 | ||||
| @@ -879,6 +883,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s31} // store floating point registers | vstm r3, { s8 - s31} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 4 * 2 | lsl r3, r3, #3 // ldc = ldc * 4 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define I r12 | #define I r12 | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define ALPHA_I [fp, #-236] | #define ALPHA_I [fp, #-236] | ||||
| #define ALPHA_R [fp, #-244] | #define ALPHA_R [fp, #-244] | ||||
| @@ -117,7 +121,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F4 | .macro INIT_F4 | ||||
| pld [ YO, #Y_PRE ] | pld [ YO, #Y_PRE ] | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| .endm | .endm | ||||
| @@ -267,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S4 | .macro INIT_S4 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| .endm | .endm | ||||
| @@ -448,6 +452,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s15 } // store floating point registers | vstm r12, { s8 - s15 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp OLD_M, #0 | cmp OLD_M, #0 | ||||
| ble cgemvn_kernel_L999 | ble cgemvn_kernel_L999 | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define I r12 | #define I r12 | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define N [fp, #-252 ] | #define N [fp, #-252 ] | ||||
| #define A [fp, #-256 ] | #define A [fp, #-256 ] | ||||
| @@ -116,10 +120,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F2 | .macro INIT_F2 | ||||
| vsub.f32 s12, s12, s12 | |||||
| vsub.f32 s13, s13, s13 | |||||
| vsub.f32 s14, s14, s14 | |||||
| vsub.f32 s15, s15, s15 | |||||
| flds s12, FP_ZERO | |||||
| vmov.f32 s13, s12 | |||||
| vmov.f32 s14, s12 | |||||
| vmov.f32 s15, s12 | |||||
| .endm | .endm | ||||
| @@ -172,8 +176,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f32 s12, s12, s12 | |||||
| vsub.f32 s13, s13, s13 | |||||
| flds s12, FP_ZERO | |||||
| vmov.f32 s13, s12 | |||||
| .endm | .endm | ||||
| @@ -215,10 +219,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S2 | .macro INIT_S2 | ||||
| vsub.f32 s12, s12, s12 | |||||
| vsub.f32 s13, s13, s13 | |||||
| vsub.f32 s14, s14, s14 | |||||
| vsub.f32 s15, s15, s15 | |||||
| flds s12, FP_ZERO | |||||
| vmov.f32 s13, s12 | |||||
| vmov.f32 s14, s12 | |||||
| vmov.f32 s15, s12 | |||||
| .endm | .endm | ||||
| @@ -281,8 +285,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f32 s12, s12, s12 | |||||
| vsub.f32 s13, s13, s13 | |||||
| flds s12, FP_ZERO | |||||
| vmov.f32 s13, s12 | |||||
| .endm | .endm | ||||
| @@ -345,6 +349,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s15 } // store floating point registers | vstm r12, { s8 - s15 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp M, #0 | cmp M, #0 | ||||
| ble cgemvt_kernel_L999 | ble cgemvt_kernel_L999 | ||||
| @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-232] | |||||
| #define FP_ZERO_0 [fp, #-232] | |||||
| #define FP_ZERO_1 [fp, #-228] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -136,7 +141,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -301,10 +306,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| vsub.f32 s4, s4, s4 | |||||
| vsub.f32 s5, s5, s5 | |||||
| vsub.f32 s6, s6, s6 | |||||
| vsub.f32 s7, s7, s7 | |||||
| flds s4, FP_ZERO | |||||
| vmov.f32 s5, s4 | |||||
| vmov.f32 s6, s4 | |||||
| vmov.f32 s7, s4 | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -318,10 +323,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fstmias CO1, { s4 - s7 } | fstmias CO1, { s4 - s7 } | ||||
| vsub.f32 s4, s4, s4 | |||||
| vsub.f32 s5, s5, s5 | |||||
| vsub.f32 s6, s6, s6 | |||||
| vsub.f32 s7, s7, s7 | |||||
| flds s4, FP_ZERO | |||||
| vmov.f32 s5, s4 | |||||
| vmov.f32 s6, s4 | |||||
| vmov.f32 s7, s4 | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| @@ -343,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s12, s8 | vmov.f32 s12, s8 | ||||
| vmov.f32 s13, s8 | vmov.f32 s13, s8 | ||||
| @@ -490,8 +495,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| vsub.f32 s4, s4, s4 | |||||
| vsub.f32 s5, s5, s5 | |||||
| flds s4, FP_ZERO | |||||
| vmov.f32 s5, s4 | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -500,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fstmias CO1, { s4 - s5 } | fstmias CO1, { s4 - s5 } | ||||
| vsub.f32 s4, s4, s4 | |||||
| vsub.f32 s5, s5, s5 | |||||
| flds s4, FP_ZERO | |||||
| vmov.f32 s5, s4 | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| @@ -519,7 +524,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -663,10 +668,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| vsub.f32 s4, s4, s4 | |||||
| vsub.f32 s5, s5, s5 | |||||
| vsub.f32 s6, s6, s6 | |||||
| vsub.f32 s7, s7, s7 | |||||
| flds s4, FP_ZERO | |||||
| vmov.f32 s5, s4 | |||||
| vmov.f32 s6, s4 | |||||
| vmov.f32 s7, s4 | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -689,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| .endm | .endm | ||||
| @@ -795,8 +800,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| vsub.f32 s4, s4, s4 | |||||
| vsub.f32 s5, s5, s5 | |||||
| flds s4, FP_ZERO | |||||
| vmov.f32 s5, s4 | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -831,6 +836,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s15} // store floating point registers | vstm r3, { s8 - s15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 4 * 2 | lsl r3, r3, #3 // ldc = ldc * 4 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-236] | |||||
| #define FP_ZERO_0 [fp, #-236] | |||||
| #define FP_ZERO_1 [fp, #-232] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16 , FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -351,7 +355,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16 , FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s21, s16 | vmov.f32 s21, s16 | ||||
| @@ -529,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16 , FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -706,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16 , FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s24, s16 | vmov.f32 s24, s16 | ||||
| vmov.f32 s25, s16 | vmov.f32 s25, s16 | ||||
| @@ -852,6 +856,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s31} // store floating point registers | vstm r3, { s8 - s31} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 4 * 2 | lsl r3, r3, #3 // ldc = ldc * 4 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -56,8 +56,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define C [fp, #8 ] | #define C [fp, #8 ] | ||||
| #define OLD_LDC [fp, #12 ] | #define OLD_LDC [fp, #12 ] | ||||
| @@ -85,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9, d8 | vmov.f64 d9, d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -173,7 +178,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9, d8 | vmov.f64 d9, d8 | ||||
| vmov.f64 d12, d8 | vmov.f64 d12, d8 | ||||
| vmov.f64 d13, d8 | vmov.f64 d13, d8 | ||||
| @@ -233,7 +238,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d12, d8 | vmov.f64 d12, d8 | ||||
| .endm | .endm | ||||
| @@ -283,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9, d8 | vmov.f64 d9, d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -338,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| .endm | .endm | ||||
| @@ -380,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -433,6 +438,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 8 | lsl r3, r3, #3 // ldc = ldc * 8 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define FP_ZERO [fp, #-232] | |||||
| #define FP_ZERO_0 [fp, #-232] | |||||
| #define FP_ZERO_1 [fp, #-228] | |||||
| #define ALPHA [fp, #-276 ] | #define ALPHA [fp, #-276 ] | ||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| @@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9, d8 | vmov.f64 d9, d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -165,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9, d8 | vmov.f64 d9, d8 | ||||
| vmov.f64 d12, d8 | vmov.f64 d12, d8 | ||||
| vmov.f64 d13, d8 | vmov.f64 d13, d8 | ||||
| @@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d12, d8 | vmov.f64 d12, d8 | ||||
| .endm | .endm | ||||
| @@ -268,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9, d8 | vmov.f64 d9, d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -318,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| .endm | .endm | ||||
| @@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -409,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 8 | lsl r3, r3, #3 // ldc = ldc * 8 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define FP_ZERO [fp, #-236] | |||||
| #define FP_ZERO_0 [fp, #-236] | |||||
| #define FP_ZERO_1 [fp, #-232] | |||||
| #define ALPHA [fp, #-276 ] | #define ALPHA [fp, #-276 ] | ||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| @@ -89,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x4 | .macro INIT4x4 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x4 | .macro INIT2x4 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d20, d16 | vmov.f64 d20, d16 | ||||
| vmov.f64 d21, d16 | vmov.f64 d21, d16 | ||||
| @@ -468,7 +473,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x4 | .macro INIT1x4 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d20, d16 | vmov.f64 d20, d16 | ||||
| vmov.f64 d24, d16 | vmov.f64 d24, d16 | ||||
| vmov.f64 d28, d16 | vmov.f64 d28, d16 | ||||
| @@ -527,7 +532,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -601,7 +606,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d20, d16 | vmov.f64 d20, d16 | ||||
| vmov.f64 d21, d16 | vmov.f64 d21, d16 | ||||
| @@ -656,7 +661,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d20, d16 | vmov.f64 d20, d16 | ||||
| .endm | .endm | ||||
| @@ -699,7 +704,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -753,7 +758,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| .endm | .endm | ||||
| @@ -794,7 +799,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -850,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #3 // ldc = ldc * 8 | lsl r3, r3, #3 // ldc = ldc * 8 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define I r12 | #define I r12 | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define M [fp, #-252 ] | #define M [fp, #-252 ] | ||||
| #define A [fp, #-256 ] | #define A [fp, #-256 ] | ||||
| @@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ YO , #Y_PRE ] | pld [ YO , #Y_PRE ] | ||||
| pld [ YO , #Y_PRE+32 ] | pld [ YO , #Y_PRE+32 ] | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10 , d8 | vmov.f64 d10 , d8 | ||||
| vmov.f64 d11 , d8 | vmov.f64 d11 , d8 | ||||
| @@ -158,7 +162,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f64 d12 , d12 , d12 | |||||
| fldd d12 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -185,7 +189,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S4 | .macro INIT_S4 | ||||
| vsub.f64 d12 , d12 , d12 | |||||
| fldd d12 , FP_ZERO | |||||
| vmov.f64 d13 , d12 | vmov.f64 d13 , d12 | ||||
| vmov.f64 d14 , d12 | vmov.f64 d14 , d12 | ||||
| vmov.f64 d15 , d12 | vmov.f64 d15 , d12 | ||||
| @@ -245,7 +249,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f64 d12 , d12 , d12 | |||||
| fldd d12 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -279,7 +283,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ YO , #Y_PRE ] | pld [ YO , #Y_PRE ] | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| vmov.f32 s10 , s8 | vmov.f32 s10 , s8 | ||||
| vmov.f32 s11 , s8 | vmov.f32 s11 , s8 | ||||
| @@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f32 s12 , s12 , s12 | |||||
| flds s12 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S4 | .macro INIT_S4 | ||||
| vsub.f32 s12 , s12 , s12 | |||||
| flds s12 , FP_ZERO | |||||
| vmov.f32 s13 , s12 | vmov.f32 s13 , s12 | ||||
| vmov.f32 s14 , s12 | vmov.f32 s14 , s12 | ||||
| vmov.f32 s15 , s12 | vmov.f32 s15 , s12 | ||||
| @@ -445,7 +449,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f32 s12 , s12 , s12 | |||||
| flds s12 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -494,6 +498,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s15 } // store floating point registers | vstm r12, { s8 - s15 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp OLD_M, #0 | cmp OLD_M, #0 | ||||
| ble gemvn_kernel_L999 | ble gemvn_kernel_L999 | ||||
| @@ -62,6 +62,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define M [fp, #-252 ] | #define M [fp, #-252 ] | ||||
| #define A [fp, #-256 ] | #define A [fp, #-256 ] | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define X_PRE 64 | #define X_PRE 64 | ||||
| #define Y_PRE 0 | #define Y_PRE 0 | ||||
| @@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ YO , #Y_PRE ] | pld [ YO , #Y_PRE ] | ||||
| pld [ YO , #Y_PRE+32 ] | pld [ YO , #Y_PRE+32 ] | ||||
| vsub.f64 d24 , d24 , d24 | |||||
| fldd d24 , FP_ZERO | |||||
| vmov.f64 d25 , d24 | vmov.f64 d25 , d24 | ||||
| vmov.f64 d26 , d24 | vmov.f64 d26 , d24 | ||||
| vmov.f64 d27 , d24 | vmov.f64 d27 , d24 | ||||
| @@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f64 d24 , d24 , d24 | |||||
| fldd d24 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -175,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S8 | .macro INIT_S8 | ||||
| vsub.f64 d24 , d24 , d24 | |||||
| fldd d24 , FP_ZERO | |||||
| vmov.f64 d25 , d24 | vmov.f64 d25 , d24 | ||||
| vmov.f64 d26 , d24 | vmov.f64 d26 , d24 | ||||
| vmov.f64 d27 , d24 | vmov.f64 d27 , d24 | ||||
| @@ -269,7 +273,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f64 d24 , d24 , d24 | |||||
| fldd d24 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -302,7 +306,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ YO , #Y_PRE ] | pld [ YO , #Y_PRE ] | ||||
| vsub.f32 s24 , s24 , s24 | |||||
| flds s24 , FP_ZERO | |||||
| vmov.f32 s25 , s24 | vmov.f32 s25 , s24 | ||||
| vmov.f32 s26 , s24 | vmov.f32 s26 , s24 | ||||
| vmov.f32 s27 , s24 | vmov.f32 s27 , s24 | ||||
| @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f32 s24 , s24 , s24 | |||||
| flds s24 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -396,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S8 | .macro INIT_S8 | ||||
| vsub.f32 s24 , s24 , s24 | |||||
| flds s24 , FP_ZERO | |||||
| vmov.f32 s25 , s24 | vmov.f32 s25 , s24 | ||||
| vmov.f32 s26 , s24 | vmov.f32 s26 , s24 | ||||
| vmov.f32 s27 , s24 | vmov.f32 s27 , s24 | ||||
| @@ -489,7 +493,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f32 s24 , s24 , s24 | |||||
| flds s24 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -538,6 +542,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s31 } // store floating point registers | vstm r12, { s8 - s31 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp OLD_M, #0 | cmp OLD_M, #0 | ||||
| ble gemvn_kernel_L999 | ble gemvn_kernel_L999 | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define I r12 | #define I r12 | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define N [fp, #-252 ] | #define N [fp, #-252 ] | ||||
| #define A [fp, #-256 ] | #define A [fp, #-256 ] | ||||
| @@ -75,8 +79,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F2 | .macro INIT_F2 | ||||
| vsub.f64 d2 , d2 , d2 | |||||
| vsub.f64 d3 , d3 , d3 | |||||
| fldd d2, FP_ZERO | |||||
| vmov.f64 d3 , d2 | |||||
| .endm | .endm | ||||
| @@ -123,7 +127,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f64 d2 , d2 , d2 | |||||
| fldd d2, FP_ZERO | |||||
| vmov.f64 d3 , d2 | |||||
| .endm | .endm | ||||
| @@ -160,8 +165,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S2 | .macro INIT_S2 | ||||
| vsub.f64 d2 , d2 , d2 | |||||
| vsub.f64 d3 , d3 , d3 | |||||
| fldd d2, FP_ZERO | |||||
| vmov.f64 d3 , d2 | |||||
| .endm | .endm | ||||
| @@ -224,7 +229,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f64 d2 , d2 , d2 | |||||
| fldd d2, FP_ZERO | |||||
| vmov.f64 d3 , d2 | |||||
| .endm | .endm | ||||
| @@ -276,8 +282,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F2 | .macro INIT_F2 | ||||
| vsub.f32 s2 , s2 , s2 | |||||
| vsub.f32 s3 , s3 , s3 | |||||
| flds s2 , FP_ZERO | |||||
| vmov.f32 s3 , s2 | |||||
| .endm | .endm | ||||
| @@ -321,7 +328,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f32 s2 , s2 , s2 | |||||
| flds s2 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -356,8 +363,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S2 | .macro INIT_S2 | ||||
| vsub.f32 s2 , s2 , s2 | |||||
| vsub.f32 s3 , s3 , s3 | |||||
| flds s2 , FP_ZERO | |||||
| vmov.f32 s3 , s2 | |||||
| .endm | .endm | ||||
| @@ -418,7 +425,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f32 s2 , s2 , s2 | |||||
| flds s2 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -488,6 +495,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s15 } // store floating point registers | vstm r12, { s8 - s15 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp M, #0 | cmp M, #0 | ||||
| ble gemvt_kernel_L999 | ble gemvt_kernel_L999 | ||||
| @@ -405,12 +405,15 @@ KERNEL_S1_END_\@: | |||||
| .align 5 | .align 5 | ||||
| #if defined(DOUBLE) | #if defined(DOUBLE) | ||||
| vsub.f64 d0 , d0 , d0 // scale=0.0 | |||||
| movs r12 , #0 | |||||
| vmov.f32 s0 , r12 // scale=0.0 | |||||
| vcvt.f64.f32 d0, s0 | |||||
| vmov.f64 d1 , #1.0 // ssq=1.0 | vmov.f64 d1 , #1.0 // ssq=1.0 | ||||
| vmov.f64 d7 , d1 // value 1.0 | vmov.f64 d7 , d1 // value 1.0 | ||||
| vmov.f64 d6 , d0 // value 0.0 | vmov.f64 d6 , d0 // value 0.0 | ||||
| #else | #else | ||||
| vsub.f32 s0 , s0 , s0 // scale=0.0 | |||||
| movs r12 , #0 | |||||
| vmov.f32 s0 , r12 // scale=0.0 | |||||
| vmov.f32 s1 , #1.0 // ssq=1.0 | vmov.f32 s1 , #1.0 // ssq=1.0 | ||||
| vmov.f32 s7 , s1 // value 1.0 | vmov.f32 s7 , s1 // value 1.0 | ||||
| vmov.f32 s6 , s0 // value 0.0 | vmov.f32 s6 , s0 // value 0.0 | ||||
| @@ -56,6 +56,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| @@ -85,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8, FP_ZERO | |||||
| vmov.f32 s9, s8 | vmov.f32 s9, s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -161,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8, FP_ZERO | |||||
| vmov.f32 s9, s8 | vmov.f32 s9, s8 | ||||
| vmov.f32 s12, s8 | vmov.f32 s12, s8 | ||||
| vmov.f32 s13, s8 | vmov.f32 s13, s8 | ||||
| @@ -221,7 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8, FP_ZERO | |||||
| vmov.f32 s12, s8 | vmov.f32 s12, s8 | ||||
| .endm | .endm | ||||
| @@ -271,7 +275,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8, FP_ZERO | |||||
| vmov.f32 s9, s8 | vmov.f32 s9, s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -326,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8, FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| .endm | .endm | ||||
| @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8, FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -421,6 +425,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s15} // store floating point registers | vstm r3, { s8 - s15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #2 // ldc = ldc * 4 | lsl r3, r3, #2 // ldc = ldc * 4 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -73,7 +73,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define ALPHA [fp, #-280] | |||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, #-240] | |||||
| #define FP_ZERO_1 [fp, #-236] | |||||
| #define ALPHA [fp, #-280] | |||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define C [fp, #8 ] | #define C [fp, #8 ] | ||||
| @@ -102,7 +106,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x4 | .macro INIT4x4 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -349,7 +353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x4 | .macro INIT2x4 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s21, s16 | vmov.f32 s21, s16 | ||||
| @@ -443,7 +447,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x4 | .macro INIT1x4 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s24, s16 | vmov.f32 s24, s16 | ||||
| vmov.f32 s28, s16 | vmov.f32 s28, s16 | ||||
| @@ -506,7 +510,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -590,7 +594,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s21, s16 | vmov.f32 s21, s16 | ||||
| @@ -651,7 +655,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| .endm | .endm | ||||
| @@ -696,7 +700,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -755,7 +759,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| .endm | .endm | ||||
| @@ -799,7 +803,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds s16, FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -856,6 +860,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s31} // store floating point registers | vstm r3, { s8 - s31} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #2 // ldc = ldc * 4 | lsl r3, r3, #2 // ldc = ldc * 4 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define FP_ZERO [fp, #-232] | |||||
| #define FP_ZERO_0 [fp, #-232] | |||||
| #define FP_ZERO_1 [fp, #-228] | |||||
| #define ALPHA [fp, #-276 ] | #define ALPHA [fp, #-276 ] | ||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| @@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9, s8 | vmov.f32 s9, s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -156,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9, s8 | vmov.f32 s9, s8 | ||||
| vmov.f32 s12, s8 | vmov.f32 s12, s8 | ||||
| vmov.f32 s13, s8 | vmov.f32 s13, s8 | ||||
| @@ -211,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s12, s8 | vmov.f32 s12, s8 | ||||
| .endm | .endm | ||||
| @@ -259,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9, s8 | vmov.f32 s9, s8 | ||||
| vmov.f32 s10, s8 | vmov.f32 s10, s8 | ||||
| vmov.f32 s11, s8 | vmov.f32 s11, s8 | ||||
| @@ -309,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| vmov.f32 s9 , s8 | vmov.f32 s9 , s8 | ||||
| .endm | .endm | ||||
| @@ -348,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s8 , s8 , s8 | |||||
| flds s8 , FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -400,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s15} // store floating point registers | vstm r3, { s8 - s15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #2 // ldc = ldc * 4 | lsl r3, r3, #2 // ldc = ldc * 4 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -58,6 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| @@ -88,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x4 | .macro INIT4x4 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -322,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x4 | .macro INIT2x4 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s21, s16 | vmov.f32 s21, s16 | ||||
| @@ -405,7 +409,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x4 | .macro INIT1x4 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s24, s16 | vmov.f32 s24, s16 | ||||
| vmov.f32 s28, s16 | vmov.f32 s28, s16 | ||||
| @@ -464,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x2 | .macro INIT4x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -538,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| vmov.f32 s21, s16 | vmov.f32 s21, s16 | ||||
| @@ -593,7 +597,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s20, s16 | vmov.f32 s20, s16 | ||||
| .endm | .endm | ||||
| @@ -636,7 +640,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT4x1 | .macro INIT4x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| vmov.f32 s18, s16 | vmov.f32 s18, s16 | ||||
| vmov.f32 s19, s16 | vmov.f32 s19, s16 | ||||
| @@ -690,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| vmov.f32 s17, s16 | vmov.f32 s17, s16 | ||||
| .endm | .endm | ||||
| @@ -731,7 +735,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f32 s16 , s16 , s16 | |||||
| flds S16, FP_ZERO | |||||
| .endm | .endm | ||||
| @@ -787,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s31} // store floating point registers | vstm r3, { s8 - s31} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #2 // ldc = ldc * 4 | lsl r3, r3, #2 // ldc = ldc * 4 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -187,13 +187,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r4, fp, #128 | sub r4, fp, #128 | ||||
| vstm r4, { d8 - d15} // store floating point registers | vstm r4, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 // clear floating point register | |||||
| vmov s0, r4 | |||||
| vcvt.f64.f32 d0, s0 | |||||
| vcvt.f64.f32 d1, s0 | |||||
| vcvt.f64.f32 d2, s0 | |||||
| vcvt.f64.f32 d3, s0 | |||||
| mov Y, OLD_Y | mov Y, OLD_Y | ||||
| ldr INC_Y, OLD_INC_Y | ldr INC_Y, OLD_INC_Y | ||||
| vsub.f64 d0 , d0 , d0 | |||||
| vsub.f64 d1 , d1 , d1 | |||||
| vsub.f64 d2 , d2 , d2 | |||||
| vsub.f64 d3 , d3 , d3 | |||||
| cmp N, #0 | cmp N, #0 | ||||
| ble zdot_kernel_L999 | ble zdot_kernel_L999 | ||||
| @@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -131,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -383,7 +387,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d12, d8 | vmov.f64 d12, d8 | ||||
| vmov.f64 d13, d8 | vmov.f64 d13, d8 | ||||
| @@ -557,7 +561,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -724,7 +728,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| .endm | .endm | ||||
| @@ -869,6 +873,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #4 // ldc = ldc * 8 * 2 | lsl r3, r3, #4 // ldc = ldc * 8 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-240] | |||||
| #define FP_ZERO_0 [fp, # -240] | |||||
| #define FP_ZERO_1 [fp, # -236] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -404,7 +408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d20, d16 | vmov.f64 d20, d16 | ||||
| vmov.f64 d21, d16 | vmov.f64 d21, d16 | ||||
| @@ -586,7 +590,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -766,7 +770,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16, FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d24, d16 | vmov.f64 d24, d16 | ||||
| vmov.f64 d25, d16 | vmov.f64 d25, d16 | ||||
| @@ -915,6 +919,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #4 // ldc = ldc * 8 * 2 | lsl r3, r3, #4 // ldc = ldc * 8 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define I r12 | #define I r12 | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define ALPHA_I [fp, #-236] | #define ALPHA_I [fp, #-236] | ||||
| #define ALPHA_R [fp, #-244] | #define ALPHA_R [fp, #-244] | ||||
| @@ -117,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F4 | .macro INIT_F4 | ||||
| pld [ YO, #Y_PRE ] | pld [ YO, #Y_PRE ] | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -222,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| .endm | .endm | ||||
| @@ -269,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S4 | .macro INIT_S4 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8, FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| .endm | .endm | ||||
| @@ -450,6 +455,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s15 } // store floating point registers | vstm r12, { s8 - s15 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp OLD_M, #0 | cmp OLD_M, #0 | ||||
| ble zgemvn_kernel_L999 | ble zgemvn_kernel_L999 | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define I r12 | #define I r12 | ||||
| #define FP_ZERO [fp, #-228] | |||||
| #define FP_ZERO_0 [fp, #-228] | |||||
| #define FP_ZERO_1 [fp, #-224] | |||||
| #define N [fp, #-252 ] | #define N [fp, #-252 ] | ||||
| #define A [fp, #-256 ] | #define A [fp, #-256 ] | ||||
| @@ -117,10 +121,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F2 | .macro INIT_F2 | ||||
| vsub.f64 d12, d12, d12 | |||||
| vsub.f64 d13, d13, d13 | |||||
| vsub.f64 d14, d14, d14 | |||||
| vsub.f64 d15, d15, d15 | |||||
| fldd d12, FP_ZERO | |||||
| vmov.f64 d13, d12 | |||||
| vmov.f64 d14, d12 | |||||
| vmov.f64 d15, d12 | |||||
| .endm | .endm | ||||
| @@ -173,8 +177,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F1 | .macro INIT_F1 | ||||
| vsub.f64 d12, d12, d12 | |||||
| vsub.f64 d13, d13, d13 | |||||
| fldd d12, FP_ZERO | |||||
| vmov.f64 d13, d12 | |||||
| .endm | .endm | ||||
| @@ -216,10 +220,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S2 | .macro INIT_S2 | ||||
| vsub.f64 d12, d12, d12 | |||||
| vsub.f64 d13, d13, d13 | |||||
| vsub.f64 d14, d14, d14 | |||||
| vsub.f64 d15, d15, d15 | |||||
| fldd d12, FP_ZERO | |||||
| vmov.f64 d13, d12 | |||||
| vmov.f64 d14, d12 | |||||
| vmov.f64 d15, d12 | |||||
| .endm | .endm | ||||
| @@ -282,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S1 | .macro INIT_S1 | ||||
| vsub.f64 d12, d12, d12 | |||||
| vsub.f64 d13, d13, d13 | |||||
| fldd d12, FP_ZERO | |||||
| vmov.f64 d13, d12 | |||||
| .endm | .endm | ||||
| @@ -346,6 +350,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vstm r12, { s8 - s15 } // store floating point registers | vstm r12, { s8 - s15 } // store floating point registers | ||||
| #endif | #endif | ||||
| movs r12, #0 | |||||
| str r12, FP_ZERO | |||||
| str r12, FP_ZERO_1 | |||||
| cmp M, #0 | cmp M, #0 | ||||
| ble zgemvt_kernel_L999 | ble zgemvt_kernel_L999 | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-232] | |||||
| #define FP_ZERO_0 [fp, #-232] | |||||
| #define FP_ZERO_1 [fp, #-228] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -140,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -356,10 +360,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| vsub.f64 d4, d4 , d4 | |||||
| vsub.f64 d5, d5 , d5 | |||||
| vsub.f64 d6, d6 , d6 | |||||
| vsub.f64 d7, d7 , d7 | |||||
| fldd d4 , FP_ZERO | |||||
| vmov.f64 d5 , d4 | |||||
| vmov.f64 d6 , d4 | |||||
| vmov.f64 d7 , d4 | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -373,10 +377,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fstmiad CO1, { d4 - d7 } | fstmiad CO1, { d4 - d7 } | ||||
| vsub.f64 d4, d4 , d4 | |||||
| vsub.f64 d5, d5 , d5 | |||||
| vsub.f64 d6, d6 , d6 | |||||
| vsub.f64 d7, d7 , d7 | |||||
| fldd d4 , FP_ZERO | |||||
| vmov.f64 d5 , d4 | |||||
| vmov.f64 d6 , d4 | |||||
| vmov.f64 d7 , d4 | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| @@ -398,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d12, d8 | vmov.f64 d12, d8 | ||||
| vmov.f64 d13, d8 | vmov.f64 d13, d8 | ||||
| @@ -545,8 +549,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| vsub.f64 d4, d4 , d4 | |||||
| vsub.f64 d5, d5 , d5 | |||||
| fldd d4 , FP_ZERO | |||||
| vmov.f64 d5 , d4 | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -555,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fstmiad CO1, { d4 - d5 } | fstmiad CO1, { d4 - d5 } | ||||
| vsub.f64 d4, d4 , d4 | |||||
| vsub.f64 d5, d5 , d5 | |||||
| fldd d4 , FP_ZERO | |||||
| vmov.f64 d5 , d4 | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| @@ -574,7 +578,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| vmov.f64 d10, d8 | vmov.f64 d10, d8 | ||||
| vmov.f64 d11, d8 | vmov.f64 d11, d8 | ||||
| @@ -718,10 +722,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| vsub.f64 d4, d4 , d4 | |||||
| vsub.f64 d5, d5 , d5 | |||||
| vsub.f64 d6, d6 , d6 | |||||
| vsub.f64 d7, d7 , d7 | |||||
| fldd d4 , FP_ZERO | |||||
| vmov.f64 d5 , d4 | |||||
| vmov.f64 d6 , d4 | |||||
| vmov.f64 d7 , d4 | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -744,7 +748,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d8 , d8 , d8 | |||||
| fldd d8 , FP_ZERO | |||||
| vmov.f64 d9 , d8 | vmov.f64 d9 , d8 | ||||
| .endm | .endm | ||||
| @@ -850,8 +854,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| vsub.f64 d4, d4 , d4 | |||||
| vsub.f64 d5, d5 , d5 | |||||
| fldd d4 , FP_ZERO | |||||
| vmov.f64 d5 , d4 | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -888,6 +892,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #4 // ldc = ldc * 8 * 2 | lsl r3, r3, #4 // ldc = ldc * 8 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #define FP_ZERO [fp, #-236] | |||||
| #define FP_ZERO_0 [fp, #-236] | |||||
| #define FP_ZERO_1 [fp, #-232] | |||||
| #define ALPHA_I [fp, #-272] | #define ALPHA_I [fp, #-272] | ||||
| #define ALPHA_R [fp, #-280] | #define ALPHA_R [fp, #-280] | ||||
| @@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x2 | .macro INIT2x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16 , FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -388,7 +392,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x2 | .macro INIT1x2 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16 , FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d20, d16 | vmov.f64 d20, d16 | ||||
| vmov.f64 d21, d16 | vmov.f64 d21, d16 | ||||
| @@ -566,7 +570,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT2x1 | .macro INIT2x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16 , FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d18, d16 | vmov.f64 d18, d16 | ||||
| vmov.f64 d19, d16 | vmov.f64 d19, d16 | ||||
| @@ -743,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT1x1 | .macro INIT1x1 | ||||
| vsub.f64 d16 , d16 , d16 | |||||
| fldd d16 , FP_ZERO | |||||
| vmov.f64 d17, d16 | vmov.f64 d17, d16 | ||||
| vmov.f64 d24, d16 | vmov.f64 d24, d16 | ||||
| vmov.f64 d25, d16 | vmov.f64 d25, d16 | ||||
| @@ -889,6 +893,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { d8 - d15} // store floating point registers | vstm r3, { d8 - d15} // store floating point registers | ||||
| movs r4, #0 | |||||
| str r4, FP_ZERO | |||||
| str r4, FP_ZERO_1 | |||||
| ldr r3, OLD_LDC | ldr r3, OLD_LDC | ||||
| lsl r3, r3, #4 // ldc = ldc * 8 * 2 | lsl r3, r3, #4 // ldc = ldc * 8 * 2 | ||||
| str r3, LDC | str r3, LDC | ||||
| @@ -10,7 +10,7 @@ NEP: Data file for testing Nonsymmetric Eigenvalue Problem routines | |||||
| 0 5 7 3 200 Values of INIBL (nibble crossover point) | 0 5 7 3 200 Values of INIBL (nibble crossover point) | ||||
| 1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts) | 1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts) | ||||
| 0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2) | 0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2) | ||||
| 20.0 Threshold value | |||||
| 30.0 Threshold value | |||||
| T Put T to test the error exits | T Put T to test the error exits | ||||
| 1 Code to interpret the seed | 1 Code to interpret the seed | ||||
| NEP 21 | NEP 21 | ||||