| @@ -73,6 +73,7 @@ CGEMVTKERNEL = cgemv_t_vfp.S | |||||
| ZGEMVTKERNEL = zgemv_t_vfp.S | ZGEMVTKERNEL = zgemv_t_vfp.S | ||||
| SGEMMKERNEL = ../generic/gemmkernel_4x2.c | SGEMMKERNEL = ../generic/gemmkernel_4x2.c | ||||
| SGEMMKERNEL = sgemm_kernel_4x2_vfp.S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| SGEMMINCOPY = sgemm_ncopy_4_vfp.S | SGEMMINCOPY = sgemm_ncopy_4_vfp.S | ||||
| SGEMMITCOPY = sgemm_tcopy_4_vfp.S | SGEMMITCOPY = sgemm_tcopy_4_vfp.S | ||||
| @@ -97,6 +98,7 @@ DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | DGEMMOTCOPYOBJ = dgemm_otcopy.o | ||||
| STRMMKERNEL = ../generic/trmmkernel_4x2.c | STRMMKERNEL = ../generic/trmmkernel_4x2.c | ||||
| STRMMKERNEL = strmm_kernel_4x2_vfp.S | |||||
| DTRMMKERNEL = ../generic/trmmkernel_4x2.c | DTRMMKERNEL = ../generic/trmmkernel_4x2.c | ||||
| CGEMMONCOPY = cgemm_ncopy_2_vfp.S | CGEMMONCOPY = cgemm_ncopy_2_vfp.S | ||||
| @@ -111,12 +113,10 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ifeq ($(ARM_ABI),hard) | ifeq ($(ARM_ABI),hard) | ||||
| STRMMKERNEL = strmm_kernel_4x2_vfp.S | |||||
| DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S | DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S | ||||
| CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S | CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S | ||||
| ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S | ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S | ||||
| SGEMMKERNEL = sgemm_kernel_4x2_vfp.S | |||||
| DGEMMKERNEL = dgemm_kernel_4x2_vfp.S | DGEMMKERNEL = dgemm_kernel_4x2_vfp.S | ||||
| CGEMMKERNEL = cgemm_kernel_2x2_vfp.S | CGEMMKERNEL = cgemm_kernel_2x2_vfp.S | ||||
| ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S | ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S | ||||
| @@ -11,7 +11,7 @@ DGEMVNKERNEL = gemv_n_vfpv3.S | |||||
| STRMMKERNEL = ../generic/trmmkernel_4x4.c | STRMMKERNEL = ../generic/trmmkernel_4x4.c | ||||
| DTRMMKERNEL = ../generic/trmmkernel_4x4.c | DTRMMKERNEL = ../generic/trmmkernel_4x4.c | ||||
| SGEMMKERNEL = ../generic/gemmkernel_4x4.c | |||||
| SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S | |||||
| SGEMMONCOPY = sgemm_ncopy_4_vfp.S | SGEMMONCOPY = sgemm_ncopy_4_vfp.S | ||||
| SGEMMOTCOPY = sgemm_tcopy_4_vfp.S | SGEMMOTCOPY = sgemm_tcopy_4_vfp.S | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | SGEMMONCOPYOBJ = sgemm_oncopy.o | ||||
| @@ -30,7 +30,6 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S | |||||
| CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S | CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S | ||||
| ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S | ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S | ||||
| SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S | |||||
| DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S | DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S | ||||
| CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S | CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S | ||||
| @@ -62,9 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| #define OLD_ALPHA_SOFTFP r3 | |||||
| #define OLD_A_SOFTFP [fp, #4 ] | |||||
| #define B [fp, #8 ] | |||||
| #define C [fp, #12 ] | |||||
| #define OLD_LDC [fp, #16 ] | |||||
| #else | |||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define C [fp, #8 ] | #define C [fp, #8 ] | ||||
| #define OLD_LDC [fp, #12 ] | #define OLD_LDC [fp, #12 ] | ||||
| #endif | |||||
| #define I r0 | #define I r0 | ||||
| #define J r1 | #define J r1 | ||||
| @@ -416,6 +424,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add fp, sp, #24 | add fp, sp, #24 | ||||
| sub sp, sp, #STACKSIZE // reserve stack | sub sp, sp, #STACKSIZE // reserve stack | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| vmov OLD_ALPHA, OLD_ALPHA_SOFTFP | |||||
| ldr OLD_A, OLD_A_SOFTFP | |||||
| #endif | |||||
| str OLD_M, M | str OLD_M, M | ||||
| str OLD_N, N | str OLD_N, N | ||||
| str OLD_K, K | str OLD_K, K | ||||
| @@ -58,14 +58,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define OLD_M r0 | #define OLD_M r0 | ||||
| #define OLD_N r1 | #define OLD_N r1 | ||||
| #define OLD_K r2 | #define OLD_K r2 | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| #define OLD_ALPHA r3 | |||||
| //#define OLD_A | |||||
| #else //hard | |||||
| #define OLD_A r3 | #define OLD_A r3 | ||||
| #define OLD_ALPHA s0 | #define OLD_ALPHA s0 | ||||
| #endif | |||||
| /****************************************************** | /****************************************************** | ||||
| * [fp, #-128] - [fp, #-64] is reserved | * [fp, #-128] - [fp, #-64] is reserved | ||||
| @@ -77,10 +71,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define M [fp, #-256 ] | #define M [fp, #-256 ] | ||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #ifndef ARM_SOFTFP_ABI | |||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #endif | |||||
| #define FP_ZERO [fp, #-240] | #define FP_ZERO [fp, #-240] | ||||
| #define FP_ZERO_0 [fp, #-240] | #define FP_ZERO_0 [fp, #-240] | ||||
| @@ -88,17 +79,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| #define A [fp, #4 ] | |||||
| #if !defined(__ARM_PCS_VFP) | |||||
| #define OLD_ALPHA_SOFTFP r3 | |||||
| #define OLD_A_SOFTFP [fp, #4 ] | |||||
| #define B [fp, #8 ] | #define B [fp, #8 ] | ||||
| #define C [fp, #12 ] | #define C [fp, #12 ] | ||||
| #define OLD_LDC [fp, #16 ] | #define OLD_LDC [fp, #16 ] | ||||
| #else //hard | |||||
| #else | |||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define C [fp, #8 ] | #define C [fp, #8 ] | ||||
| #define OLD_LDC [fp, #12 ] | #define OLD_LDC [fp, #12 ] | ||||
| #endif | #endif | ||||
| #define I r0 | #define I r0 | ||||
| #define J r1 | #define J r1 | ||||
| #define L r2 | #define L r2 | ||||
| @@ -867,16 +859,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add fp, sp, #24 | add fp, sp, #24 | ||||
| sub sp, sp, #STACKSIZE // reserve stack | sub sp, sp, #STACKSIZE // reserve stack | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| vmov OLD_ALPHA, OLD_ALPHA_SOFTFP | |||||
| ldr OLD_A, OLD_A_SOFTFP | |||||
| #endif | |||||
| str OLD_M, M | str OLD_M, M | ||||
| str OLD_N, N | str OLD_N, N | ||||
| str OLD_K, K | str OLD_K, K | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| str OLD_ALPHA, ALPHA | |||||
| #else //hard | |||||
| str OLD_A, A | str OLD_A, A | ||||
| vstr OLD_ALPHA, ALPHA | vstr OLD_ALPHA, ALPHA | ||||
| #endif | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s31} // store floating point registers | vstm r3, { s8 - s31} // store floating point registers | ||||
| @@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ALPHA [fp, #-276 ] | #define ALPHA [fp, #-276 ] | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| #define OLD_ALPHA_SOFTFP r3 | |||||
| #define OLD_A_SOFTFP [fp, #4 ] | |||||
| #define B [fp, #8 ] | |||||
| #define OLD_C [fp, #12 ] | |||||
| #define OLD_LDC [fp, #16 ] | |||||
| #define OFFSET [fp, #20 ] | |||||
| #else | |||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define OLD_C [fp, #8 ] | #define OLD_C [fp, #8 ] | ||||
| #define OLD_LDC [fp, #12 ] | #define OLD_LDC [fp, #12 ] | ||||
| #define OFFSET [fp, #16 ] | #define OFFSET [fp, #16 ] | ||||
| #endif | |||||
| #define I r0 | #define I r0 | ||||
| #define J r1 | #define J r1 | ||||
| @@ -395,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add fp, sp, #24 | add fp, sp, #24 | ||||
| sub sp, sp, #STACKSIZE // reserve stack | sub sp, sp, #STACKSIZE // reserve stack | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| vmov OLD_ALPHA, OLD_ALPHA_SOFTFP | |||||
| ldr OLD_A, OLD_A_SOFTFP | |||||
| #endif | |||||
| str OLD_M, M | str OLD_M, M | ||||
| str OLD_N, N | str OLD_N, N | ||||
| str OLD_K, K | str OLD_K, K | ||||
| @@ -64,10 +64,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| #define OLD_ALPHA_SOFTFP r3 | |||||
| #define OLD_A_SOFTFP [fp, #4 ] | |||||
| #define B [fp, #8 ] | |||||
| #define C [fp, #12 ] | |||||
| #define OLD_LDC [fp, #16 ] | |||||
| #define OFFSET [fp, #20 ] | |||||
| #else | |||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define C [fp, #8 ] | #define C [fp, #8 ] | ||||
| #define OLD_LDC [fp, #12 ] | #define OLD_LDC [fp, #12 ] | ||||
| #define OFFSET [fp, #16 ] | #define OFFSET [fp, #16 ] | ||||
| #endif | |||||
| #define I r0 | #define I r0 | ||||
| #define J r1 | #define J r1 | ||||
| @@ -782,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add fp, sp, #24 | add fp, sp, #24 | ||||
| sub sp, sp, #STACKSIZE // reserve stack | sub sp, sp, #STACKSIZE // reserve stack | ||||
| #if !defined(__ARM_PCS_VFP) | |||||
| vmov OLD_ALPHA, OLD_ALPHA_SOFTFP | |||||
| ldr OLD_A, OLD_A_SOFTFP | |||||
| #endif | |||||
| str OLD_M, M | str OLD_M, M | ||||
| str OLD_N, N | str OLD_N, N | ||||
| str OLD_K, K | str OLD_K, K | ||||