@@ -72,7 +72,6 @@ DGEMVTKERNEL = gemv_t_vfp.S | |||
CGEMVTKERNEL = cgemv_t_vfp.S | |||
ZGEMVTKERNEL = zgemv_t_vfp.S | |||
SGEMMKERNEL = ../generic/gemmkernel_4x2.c | |||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S | |||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
SGEMMINCOPY = sgemm_ncopy_4_vfp.S | |||
@@ -85,7 +84,7 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
DGEMMKERNEL = ../generic/gemmkernel_4x2.c | |||
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S | |||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||
DGEMMINCOPY = dgemm_ncopy_4_vfp.S | |||
DGEMMITCOPY = dgemm_tcopy_4_vfp.S | |||
@@ -97,9 +96,8 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
STRMMKERNEL = ../generic/trmmkernel_4x2.c | |||
STRMMKERNEL = strmm_kernel_4x2_vfp.S | |||
DTRMMKERNEL = ../generic/trmmkernel_4x2.c | |||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S | |||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S | |||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S | |||
@@ -113,11 +111,9 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ifeq ($(ARM_ABI),hard) | |||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S | |||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S | |||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S | |||
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S | |||
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S | |||
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S | |||
@@ -8,8 +8,8 @@ ZNRM2KERNEL = nrm2_vfpv3.S | |||
SGEMVNKERNEL = gemv_n_vfpv3.S | |||
DGEMVNKERNEL = gemv_n_vfpv3.S | |||
STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||
DTRMMKERNEL = ../generic/trmmkernel_4x4.c | |||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | |||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S | |||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S | |||
SGEMMONCOPY = sgemm_ncopy_4_vfp.S | |||
@@ -17,7 +17,7 @@ SGEMMOTCOPY = sgemm_tcopy_4_vfp.S | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
DGEMMKERNEL = ../generic/gemmkernel_4x4.c | |||
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S | |||
DGEMMONCOPY = dgemm_ncopy_4_vfp.S | |||
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
@@ -25,13 +25,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
ifeq ($(ARM_ABI),hard) | |||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | |||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S | |||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S | |||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S | |||
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S | |||
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S | |||
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S | |||
@@ -62,10 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ALPHA [fp, #-280] | |||
// fp-relative locations of the incoming arguments. Which set of definitions
// is used depends on whether __ARM_PCS_VFP is defined.
#if !defined(__ARM_PCS_VFP) | |||
// __ARM_PCS_VFP not defined: alpha and A are given fp-relative locations
// (OLD_ALPHA_SOFTFP, OLD_A_SOFTFP), and B / C / OLD_LDC each sit 12 bytes
// further from fp than in the #else branch.
// NOTE(review): the 8-byte gap between [fp, #4] and [fp, #12] suggests alpha
// occupies two words here - confirm against the calling convention in use.
#define OLD_ALPHA_SOFTFP [fp, #4] | |||
#define OLD_A_SOFTFP [fp, #12 ] | |||
#define B [fp, #16 ] | |||
#define C [fp, #20 ] | |||
#define OLD_LDC [fp, #24 ] | |||
#else | |||
// __ARM_PCS_VFP defined: only B, C and OLD_LDC get fp-relative locations here.
#define B [fp, #4 ] | |||
#define C [fp, #8 ] | |||
#define OLD_LDC [fp, #12 ] | |||
#endif | |||
#define I r0 | |||
#define J r1 | |||
@@ -429,6 +436,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
add fp, sp, #24 | |||
sub sp, sp, #STACKSIZE // reserve stack | |||
// Only when __ARM_PCS_VFP is not defined: load alpha (VFP load) and the A
// pointer from their fp-relative locations into OLD_ALPHA / OLD_A.
// fp was set to sp + 24 just above, so these offsets are relative to that value.
#if !defined(__ARM_PCS_VFP) | |||
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP | |||
ldr OLD_A, OLD_A_SOFTFP | |||
#endif | |||
str OLD_M, M | |||
str OLD_N, N | |||
str OLD_K, K | |||
@@ -79,9 +79,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ALPHA [fp, #-280] | |||
// Incoming-argument locations, expressed as offsets from fp. The layout is
// selected at preprocessing time by __ARM_PCS_VFP.
#if !defined(__ARM_PCS_VFP) | |||
// Without __ARM_PCS_VFP: OLD_ALPHA_SOFTFP and OLD_A_SOFTFP name fp-relative
// locations for alpha and A; B, C and OLD_LDC follow at #16, #20 and #24.
// NOTE(review): OLD_A_SOFTFP starts 8 bytes after OLD_ALPHA_SOFTFP, which
// would fit an 8-byte alpha - confirm.
#define OLD_ALPHA_SOFTFP [fp, #4] | |||
#define OLD_A_SOFTFP [fp, #12 ] | |||
#define B [fp, #16 ] | |||
#define C [fp, #20 ] | |||
#define OLD_LDC [fp, #24 ] | |||
#else | |||
// With __ARM_PCS_VFP: B, C and OLD_LDC start at [fp, #4], i.e. 12 bytes
// closer to fp than in the branch above; alpha and A get no location here.
#define B [fp, #4 ] | |||
#define C [fp, #8 ] | |||
#define OLD_LDC [fp, #12 ] | |||
#endif | |||
#define I r0 | |||
#define J r1 | |||
@@ -878,6 +886,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
add fp, sp, #24 | |||
sub sp, sp, #STACKSIZE // reserve stack | |||
// If __ARM_PCS_VFP is not defined, fetch alpha and A from the fp-relative
// locations OLD_ALPHA_SOFTFP / OLD_A_SOFTFP before the M/N/K stores that follow.
#if !defined(__ARM_PCS_VFP) | |||
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP | |||
ldr OLD_A, OLD_A_SOFTFP | |||
#endif | |||
str OLD_M, M | |||
str OLD_N, N | |||
str OLD_K, K | |||
@@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ALPHA [fp, #-276 ] | |||
// fp-relative locations of the incoming arguments, including OFFSET.
// The branch taken depends on whether __ARM_PCS_VFP is defined.
#if !defined(__ARM_PCS_VFP) | |||
// __ARM_PCS_VFP not defined: alpha and A also get fp-relative locations, and
// B / OLD_C / OLD_LDC / OFFSET are each 12 bytes further from fp than in the
// #else branch.
// NOTE(review): the [fp, #4] -> [fp, #12] gap suggests alpha takes 8 bytes
// here - confirm against the calling convention in use.
#define OLD_ALPHA_SOFTFP [fp, #4] | |||
#define OLD_A_SOFTFP [fp, #12 ] | |||
#define B [fp, #16 ] | |||
#define OLD_C [fp, #20 ] | |||
#define OLD_LDC [fp, #24 ] | |||
#define OFFSET [fp, #28 ] | |||
#else | |||
// __ARM_PCS_VFP defined: B, OLD_C, OLD_LDC and OFFSET occupy consecutive
// 4-byte steps starting at [fp, #4].
#define B [fp, #4 ] | |||
#define OLD_C [fp, #8 ] | |||
#define OLD_LDC [fp, #12 ] | |||
#define OFFSET [fp, #16 ] | |||
#endif | |||
#define I r0 | |||
#define J r1 | |||
@@ -404,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
add fp, sp, #24 | |||
sub sp, sp, #STACKSIZE // reserve stack | |||
// Compiled in only when __ARM_PCS_VFP is not defined: alpha is loaded with a
// VFP load and A with an integer load, both from fp-relative locations.
#if !defined(__ARM_PCS_VFP) | |||
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP | |||
ldr OLD_A, OLD_A_SOFTFP | |||
#endif | |||
str OLD_M, M | |||
str OLD_N, N | |||
str OLD_K, K | |||
@@ -66,10 +66,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ALPHA [fp, #-276 ] | |||
// Argument locations as offsets from fp; two layouts, chosen by __ARM_PCS_VFP.
#if !defined(__ARM_PCS_VFP) | |||
// Layout without __ARM_PCS_VFP: alpha at [fp, #4], A at [fp, #12], then
// B, OLD_C, OLD_LDC and OFFSET at #16 through #28.
// NOTE(review): alpha appears to span 8 bytes (next location is #12) - confirm.
#define OLD_ALPHA_SOFTFP [fp, #4] | |||
#define OLD_A_SOFTFP [fp, #12 ] | |||
#define B [fp, #16 ] | |||
#define OLD_C [fp, #20 ] | |||
#define OLD_LDC [fp, #24 ] | |||
#define OFFSET [fp, #28 ] | |||
#else | |||
// Layout with __ARM_PCS_VFP: the same four names, each 12 bytes closer to fp;
// no fp-relative location is defined for alpha or A in this branch.
#define B [fp, #4 ] | |||
#define OLD_C [fp, #8 ] | |||
#define OLD_LDC [fp, #12 ] | |||
#define OFFSET [fp, #16 ] | |||
#endif | |||
#define I r0 | |||
#define J r1 | |||
@@ -846,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
add fp, sp, #24 | |||
sub sp, sp, #STACKSIZE // reserve stack | |||
// When __ARM_PCS_VFP is not defined, OLD_ALPHA and OLD_A are filled from
// [fp, ...] locations here, after fp has been set up and the stack reserved.
#if !defined(__ARM_PCS_VFP) | |||
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP | |||
ldr OLD_A, OLD_A_SOFTFP | |||
#endif | |||
str OLD_M, M | |||
str OLD_N, N | |||
str OLD_K, K | |||