| @@ -162,16 +162,16 @@ matrix: | |||
| before_script: | |||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | |||
| - brew update | |||
| - brew install gcc # for gfortran | |||
| - brew install gcc@8 # for gfortran | |||
| script: | |||
| - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
| env: | |||
| - BTYPE="BINARY=64 INTERFACE64=1" | |||
| - BTYPE="BINARY=64 INTERFACE64=1 FC=gfortran-8" | |||
| - <<: *test-macos | |||
| osx_image: xcode8.3 | |||
| env: | |||
| - BTYPE="BINARY=32" | |||
| - BTYPE="BINARY=32 FC=gfortran-8" | |||
| # whitelist | |||
| branches: | |||
| @@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) | |||
| #define PROLOGUE \ | |||
| .text ;\ | |||
| .align 4 ;\ | |||
| .global REALNAME ;\ | |||
| .type REALNAME, %function ;\ | |||
| .macro PROLOGUE | |||
| .text ; | |||
| .p2align 2 ; | |||
| .global REALNAME ; | |||
| .type REALNAME, %function ; | |||
| REALNAME: | |||
| .endm | |||
| #define EPILOGUE | |||
| @@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if !defined(DOUBLE) | |||
| ldr s4, [X], #4 | |||
| fcmp s4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||
| fabs s4, s4 | |||
| fcmp SCALE, s4 | |||
| bge KERNEL_F1_SCALE_GE_X_\@ | |||
| bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ | |||
| fdiv s2, SCALE, s4 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_X_\@: | |||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||
| 1: /* KERNEL_F1_SCALE_GE_X_\@: */ | |||
| fdiv s2, s4, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| #else | |||
| ldr d4, [X], #8 | |||
| fcmp d4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||
| fabs d4, d4 | |||
| fcmp SCALE, d4 | |||
| bge KERNEL_F1_SCALE_GE_X_\@ | |||
| bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ | |||
| fdiv d2, SCALE, d4 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_X_\@: | |||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||
| 1: /* KERNEL_F1_SCALE_GE_X_\@: */ | |||
| fdiv d2, d4, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| #endif | |||
| KERNEL_F1_NEXT_\@: | |||
| 2: /* KERNEL_F1_NEXT_\@: */ | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| @@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if !defined(DOUBLE) | |||
| ldr s4, [X], #4 | |||
| fcmp s4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||
| fabs s4, s4 | |||
| fcmp SCALE, s4 | |||
| bge KERNEL_F1_SCALE_GE_XR_\@ | |||
| bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */ | |||
| fdiv s2, SCALE, s4 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_XR_\@: | |||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||
| 1: /* KERNEL_F1_SCALE_GE_XR_\@: */ | |||
| fdiv s2, s4, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| KERNEL_F1_NEXT_\@: | |||
| 2: /* KERNEL_F1_NEXT_\@: */ | |||
| ldr s5, [X], #4 | |||
| fcmp s5, REGZERO | |||
| beq KERNEL_F1_END_\@ | |||
| beq 4f /* KERNEL_F1_END_\@ */ | |||
| fabs s5, s5 | |||
| fcmp SCALE, s5 | |||
| bge KERNEL_F1_SCALE_GE_XI_\@ | |||
| bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */ | |||
| fdiv s2, SCALE, s5 | |||
| fmul s2, s2, s2 | |||
| fmul s3, SSQ, s2 | |||
| fadd SSQ, REGONE, s3 | |||
| fmov SCALE, s5 | |||
| b KERNEL_F1_END_\@ | |||
| KERNEL_F1_SCALE_GE_XI_\@: | |||
| b 4f /* KERNEL_F1_END_\@ */ | |||
| 3: /* KERNEL_F1_SCALE_GE_XI_\@: */ | |||
| fdiv s2, s5, SCALE | |||
| fmla SSQ, s2, v2.s[0] | |||
| #else | |||
| ldr d4, [X], #8 | |||
| fcmp d4, REGZERO | |||
| beq KERNEL_F1_NEXT_\@ | |||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||
| fabs d4, d4 | |||
| fcmp SCALE, d4 | |||
| bge KERNEL_F1_SCALE_GE_XR_\@ | |||
| bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */ | |||
| fdiv d2, SCALE, d4 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d4 | |||
| b KERNEL_F1_NEXT_\@ | |||
| KERNEL_F1_SCALE_GE_XR_\@: | |||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||
| 1: /* KERNEL_F1_SCALE_GE_XR_\@: */ | |||
| fdiv d2, d4, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| KERNEL_F1_NEXT_\@: | |||
| 2: /* KERNEL_F1_NEXT_\@: */ | |||
| ldr d5, [X], #8 | |||
| fcmp d5, REGZERO | |||
| beq KERNEL_F1_END_\@ | |||
| beq 4f /* KERNEL_F1_END_\@ */ | |||
| fabs d5, d5 | |||
| fcmp SCALE, d5 | |||
| bge KERNEL_F1_SCALE_GE_XI_\@ | |||
| bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */ | |||
| fdiv d2, SCALE, d5 | |||
| fmul d2, d2, d2 | |||
| fmul d3, SSQ, d2 | |||
| fadd SSQ, REGONE, d3 | |||
| fmov SCALE, d5 | |||
| b KERNEL_F1_END_\@ | |||
| KERNEL_F1_SCALE_GE_XI_\@: | |||
| b 4f /* KERNEL_F1_END_\@ */ | |||
| 3: /* KERNEL_F1_SCALE_GE_XI_\@: */ | |||
| fdiv d2, d5, SCALE | |||
| fmla SSQ, d2, v2.d[0] | |||
| #endif | |||
| KERNEL_F1_END_\@: | |||
| 4: /* KERNEL_F1_END_\@: */ | |||
| .endm | |||
| .macro KERNEL_S1 | |||
| @@ -34,9 +34,9 @@ caxpy_k: | |||
| lfs 0,4(10) | |||
| fmuls 10,2,10 | |||
| #ifdef CONJ | |||
| fmsubs 11,11,1,10 | |||
| #else | |||
| fmadds 11,11,1,10 | |||
| #else | |||
| fmsubs 11,11,1,10 | |||
| #endif | |||
| fadds 12,12,11 | |||
| stfs 12,0(10) | |||
| @@ -241,8 +241,13 @@ caxpy_k: | |||
| lfsx 12,8,5 | |||
| lfsx 0,10,5 | |||
| fmuls 11,2,11 | |||
| #ifdef CONJ | |||
| fmsubs 12,1,12,11 | |||
| fsubs 0,0,12 | |||
| #else | |||
| fmadds 12,1,12,11 | |||
| fadds 0,0,12 | |||
| #endif | |||
| stfsx 0,10,5 | |||
| ble 7,.L39 | |||
| sldi 6,6,2 | |||
| @@ -1,10 +1,16 @@ | |||
| .file "cdot.c" | |||
| #define ASSEMBLER | |||
| #include "common.h" | |||
| /* | |||
| .file "cdot.c" | |||
| .abiversion 2 | |||
| .section ".text" | |||
| .align 2 | |||
| .p2align 4,,15 | |||
| .globl cdot_k | |||
| .type cdot_k, @function | |||
| */ | |||
| PROLOGUE | |||
| cdot_k: | |||
| .LCF0: | |||
| 0: addis 2,12,.TOC.-.LCF0@ha | |||
| @@ -136,8 +136,8 @@ LSGEMM_L8x16_BEGIN: | |||
| #endif | |||
| ZERO8x16 | |||
| mtctr L | |||
| ble LSGEMM_L8x16_SUB0 | |||
| mtctr L | |||
| bl LSGEMM_L8x16_LMAIN_SUB | |||
| andi. L, T12, 127 | |||
| ble LSGEMM_L8x16_SAVE | |||
| @@ -146,7 +146,7 @@ LSGEMM_L8x16_BEGIN: | |||
| LSGEMM_L8x16_SUB0: | |||
| #if defined(TRMMKERNEL) | |||
| andi. L, T11, 255 | |||
| cmpwi T11,128 | |||
| cmpwi T11,129 | |||
| #else | |||
| andi. L, K, 255 | |||
| cmpwi K,129 | |||