| @@ -162,16 +162,16 @@ matrix: | |||||
| before_script: | before_script: | ||||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | ||||
| - brew update | - brew update | ||||
| - brew install gcc # for gfortran | |||||
| - brew install gcc@8 # for gfortran | |||||
| script: | script: | ||||
| - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | ||||
| env: | env: | ||||
| - BTYPE="BINARY=64 INTERFACE64=1" | |||||
| - BTYPE="BINARY=64 INTERFACE64=1 FC=gfortran-8" | |||||
| - <<: *test-macos | - <<: *test-macos | ||||
| osx_image: xcode8.3 | osx_image: xcode8.3 | ||||
| env: | env: | ||||
| - BTYPE="BINARY=32" | |||||
| - BTYPE="BINARY=32 FC=gfortran-8" | |||||
| # whitelist | # whitelist | ||||
| branches: | branches: | ||||
| @@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) | #if defined(ASSEMBLER) && !defined(NEEDPARAM) | ||||
| #define PROLOGUE \ | |||||
| .text ;\ | |||||
| .align 4 ;\ | |||||
| .global REALNAME ;\ | |||||
| .type REALNAME, %function ;\ | |||||
| .macro PROLOGUE | |||||
| .text ; | |||||
| .p2align 2 ; | |||||
| .global REALNAME ; | |||||
| .type REALNAME, %function ; | |||||
| REALNAME: | REALNAME: | ||||
| .endm | |||||
| #define EPILOGUE | #define EPILOGUE | ||||
| @@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if !defined(DOUBLE) | #if !defined(DOUBLE) | ||||
| ldr s4, [X], #4 | ldr s4, [X], #4 | ||||
| fcmp s4, REGZERO | fcmp s4, REGZERO | ||||
| beq KERNEL_F1_NEXT_\@ | |||||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| fabs s4, s4 | fabs s4, s4 | ||||
| fcmp SCALE, s4 | fcmp SCALE, s4 | ||||
| bge KERNEL_F1_SCALE_GE_X_\@ | |||||
| bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ | |||||
| fdiv s2, SCALE, s4 | fdiv s2, SCALE, s4 | ||||
| fmul s2, s2, s2 | fmul s2, s2, s2 | ||||
| fmul s3, SSQ, s2 | fmul s3, SSQ, s2 | ||||
| fadd SSQ, REGONE, s3 | fadd SSQ, REGONE, s3 | ||||
| fmov SCALE, s4 | fmov SCALE, s4 | ||||
| b KERNEL_F1_NEXT_\@ | |||||
| KERNEL_F1_SCALE_GE_X_\@: | |||||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| 1: /* KERNEL_F1_SCALE_GE_X_\@: */ | |||||
| fdiv s2, s4, SCALE | fdiv s2, s4, SCALE | ||||
| fmla SSQ, s2, v2.s[0] | fmla SSQ, s2, v2.s[0] | ||||
| #else | #else | ||||
| ldr d4, [X], #8 | ldr d4, [X], #8 | ||||
| fcmp d4, REGZERO | fcmp d4, REGZERO | ||||
| beq KERNEL_F1_NEXT_\@ | |||||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| fabs d4, d4 | fabs d4, d4 | ||||
| fcmp SCALE, d4 | fcmp SCALE, d4 | ||||
| bge KERNEL_F1_SCALE_GE_X_\@ | |||||
| bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ | |||||
| fdiv d2, SCALE, d4 | fdiv d2, SCALE, d4 | ||||
| fmul d2, d2, d2 | fmul d2, d2, d2 | ||||
| fmul d3, SSQ, d2 | fmul d3, SSQ, d2 | ||||
| fadd SSQ, REGONE, d3 | fadd SSQ, REGONE, d3 | ||||
| fmov SCALE, d4 | fmov SCALE, d4 | ||||
| b KERNEL_F1_NEXT_\@ | |||||
| KERNEL_F1_SCALE_GE_X_\@: | |||||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| 1: /* KERNEL_F1_SCALE_GE_X_\@: */ | |||||
| fdiv d2, d4, SCALE | fdiv d2, d4, SCALE | ||||
| fmla SSQ, d2, v2.d[0] | fmla SSQ, d2, v2.d[0] | ||||
| #endif | #endif | ||||
| KERNEL_F1_NEXT_\@: | |||||
| 2: /* KERNEL_F1_NEXT_\@: */ | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| @@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if !defined(DOUBLE) | #if !defined(DOUBLE) | ||||
| ldr s4, [X], #4 | ldr s4, [X], #4 | ||||
| fcmp s4, REGZERO | fcmp s4, REGZERO | ||||
| beq KERNEL_F1_NEXT_\@ | |||||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| fabs s4, s4 | fabs s4, s4 | ||||
| fcmp SCALE, s4 | fcmp SCALE, s4 | ||||
| bge KERNEL_F1_SCALE_GE_XR_\@ | |||||
| bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */ | |||||
| fdiv s2, SCALE, s4 | fdiv s2, SCALE, s4 | ||||
| fmul s2, s2, s2 | fmul s2, s2, s2 | ||||
| fmul s3, SSQ, s2 | fmul s3, SSQ, s2 | ||||
| fadd SSQ, REGONE, s3 | fadd SSQ, REGONE, s3 | ||||
| fmov SCALE, s4 | fmov SCALE, s4 | ||||
| b KERNEL_F1_NEXT_\@ | |||||
| KERNEL_F1_SCALE_GE_XR_\@: | |||||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| 1: /* KERNEL_F1_SCALE_GE_XR_\@: */ | |||||
| fdiv s2, s4, SCALE | fdiv s2, s4, SCALE | ||||
| fmla SSQ, s2, v2.s[0] | fmla SSQ, s2, v2.s[0] | ||||
| KERNEL_F1_NEXT_\@: | |||||
| 2: /* KERNEL_F1_NEXT_\@: */ | |||||
| ldr s5, [X], #4 | ldr s5, [X], #4 | ||||
| fcmp s5, REGZERO | fcmp s5, REGZERO | ||||
| beq KERNEL_F1_END_\@ | |||||
| beq 4f /* KERNEL_F1_END_\@ */ | |||||
| fabs s5, s5 | fabs s5, s5 | ||||
| fcmp SCALE, s5 | fcmp SCALE, s5 | ||||
| bge KERNEL_F1_SCALE_GE_XI_\@ | |||||
| bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */ | |||||
| fdiv s2, SCALE, s5 | fdiv s2, SCALE, s5 | ||||
| fmul s2, s2, s2 | fmul s2, s2, s2 | ||||
| fmul s3, SSQ, s2 | fmul s3, SSQ, s2 | ||||
| fadd SSQ, REGONE, s3 | fadd SSQ, REGONE, s3 | ||||
| fmov SCALE, s5 | fmov SCALE, s5 | ||||
| b KERNEL_F1_END_\@ | |||||
| KERNEL_F1_SCALE_GE_XI_\@: | |||||
| b 4f /* KERNEL_F1_END_\@ */ | |||||
| 3: /* KERNEL_F1_SCALE_GE_XI_\@: */ | |||||
| fdiv s2, s5, SCALE | fdiv s2, s5, SCALE | ||||
| fmla SSQ, s2, v2.s[0] | fmla SSQ, s2, v2.s[0] | ||||
| #else | #else | ||||
| ldr d4, [X], #8 | ldr d4, [X], #8 | ||||
| fcmp d4, REGZERO | fcmp d4, REGZERO | ||||
| beq KERNEL_F1_NEXT_\@ | |||||
| beq 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| fabs d4, d4 | fabs d4, d4 | ||||
| fcmp SCALE, d4 | fcmp SCALE, d4 | ||||
| bge KERNEL_F1_SCALE_GE_XR_\@ | |||||
| bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */ | |||||
| fdiv d2, SCALE, d4 | fdiv d2, SCALE, d4 | ||||
| fmul d2, d2, d2 | fmul d2, d2, d2 | ||||
| fmul d3, SSQ, d2 | fmul d3, SSQ, d2 | ||||
| fadd SSQ, REGONE, d3 | fadd SSQ, REGONE, d3 | ||||
| fmov SCALE, d4 | fmov SCALE, d4 | ||||
| b KERNEL_F1_NEXT_\@ | |||||
| KERNEL_F1_SCALE_GE_XR_\@: | |||||
| b 2f /* KERNEL_F1_NEXT_\@ */ | |||||
| 1: /* KERNEL_F1_SCALE_GE_XR_\@: */ | |||||
| fdiv d2, d4, SCALE | fdiv d2, d4, SCALE | ||||
| fmla SSQ, d2, v2.d[0] | fmla SSQ, d2, v2.d[0] | ||||
| KERNEL_F1_NEXT_\@: | |||||
| 2: /* KERNEL_F1_NEXT_\@: */ | |||||
| ldr d5, [X], #8 | ldr d5, [X], #8 | ||||
| fcmp d5, REGZERO | fcmp d5, REGZERO | ||||
| beq KERNEL_F1_END_\@ | |||||
| beq 4f /* KERNEL_F1_END_\@ */ | |||||
| fabs d5, d5 | fabs d5, d5 | ||||
| fcmp SCALE, d5 | fcmp SCALE, d5 | ||||
| bge KERNEL_F1_SCALE_GE_XI_\@ | |||||
| bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */ | |||||
| fdiv d2, SCALE, d5 | fdiv d2, SCALE, d5 | ||||
| fmul d2, d2, d2 | fmul d2, d2, d2 | ||||
| fmul d3, SSQ, d2 | fmul d3, SSQ, d2 | ||||
| fadd SSQ, REGONE, d3 | fadd SSQ, REGONE, d3 | ||||
| fmov SCALE, d5 | fmov SCALE, d5 | ||||
| b KERNEL_F1_END_\@ | |||||
| KERNEL_F1_SCALE_GE_XI_\@: | |||||
| b 4f /* KERNEL_F1_END_\@ */ | |||||
| 3: /* KERNEL_F1_SCALE_GE_XI_\@: */ | |||||
| fdiv d2, d5, SCALE | fdiv d2, d5, SCALE | ||||
| fmla SSQ, d2, v2.d[0] | fmla SSQ, d2, v2.d[0] | ||||
| #endif | #endif | ||||
| KERNEL_F1_END_\@: | |||||
| 4: /* KERNEL_F1_END_\@: */ | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| @@ -34,9 +34,9 @@ caxpy_k: | |||||
| lfs 0,4(10) | lfs 0,4(10) | ||||
| fmuls 10,2,10 | fmuls 10,2,10 | ||||
| #ifdef CONJ | #ifdef CONJ | ||||
| fmsubs 11,11,1,10 | |||||
| #else | |||||
| fmadds 11,11,1,10 | fmadds 11,11,1,10 | ||||
| #else | |||||
| fmsubs 11,11,1,10 | |||||
| #endif | #endif | ||||
| fadds 12,12,11 | fadds 12,12,11 | ||||
| stfs 12,0(10) | stfs 12,0(10) | ||||
| @@ -241,8 +241,13 @@ caxpy_k: | |||||
| lfsx 12,8,5 | lfsx 12,8,5 | ||||
| lfsx 0,10,5 | lfsx 0,10,5 | ||||
| fmuls 11,2,11 | fmuls 11,2,11 | ||||
| #ifdef CONJ | |||||
| fmsubs 12,1,12,11 | fmsubs 12,1,12,11 | ||||
| fsubs 0,0,12 | fsubs 0,0,12 | ||||
| #else | |||||
| fmadds 12,1,12,11 | |||||
| fadds 0,0,12 | |||||
| #endif | |||||
| stfsx 0,10,5 | stfsx 0,10,5 | ||||
| ble 7,.L39 | ble 7,.L39 | ||||
| sldi 6,6,2 | sldi 6,6,2 | ||||
| @@ -1,10 +1,16 @@ | |||||
| .file "cdot.c" | |||||
| #define ASSEMBLER | |||||
| #include "common.h" | |||||
| /* | |||||
| .file "cdot.c" | |||||
| .abiversion 2 | .abiversion 2 | ||||
| .section ".text" | .section ".text" | ||||
| .align 2 | .align 2 | ||||
| .p2align 4,,15 | .p2align 4,,15 | ||||
| .globl cdot_k | .globl cdot_k | ||||
| .type cdot_k, @function | .type cdot_k, @function | ||||
| */ | |||||
| PROLOGUE | |||||
| cdot_k: | cdot_k: | ||||
| .LCF0: | .LCF0: | ||||
| 0: addis 2,12,.TOC.-.LCF0@ha | 0: addis 2,12,.TOC.-.LCF0@ha | ||||
| @@ -136,8 +136,8 @@ LSGEMM_L8x16_BEGIN: | |||||
| #endif | #endif | ||||
| ZERO8x16 | ZERO8x16 | ||||
| mtctr L | |||||
| ble LSGEMM_L8x16_SUB0 | ble LSGEMM_L8x16_SUB0 | ||||
| mtctr L | |||||
| bl LSGEMM_L8x16_LMAIN_SUB | bl LSGEMM_L8x16_LMAIN_SUB | ||||
| andi. L, T12, 127 | andi. L, T12, 127 | ||||
| ble LSGEMM_L8x16_SAVE | ble LSGEMM_L8x16_SAVE | ||||
| @@ -146,7 +146,7 @@ LSGEMM_L8x16_BEGIN: | |||||
| LSGEMM_L8x16_SUB0: | LSGEMM_L8x16_SUB0: | ||||
| #if defined(TRMMKERNEL) | #if defined(TRMMKERNEL) | ||||
| andi. L, T11, 255 | andi. L, T11, 255 | ||||
| cmpwi T11,128 | |||||
| cmpwi T11,129 | |||||
| #else | #else | ||||
| andi. L, K, 255 | andi. L, K, 255 | ||||
| cmpwi K,129 | cmpwi K,129 | ||||