Browse Source

Merge pull request #11 from xianyi/develop

sync with upstream
tags/v0.3.8^2
Martin Kroeker GitHub 6 years ago
parent
commit
17609f88f1
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 54 additions and 41 deletions
  1. +3
    -3
      .travis.yml
  2. +7
    -5
      common_arm64.h
  3. +9
    -9
      kernel/arm64/nrm2.S
  4. +19
    -19
      kernel/arm64/znrm2.S
  5. +7
    -2
      kernel/power/caxpy_power8.S
  6. +7
    -1
      kernel/power/cdot_power9.S
  7. +2
    -2
      kernel/power/sgemm_logic_power9.S

+ 3
- 3
.travis.yml View File

@@ -162,16 +162,16 @@ matrix:
  before_script:
  - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
  - brew update
- - brew install gcc # for gfortran
+ - brew install gcc@8 # for gfortran
  script:
  - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
  env:
- - BTYPE="BINARY=64 INTERFACE64=1"
+ - BTYPE="BINARY=64 INTERFACE64=1 FC=gfortran-8"

  - <<: *test-macos
  osx_image: xcode8.3
  env:
- - BTYPE="BINARY=32"
+ - BTYPE="BINARY=32 FC=gfortran-8"

# whitelist
branches:


+ 7
- 5
common_arm64.h View File

@@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){

#if defined(ASSEMBLER) && !defined(NEEDPARAM)

- #define PROLOGUE \
- .text ;\
- .align 4 ;\
- .global REALNAME ;\
- .type REALNAME, %function ;\
+ .macro PROLOGUE
+ .text ;
+ .p2align 2 ;
+ .global REALNAME ;
+ .type REALNAME, %function ;
  REALNAME:
+ .endm
+

#define EPILOGUE



+ 9
- 9
kernel/arm64/nrm2.S View File

@@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  #if !defined(DOUBLE)
  ldr s4, [X], #4
  fcmp s4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs s4, s4
  fcmp SCALE, s4
- bge KERNEL_F1_SCALE_GE_X_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
  fdiv s2, SCALE, s4
  fmul s2, s2, s2
  fmul s3, SSQ, s2
  fadd SSQ, REGONE, s3
  fmov SCALE, s4
- b KERNEL_F1_NEXT_\@
- KERNEL_F1_SCALE_GE_X_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+ 1: /* KERNEL_F1_SCALE_GE_X_\@: */
  fdiv s2, s4, SCALE
  fmla SSQ, s2, v2.s[0]
  #else
  ldr d4, [X], #8
  fcmp d4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs d4, d4
  fcmp SCALE, d4
- bge KERNEL_F1_SCALE_GE_X_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
  fdiv d2, SCALE, d4
  fmul d2, d2, d2
  fmul d3, SSQ, d2
  fadd SSQ, REGONE, d3
  fmov SCALE, d4
- b KERNEL_F1_NEXT_\@
- KERNEL_F1_SCALE_GE_X_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+ 1: /* KERNEL_F1_SCALE_GE_X_\@: */
  fdiv d2, d4, SCALE
  fmla SSQ, d2, v2.d[0]
  #endif
- KERNEL_F1_NEXT_\@:
+ 2: /* KERNEL_F1_NEXT_\@: */
  .endm

.macro KERNEL_S1


+ 19
- 19
kernel/arm64/znrm2.S View File

@@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  #if !defined(DOUBLE)
  ldr s4, [X], #4
  fcmp s4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs s4, s4
  fcmp SCALE, s4
- bge KERNEL_F1_SCALE_GE_XR_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
  fdiv s2, SCALE, s4
  fmul s2, s2, s2
  fmul s3, SSQ, s2
  fadd SSQ, REGONE, s3
  fmov SCALE, s4
- b KERNEL_F1_NEXT_\@
- KERNEL_F1_SCALE_GE_XR_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+ 1: /* KERNEL_F1_SCALE_GE_XR_\@: */
  fdiv s2, s4, SCALE
  fmla SSQ, s2, v2.s[0]
- KERNEL_F1_NEXT_\@:
+ 2: /* KERNEL_F1_NEXT_\@: */
  ldr s5, [X], #4
  fcmp s5, REGZERO
- beq KERNEL_F1_END_\@
+ beq 4f /* KERNEL_F1_END_\@ */
  fabs s5, s5
  fcmp SCALE, s5
- bge KERNEL_F1_SCALE_GE_XI_\@
+ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
  fdiv s2, SCALE, s5
  fmul s2, s2, s2
  fmul s3, SSQ, s2
  fadd SSQ, REGONE, s3
  fmov SCALE, s5
- b KERNEL_F1_END_\@
- KERNEL_F1_SCALE_GE_XI_\@:
+ b 4f /* KERNEL_F1_END_\@ */
+ 3: /* KERNEL_F1_SCALE_GE_XI_\@: */
  fdiv s2, s5, SCALE
  fmla SSQ, s2, v2.s[0]
  #else
  ldr d4, [X], #8
  fcmp d4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs d4, d4
  fcmp SCALE, d4
- bge KERNEL_F1_SCALE_GE_XR_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
  fdiv d2, SCALE, d4
  fmul d2, d2, d2
  fmul d3, SSQ, d2
  fadd SSQ, REGONE, d3
  fmov SCALE, d4
- b KERNEL_F1_NEXT_\@
- KERNEL_F1_SCALE_GE_XR_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+ 1: /* KERNEL_F1_SCALE_GE_XR_\@: */
  fdiv d2, d4, SCALE
  fmla SSQ, d2, v2.d[0]
- KERNEL_F1_NEXT_\@:
+ 2: /* KERNEL_F1_NEXT_\@: */
  ldr d5, [X], #8
  fcmp d5, REGZERO
- beq KERNEL_F1_END_\@
+ beq 4f /* KERNEL_F1_END_\@ */
  fabs d5, d5
  fcmp SCALE, d5
- bge KERNEL_F1_SCALE_GE_XI_\@
+ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
  fdiv d2, SCALE, d5
  fmul d2, d2, d2
  fmul d3, SSQ, d2
  fadd SSQ, REGONE, d3
  fmov SCALE, d5
- b KERNEL_F1_END_\@
- KERNEL_F1_SCALE_GE_XI_\@:
+ b 4f /* KERNEL_F1_END_\@ */
+ 3: /* KERNEL_F1_SCALE_GE_XI_\@: */
  fdiv d2, d5, SCALE
  fmla SSQ, d2, v2.d[0]
  #endif
- KERNEL_F1_END_\@:
+ 4: /* KERNEL_F1_END_\@: */
  .endm

.macro KERNEL_S1


+ 7
- 2
kernel/power/caxpy_power8.S View File

@@ -34,9 +34,9 @@ caxpy_k:
lfs 0,4(10)
fmuls 10,2,10
#ifdef CONJ
fmsubs 11,11,1,10
#else
fmadds 11,11,1,10
#else
fmsubs 11,11,1,10
#endif
fadds 12,12,11
stfs 12,0(10)
@@ -241,8 +241,13 @@ caxpy_k:
lfsx 12,8,5
lfsx 0,10,5
fmuls 11,2,11
#ifdef CONJ
fmsubs 12,1,12,11
fsubs 0,0,12
#else
fmadds 12,1,12,11
fadds 0,0,12
#endif
stfsx 0,10,5
ble 7,.L39
sldi 6,6,2


+ 7
- 1
kernel/power/cdot_power9.S View File

@@ -1,10 +1,16 @@
.file "cdot.c"
#define ASSEMBLER
#include "common.h"
/*
.file "cdot.c"
.abiversion 2
.section ".text"
.align 2
.p2align 4,,15
.globl cdot_k
.type cdot_k, @function
*/
PROLOGUE
cdot_k:
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha


+ 2
- 2
kernel/power/sgemm_logic_power9.S View File

@@ -136,8 +136,8 @@ LSGEMM_L8x16_BEGIN:
#endif
ZERO8x16
mtctr L
ble LSGEMM_L8x16_SUB0
mtctr L
bl LSGEMM_L8x16_LMAIN_SUB
andi. L, T12, 127
ble LSGEMM_L8x16_SAVE
@@ -146,7 +146,7 @@ LSGEMM_L8x16_BEGIN:
LSGEMM_L8x16_SUB0:
#if defined(TRMMKERNEL)
andi. L, T11, 255
cmpwi T11,128
cmpwi T11,129
#else
andi. L, K, 255
cmpwi K,129


Loading…
Cancel
Save