Browse Source

Merge pull request #2312 from martin-frbg/power8be

Further Power8 big-endian corrections
tags/v0.3.8^2
Martin Kroeker GitHub 6 years ago
parent
commit
08fa83aba2
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 42 additions and 11 deletions
  1. +20 -0 kernel/power/KERNEL.POWER8
  2. +4 -1 kernel/power/caxpy_power8.S
  3. +4 -1 kernel/power/icamin_power8.S
  4. +3 -3 kernel/power/idamax.c
  5. +1 -2 kernel/power/idamin.c
  6. +4 -1 kernel/power/isamax_power8.S
  7. +4 -1 kernel/power/isamin_power8.S
  8. +2 -2 kernel/power/izamin.c

+ 20
- 0
kernel/power/KERNEL.POWER8 View File

@@ -89,14 +89,30 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#SMINKERNEL = ../arm/min.c #SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c #DMINKERNEL = ../arm/min.c
# #
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ISAMAXKERNEL = isamax_power8.S ISAMAXKERNEL = isamax_power8.S
else
ISAMAXKERNEL = isamax.c
endif
IDAMAXKERNEL = idamax.c IDAMAXKERNEL = idamax.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ICAMAXKERNEL = icamax_power8.S ICAMAXKERNEL = icamax_power8.S
else
ICAMAXKERNEL = icamax.c
endif
IZAMAXKERNEL = izamax.c IZAMAXKERNEL = izamax.c
# #
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ISAMINKERNEL = isamin_power8.S ISAMINKERNEL = isamin_power8.S
else
ISAMINKERNEL = isamin.c
endif
IDAMINKERNEL = idamin.c IDAMINKERNEL = idamin.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
ICAMINKERNEL = icamin_power8.S ICAMINKERNEL = icamin_power8.S
else
ICAMINKERNEL = icamin.c
endif
IZAMINKERNEL = izamin.c IZAMINKERNEL = izamin.c
# #
#ISMAXKERNEL = ../arm/imax.c #ISMAXKERNEL = ../arm/imax.c
@@ -112,7 +128,11 @@ ZASUMKERNEL = zasum.c
# #
SAXPYKERNEL = saxpy.c SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c DAXPYKERNEL = daxpy.c
ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
CAXPYKERNEL = caxpy_power8.S CAXPYKERNEL = caxpy_power8.S
else
CAXPYKERNEL = caxpy.c
endif
ZAXPYKERNEL = zaxpy.c ZAXPYKERNEL = zaxpy.c
# #
SCOPYKERNEL = scopy.c SCOPYKERNEL = scopy.c


+ 4
- 1
kernel/power/caxpy_power8.S View File

@@ -12,11 +12,12 @@


PROLOGUE PROLOGUE


caxpy_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2
.localentry caxpy_k,.-caxpy_k .localentry caxpy_k,.-caxpy_k
#endif
mr. 7,3 mr. 7,3
ble 0,.L33 ble 0,.L33
cmpdi 7,9,1 cmpdi 7,9,1
@@ -515,7 +516,9 @@ caxpy_k:
b .L13 b .L13
.long 0 .long 0
.byte 0,0,0,0,0,4,0,0 .byte 0,0,0,0,0,4,0,0
#if _CALL_ELF ==2
.size caxpy_k,.-caxpy_k .size caxpy_k,.-caxpy_k
#endif
.section .rodata .section .rodata
.align 4 .align 4
.set .LANCHOR0,. + 0 .set .LANCHOR0,. + 0


+ 4
- 1
kernel/power/icamin_power8.S View File

@@ -11,11 +11,12 @@


PROLOGUE PROLOGUE


icamin_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2
.localentry icamin_k,.-icamin_k .localentry icamin_k,.-icamin_k
#endif
mr. 9,3 mr. 9,3
ble 0,.L25 ble 0,.L25
cmpdi 7,5,0 cmpdi 7,5,0
@@ -388,7 +389,9 @@ icamin_k:
b .L21 b .L21
.long 0 .long 0
.byte 0,0,0,0,0,1,0,0 .byte 0,0,0,0,0,1,0,0
#if _CALL_ELF ==2
.size icamin_k,.-icamin_k .size icamin_k,.-icamin_k
#endif
.section .rodata.cst16,"aM",@progbits,16 .section .rodata.cst16,"aM",@progbits,16
.align 4 .align 4
.LC2: .LC2:


+ 3
- 3
kernel/power/idamax.c View File

@@ -324,15 +324,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {


if (inc_x == 1) { if (inc_x == 1) {


#if defined(_CALL_ELF) && (_CALL_ELF == 2)
BLASLONG n1 = n & -32; BLASLONG n1 = n & -32;
if (n1 > 0) {
#if defined(_CALL_ELF) && (_CALL_ELF == 2)
if (n1 > 0) {


max = diamax_kernel_32(n1, x, &maxf); max = diamax_kernel_32(n1, x, &maxf);


i = n1; i = n1;
} }
#endif
#endif
while (i < n) { while (i < n) {
if (ABS(x[i]) > maxf) { if (ABS(x[i]) > maxf) {
max = i; max = i;


+ 1
- 2
kernel/power/idamin.c View File

@@ -328,13 +328,12 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {


#if defined(_CALL_ELF) && (_CALL_ELF == 2) #if defined(_CALL_ELF) && (_CALL_ELF == 2)
BLASLONG n1 = n & -32; BLASLONG n1 = n & -32;
if (n1 > 0) {
if (n1 > 0) {


min = diamin_kernel_32(n1, x, &minf); min = diamin_kernel_32(n1, x, &minf);
i = n1; i = n1;
} }
#endif #endif
while (i < n) { while (i < n) {
if (ABS(x[i]) < minf) { if (ABS(x[i]) < minf) {
min = i; min = i;


+ 4
- 1
kernel/power/isamax_power8.S View File

@@ -12,11 +12,12 @@


PROLOGUE PROLOGUE


isamax_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2
.localentry isamax_k,.-isamax_k .localentry isamax_k,.-isamax_k
#endif
mr. 11,3 mr. 11,3
ble 0,.L36 ble 0,.L36
cmpdi 7,5,0 cmpdi 7,5,0
@@ -397,7 +398,9 @@ isamax_k:
b .L61 b .L61
.long 0 .long 0
.byte 0,0,0,0,0,1,0,0 .byte 0,0,0,0,0,1,0,0
#if _CALL_ELF ==2
.size isamax_k,.-isamax_k .size isamax_k,.-isamax_k
#endif
.section .rodata.cst16,"aM",@progbits,16 .section .rodata.cst16,"aM",@progbits,16
.align 4 .align 4
.LC2: .LC2:


+ 4
- 1
kernel/power/isamin_power8.S View File

@@ -11,11 +11,12 @@


PROLOGUE PROLOGUE


isamin_k:
.LCF0: .LCF0:
0: addis 2,12,.TOC.-.LCF0@ha 0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l addi 2,2,.TOC.-.LCF0@l
#if _CALL_ELF ==2
.localentry isamin_k,.-isamin_k .localentry isamin_k,.-isamin_k
#endif
mr. 11,3 mr. 11,3
ble 0,.L36 ble 0,.L36
cmpdi 7,5,0 cmpdi 7,5,0
@@ -380,7 +381,9 @@ isamin_k:
b .L35 b .L35
.long 0 .long 0
.byte 0,0,0,0,0,1,0,0 .byte 0,0,0,0,0,1,0,0
#if _CALL_ELF ==2
.size isamin_k,.-isamin_k .size isamin_k,.-isamin_k
#endif
.section .rodata.cst16,"aM",@progbits,16 .section .rodata.cst16,"aM",@progbits,16
.align 4 .align 4
.LC2: .LC2:


+ 2
- 2
kernel/power/izamin.c View File

@@ -316,14 +316,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
minf = CABS1(x,0); //index will not be incremented minf = CABS1(x,0); //index will not be incremented


#if defined(_CALL_ELF) && (_CALL_ELF == 2) #if defined(_CALL_ELF) && (_CALL_ELF == 2)
BLASLONG n1 = n & -16;
BLASLONG n1 = n & -16;
if (n1 > 0) { if (n1 > 0) {


min = ziamin_kernel_16_TUNED(n1, x, &minf); min = ziamin_kernel_16_TUNED(n1, x, &minf);
i = n1; i = n1;
ix = n1 << 1; ix = n1 << 1;
} }
#endif
#endif


while(i < n) while(i < n)
{ {


Loading…
Cancel
Save