Further Power8 big-endian corrections (tags/v0.3.8^2)
| @@ -89,14 +89,30 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| #SMINKERNEL = ../arm/min.c | #SMINKERNEL = ../arm/min.c | ||||
| #DMINKERNEL = ../arm/min.c | #DMINKERNEL = ../arm/min.c | ||||
| # | # | ||||
| ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) | |||||
| ISAMAXKERNEL = isamax_power8.S | ISAMAXKERNEL = isamax_power8.S | ||||
| else | |||||
| ISAMAXKERNEL = isamax.c | |||||
| endif | |||||
| IDAMAXKERNEL = idamax.c | IDAMAXKERNEL = idamax.c | ||||
| ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) | |||||
| ICAMAXKERNEL = icamax_power8.S | ICAMAXKERNEL = icamax_power8.S | ||||
| else | |||||
| ICAMAXKERNEL = icamax.c | |||||
| endif | |||||
| IZAMAXKERNEL = izamax.c | IZAMAXKERNEL = izamax.c | ||||
| # | # | ||||
| ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) | |||||
| ISAMINKERNEL = isamin_power8.S | ISAMINKERNEL = isamin_power8.S | ||||
| else | |||||
| ISAMINKERNEL = isamin.c | |||||
| endif | |||||
| IDAMINKERNEL = idamin.c | IDAMINKERNEL = idamin.c | ||||
| ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) | |||||
| ICAMINKERNEL = icamin_power8.S | ICAMINKERNEL = icamin_power8.S | ||||
| else | |||||
| ICAMINKERNEL = icamin.c | |||||
| endif | |||||
| IZAMINKERNEL = izamin.c | IZAMINKERNEL = izamin.c | ||||
| # | # | ||||
| #ISMAXKERNEL = ../arm/imax.c | #ISMAXKERNEL = ../arm/imax.c | ||||
| @@ -112,7 +128,11 @@ ZASUMKERNEL = zasum.c | |||||
| # | # | ||||
| SAXPYKERNEL = saxpy.c | SAXPYKERNEL = saxpy.c | ||||
| DAXPYKERNEL = daxpy.c | DAXPYKERNEL = daxpy.c | ||||
| ifneq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__)) | |||||
| CAXPYKERNEL = caxpy_power8.S | CAXPYKERNEL = caxpy_power8.S | ||||
| else | |||||
| CAXPYKERNEL = caxpy.c | |||||
| endif | |||||
| ZAXPYKERNEL = zaxpy.c | ZAXPYKERNEL = zaxpy.c | ||||
| # | # | ||||
| SCOPYKERNEL = scopy.c | SCOPYKERNEL = scopy.c | ||||
| @@ -12,11 +12,12 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| caxpy_k: | |||||
| .LCF0: | .LCF0: | ||||
| 0: addis 2,12,.TOC.-.LCF0@ha | 0: addis 2,12,.TOC.-.LCF0@ha | ||||
| addi 2,2,.TOC.-.LCF0@l | addi 2,2,.TOC.-.LCF0@l | ||||
| #if _CALL_ELF ==2 | |||||
| .localentry caxpy_k,.-caxpy_k | .localentry caxpy_k,.-caxpy_k | ||||
| #endif | |||||
| mr. 7,3 | mr. 7,3 | ||||
| ble 0,.L33 | ble 0,.L33 | ||||
| cmpdi 7,9,1 | cmpdi 7,9,1 | ||||
| @@ -515,7 +516,9 @@ caxpy_k: | |||||
| b .L13 | b .L13 | ||||
| .long 0 | .long 0 | ||||
| .byte 0,0,0,0,0,4,0,0 | .byte 0,0,0,0,0,4,0,0 | ||||
| #if _CALL_ELF ==2 | |||||
| .size caxpy_k,.-caxpy_k | .size caxpy_k,.-caxpy_k | ||||
| #endif | |||||
| .section .rodata | .section .rodata | ||||
| .align 4 | .align 4 | ||||
| .set .LANCHOR0,. + 0 | .set .LANCHOR0,. + 0 | ||||
| @@ -11,11 +11,12 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| icamin_k: | |||||
| .LCF0: | .LCF0: | ||||
| 0: addis 2,12,.TOC.-.LCF0@ha | 0: addis 2,12,.TOC.-.LCF0@ha | ||||
| addi 2,2,.TOC.-.LCF0@l | addi 2,2,.TOC.-.LCF0@l | ||||
| #if _CALL_ELF ==2 | |||||
| .localentry icamin_k,.-icamin_k | .localentry icamin_k,.-icamin_k | ||||
| #endif | |||||
| mr. 9,3 | mr. 9,3 | ||||
| ble 0,.L25 | ble 0,.L25 | ||||
| cmpdi 7,5,0 | cmpdi 7,5,0 | ||||
| @@ -388,7 +389,9 @@ icamin_k: | |||||
| b .L21 | b .L21 | ||||
| .long 0 | .long 0 | ||||
| .byte 0,0,0,0,0,1,0,0 | .byte 0,0,0,0,0,1,0,0 | ||||
| #if _CALL_ELF ==2 | |||||
| .size icamin_k,.-icamin_k | .size icamin_k,.-icamin_k | ||||
| #endif | |||||
| .section .rodata.cst16,"aM",@progbits,16 | .section .rodata.cst16,"aM",@progbits,16 | ||||
| .align 4 | .align 4 | ||||
| .LC2: | .LC2: | ||||
| @@ -324,15 +324,15 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| if (inc_x == 1) { | if (inc_x == 1) { | ||||
| #if defined(_CALL_ELF) && (_CALL_ELF == 2) | |||||
| BLASLONG n1 = n & -32; | BLASLONG n1 = n & -32; | ||||
| if (n1 > 0) { | |||||
| #if defined(_CALL_ELF) && (_CALL_ELF == 2) | |||||
| if (n1 > 0) { | |||||
| max = diamax_kernel_32(n1, x, &maxf); | max = diamax_kernel_32(n1, x, &maxf); | ||||
| i = n1; | i = n1; | ||||
| } | } | ||||
| #endif | |||||
| #endif | |||||
| while (i < n) { | while (i < n) { | ||||
| if (ABS(x[i]) > maxf) { | if (ABS(x[i]) > maxf) { | ||||
| max = i; | max = i; | ||||
| @@ -328,13 +328,12 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { | |||||
| #if defined(_CALL_ELF) && (_CALL_ELF == 2) | #if defined(_CALL_ELF) && (_CALL_ELF == 2) | ||||
| BLASLONG n1 = n & -32; | BLASLONG n1 = n & -32; | ||||
| if (n1 > 0) { | |||||
| if (n1 > 0) { | |||||
| min = diamin_kernel_32(n1, x, &minf); | min = diamin_kernel_32(n1, x, &minf); | ||||
| i = n1; | i = n1; | ||||
| } | } | ||||
| #endif | #endif | ||||
| while (i < n) { | while (i < n) { | ||||
| if (ABS(x[i]) < minf) { | if (ABS(x[i]) < minf) { | ||||
| min = i; | min = i; | ||||
| @@ -12,11 +12,12 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| isamax_k: | |||||
| .LCF0: | .LCF0: | ||||
| 0: addis 2,12,.TOC.-.LCF0@ha | 0: addis 2,12,.TOC.-.LCF0@ha | ||||
| addi 2,2,.TOC.-.LCF0@l | addi 2,2,.TOC.-.LCF0@l | ||||
| #if _CALL_ELF ==2 | |||||
| .localentry isamax_k,.-isamax_k | .localentry isamax_k,.-isamax_k | ||||
| #endif | |||||
| mr. 11,3 | mr. 11,3 | ||||
| ble 0,.L36 | ble 0,.L36 | ||||
| cmpdi 7,5,0 | cmpdi 7,5,0 | ||||
| @@ -397,7 +398,9 @@ isamax_k: | |||||
| b .L61 | b .L61 | ||||
| .long 0 | .long 0 | ||||
| .byte 0,0,0,0,0,1,0,0 | .byte 0,0,0,0,0,1,0,0 | ||||
| #if _CALL_ELF ==2 | |||||
| .size isamax_k,.-isamax_k | .size isamax_k,.-isamax_k | ||||
| #endif | |||||
| .section .rodata.cst16,"aM",@progbits,16 | .section .rodata.cst16,"aM",@progbits,16 | ||||
| .align 4 | .align 4 | ||||
| .LC2: | .LC2: | ||||
| @@ -11,11 +11,12 @@ | |||||
| PROLOGUE | PROLOGUE | ||||
| isamin_k: | |||||
| .LCF0: | .LCF0: | ||||
| 0: addis 2,12,.TOC.-.LCF0@ha | 0: addis 2,12,.TOC.-.LCF0@ha | ||||
| addi 2,2,.TOC.-.LCF0@l | addi 2,2,.TOC.-.LCF0@l | ||||
| #if _CALL_ELF ==2 | |||||
| .localentry isamin_k,.-isamin_k | .localentry isamin_k,.-isamin_k | ||||
| #endif | |||||
| mr. 11,3 | mr. 11,3 | ||||
| ble 0,.L36 | ble 0,.L36 | ||||
| cmpdi 7,5,0 | cmpdi 7,5,0 | ||||
| @@ -380,7 +381,9 @@ isamin_k: | |||||
| b .L35 | b .L35 | ||||
| .long 0 | .long 0 | ||||
| .byte 0,0,0,0,0,1,0,0 | .byte 0,0,0,0,0,1,0,0 | ||||
| #if _CALL_ELF ==2 | |||||
| .size isamin_k,.-isamin_k | .size isamin_k,.-isamin_k | ||||
| #endif | |||||
| .section .rodata.cst16,"aM",@progbits,16 | .section .rodata.cst16,"aM",@progbits,16 | ||||
| .align 4 | .align 4 | ||||
| .LC2: | .LC2: | ||||
| @@ -316,14 +316,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| minf = CABS1(x,0); //index will not be incremented | minf = CABS1(x,0); //index will not be incremented | ||||
| #if defined(_CALL_ELF) && (_CALL_ELF == 2) | #if defined(_CALL_ELF) && (_CALL_ELF == 2) | ||||
| BLASLONG n1 = n & -16; | |||||
| BLASLONG n1 = n & -16; | |||||
| if (n1 > 0) { | if (n1 > 0) { | ||||
| min = ziamin_kernel_16_TUNED(n1, x, &minf); | min = ziamin_kernel_16_TUNED(n1, x, &minf); | ||||
| i = n1; | i = n1; | ||||
| ix = n1 << 1; | ix = n1 << 1; | ||||
| } | } | ||||
| #endif | |||||
| #endif | |||||
| while(i < n) | while(i < n) | ||||
| { | { | ||||