* Add gcc7-generated assembly files for POWER8/9 isa/ica-min/max and POWER9 caxpy to work around internal compiler errors encountered when compiling the original C source with gcc 4 and 5, and wrong code generated by gcc 8.3.0 * Use gcc-generated assembly instead of original C sources to work around internal compiler errors encountered with gcc 4.8/5.4 and wrong code generation by gcc 8.3 * Use gcc-generated assembly instead of the original C source to work around internal compiler errors encountered with gcc 4.8 and 5.4, and wrong code generation by gcc 8.3 * Add gcc7-generated assembler version of caxpy for power8 to work around wrong code generated by gcc 8.3 * Handle CONJ define for caxpyc * Handle CONJ define for caxpyc * Add gcc7-generated assembly cdot for POWER9 * Use prebuilt assembly for POWER9 cdot created with gcc 7.3.1 to work around ICE in older gcc versions * Exclude POWER9 from DYNAMIC_ARCH when gcc version is lower than 6 * Update Makefile.system * Use PROLOGUE macro to ensure correct function name for DYNAMIC_ARCH * Disable POWER9 with old gcc versions tags/v0.3.8^2
@@ -322,12 +322,13 @@ CCOMMON_OPT += -DMS_ABI | |||
endif | |||
ifeq ($(C_COMPILER), GCC) | |||
#Test for supporting MS_ABI | |||
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics) | |||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) | |||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4) | |||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) | |||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) | |||
ifeq ($(GCCVERSIONGT4), 1) | |||
# GCC Majar version > 4 | |||
# GCC Major version > 4 | |||
# It is compatible with MSVC ABI. | |||
CCOMMON_OPT += -DMS_ABI | |||
endif | |||
@@ -554,8 +555,17 @@ endif | |||
ifeq ($(ARCH), power)
# Cores built into a DYNAMIC_ARCH library on POWER.
DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8
# Any compiler other than GCC gets POWER9 unconditionally.
ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9
endif
# With GCC, POWER9 is added only when the major version is greater than 5
# (GCCVERSIONGT5 is computed in the GCC version tests earlier in this file).
ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGT5), 1)
DYNAMIC_CORE += POWER9
else
# No comma after the function name: "$(info, ...)" is parsed as a reference
# to an undefined variable and expands to nothing, so nothing was printed.
$(info OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
endif
endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | |||
ifndef DYNAMIC_CORE | |||
@@ -3,7 +3,9 @@ | |||
extern gotoblas_t gotoblas_POWER6; | |||
extern gotoblas_t gotoblas_POWER8; | |||
#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
extern gotoblas_t gotoblas_POWER9; | |||
#endif | |||
extern void openblas_warning(int verbose, const char *msg); | |||
@@ -19,7 +21,9 @@ static char *corename[] = { | |||
char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_POWER6) return corename[1]; | |||
if (gotoblas == &gotoblas_POWER8) return corename[2]; | |||
#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||
#endif | |||
return corename[0]; | |||
} | |||
@@ -29,8 +33,10 @@ static gotoblas_t *get_coretype(void) { | |||
return &gotoblas_POWER6; | |||
if (__builtin_cpu_is("power8")) | |||
return &gotoblas_POWER8; | |||
#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
if (__builtin_cpu_is("power9")) | |||
return &gotoblas_POWER9; | |||
#endif | |||
return NULL; | |||
} | |||
@@ -53,7 +59,9 @@ static gotoblas_t *force_coretype(char * coretype) { | |||
{ | |||
case 1: return (&gotoblas_POWER6); | |||
case 2: return (&gotoblas_POWER8); | |||
#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
case 3: return (&gotoblas_POWER9); | |||
#endif | |||
default: return NULL; | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
@@ -89,14 +89,14 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
#SMINKERNEL = ../arm/min.c | |||
#DMINKERNEL = ../arm/min.c | |||
# | |||
ISAMAXKERNEL = isamax.c | |||
ISAMAXKERNEL = isamax_power8.S | |||
IDAMAXKERNEL = idamax.c | |||
ICAMAXKERNEL = icamax.c | |||
ICAMAXKERNEL = icamax_power8.S | |||
IZAMAXKERNEL = izamax.c | |||
# | |||
ISAMINKERNEL = isamin.c | |||
ISAMINKERNEL = isamin_power8.S | |||
IDAMINKERNEL = idamin.c | |||
ICAMINKERNEL = icamin.c | |||
ICAMINKERNEL = icamin_power8.S | |||
IZAMINKERNEL = izamin.c | |||
# | |||
#ISMAXKERNEL = ../arm/imax.c | |||
@@ -112,7 +112,7 @@ ZASUMKERNEL = zasum.c | |||
# | |||
SAXPYKERNEL = saxpy.c | |||
DAXPYKERNEL = daxpy.c | |||
CAXPYKERNEL = caxpy.c | |||
CAXPYKERNEL = caxpy_power8.S | |||
ZAXPYKERNEL = zaxpy.c | |||
# | |||
SCOPYKERNEL = scopy.c | |||
@@ -89,14 +89,14 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
#SMINKERNEL = ../arm/min.c | |||
#DMINKERNEL = ../arm/min.c | |||
# | |||
ISAMAXKERNEL = isamax.c | |||
ISAMAXKERNEL = isamax_power9.S | |||
IDAMAXKERNEL = idamax.c | |||
ICAMAXKERNEL = icamax.c | |||
ICAMAXKERNEL = icamax_power9.S | |||
IZAMAXKERNEL = izamax.c | |||
# | |||
ISAMINKERNEL = isamin.c | |||
ISAMINKERNEL = isamin_power9.S | |||
IDAMINKERNEL = idamin.c | |||
ICAMINKERNEL = icamin.c | |||
ICAMINKERNEL = icamin_power9.S | |||
IZAMINKERNEL = izamin.c | |||
# | |||
#ISMAXKERNEL = ../arm/imax.c | |||
@@ -112,7 +112,7 @@ ZASUMKERNEL = zasum.c | |||
# | |||
SAXPYKERNEL = saxpy.c | |||
DAXPYKERNEL = daxpy.c | |||
CAXPYKERNEL = caxpy.c | |||
CAXPYKERNEL = caxpy_power9.S | |||
ZAXPYKERNEL = zaxpy.c | |||
# | |||
SCOPYKERNEL = scopy.c | |||
@@ -123,7 +123,7 @@ ZCOPYKERNEL = zcopy.c | |||
SDOTKERNEL = sdot.c | |||
DDOTKERNEL = ddot.c | |||
DSDOTKERNEL = sdot.c | |||
CDOTKERNEL = cdot.c | |||
CDOTKERNEL = cdot_power9.S | |||
ZDOTKERNEL = zdot.c | |||
# | |||
SNRM2KERNEL = ../arm/nrm2.c | |||
@@ -0,0 +1,574 @@ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
/* | |||
.file "caxpy.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl caxpy_k | |||
.type caxpy_k, @function | |||
*/ | |||
PROLOGUE | |||
caxpy_k: | |||
.LCF0: | |||
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry caxpy_k,.-caxpy_k | |||
mr. 7,3 | |||
ble 0,.L33 | |||
cmpdi 7,9,1 | |||
beq 7,.L41 | |||
.L3: | |||
mtctr 7 | |||
ld 7,96(1) | |||
sldi 9,9,3 | |||
sldi 7,7,3 | |||
.p2align 4,,15 | |||
.L14:
/*
 * Strided scalar loop: one complex single-precision element per iteration,
 * repeated CTR times (CTR was set from r7 before the loop).
 * NOTE(review): presumably r8 = x, r10 = y, f1 = real(alpha),
 * f2 = imag(alpha), r9/r7 = byte strides of x/y -- confirm against the
 * caxpy_k prototype.
 *
 * Fix: the real-part update selected fmsubs for CONJ and fmadds otherwise,
 * the opposite of every other scalar path in this file and of the POWER9
 * version. The plain (non-CONJ) product needs a_r*x_r - a_i*x_i, the
 * conjugated one a_r*x_r + a_i*x_i.
 */
    lfs 10,4(8)             /* f10 = x imag */
    lfs 11,0(8)             /* f11 = x real */
    lfs 12,0(10)            /* f12 = y real */
    lfs 0,4(10)             /* f0  = y imag */
    fmuls 10,2,10           /* f10 = f2 * x imag */
#ifdef CONJ
    fmadds 11,11,1,10       /* f11 = x real * f1 + f10 */
#else
    fmsubs 11,11,1,10       /* f11 = x real * f1 - f10 */
#endif
    fadds 12,12,11
    stfs 12,0(10)           /* store updated y real */
    lfs 11,0(8)             /* reload x real */
    lfs 12,4(8)             /* reload x imag */
    add 8,8,9               /* advance x by its stride */
    fmuls 11,2,11           /* f11 = f2 * x real */
#ifdef CONJ
    fmsubs 12,12,1,11       /* f12 = x imag * f1 - f11 */
    fsubs 0,0,12            /* y imag -= f12 */
#else
    fmadds 12,12,1,11       /* f12 = x imag * f1 + f11 */
    fadds 0,0,12            /* y imag += f12 */
#endif
    stfs 0,4(10)            /* store updated y imag */
    add 10,10,7             /* advance y by its stride */
    bdnz .L14
.L33: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
.L41: | |||
ld 6,96(1) | |||
cmpdi 7,6,1 | |||
bne 7,.L3 | |||
rldicr. 4,7,0,59 | |||
std 31,-8(1) | |||
li 11,0 | |||
bne 0,.L42 | |||
.L4: | |||
addi 6,11,8 | |||
subf 0,4,7 | |||
sldi 6,6,2 | |||
addi 9,6,-32 | |||
add 5,10,6 | |||
add 3,8,9 | |||
add 6,8,6 | |||
subfc 5,5,3 | |||
add 9,10,9 | |||
subfe 5,5,5 | |||
subfc 6,6,9 | |||
subfe 31,31,31 | |||
addi 6,5,1 | |||
addi 5,31,1 | |||
or 6,6,5 | |||
rlwinm 6,6,0,0xff | |||
cmpwi 7,6,0 | |||
beq 7,.L7 | |||
sradi 6,4,63 | |||
srdi 5,7,63 | |||
subfc 31,7,4 | |||
adde 6,5,6 | |||
subfic 31,0,3 | |||
subfe 31,31,31 | |||
xori 6,6,0x1 | |||
neg 31,31 | |||
and 6,6,31 | |||
rlwinm 6,6,0,0xff | |||
cmpwi 7,6,0 | |||
beq 7,.L7 | |||
cmpd 7,4,7 | |||
li 6,1 | |||
blt 7,.L43 | |||
.L9: | |||
addi 0,7,-1 | |||
subf 0,4,0 | |||
subfic 0,0,3 | |||
subfe 31,31,31 | |||
addi 0,31,1 | |||
rlwinm 0,0,0,0xff | |||
cmpwi 7,0,0 | |||
bne 7,.L10 | |||
sradi 0,4,63 | |||
subfc 31,7,4 | |||
adde 5,5,0 | |||
rlwinm 5,5,0,0xff | |||
cmpwi 7,5,0 | |||
bne 7,.L10 | |||
addi 0,6,-1 | |||
addis 31,2,.LC3@toc@ha | |||
std 30,-16(1) | |||
xscvdpspn 12,1 | |||
xscvdpspn 11,2 | |||
srdi. 30,0,2 | |||
addis 6,2,.LC2@toc@ha | |||
addi 6,6,.LC2@toc@l | |||
mtctr 30 | |||
addi 31,31,.LC3@toc@l | |||
lxvd2x 42,0,6 | |||
li 5,16 | |||
li 6,0 | |||
lxvd2x 41,0,31 | |||
xxspltw 12,12,0 | |||
xxspltw 11,11,0 | |||
xxpermdi 42,42,42,2 | |||
xxpermdi 41,41,41,2 | |||
beq 0,.L44 | |||
.p2align 4,,15 | |||
.L11: | |||
#ifdef CONJ | |||
lxvd2x 44,3,6 | |||
lxvd2x 45,3,5 | |||
lxvd2x 33,9,6 | |||
lxvd2x 0,9,5 | |||
xxpermdi 44,44,44,2 | |||
xxpermdi 45,45,45,2 | |||
xxpermdi 32,33,33,2 | |||
xxpermdi 33,0,0,2 | |||
vperm 11,13,12,10 | |||
vperm 13,13,12,9 | |||
vperm 12,1,0,10 | |||
vperm 1,1,0,9 | |||
xvmulsp 0,11,43 | |||
xvmulsp 32,11,45 | |||
xvmsubmsp 45,12,0 | |||
xvmaddasp 32,12,43 | |||
xvaddsp 44,32,44 | |||
xvsubsp 32,33,45 | |||
vmrglw 1,0,12 | |||
vmrghw 0,0,12 | |||
#else | |||
lxvd2x 45,3,6 | |||
lxvd2x 33,3,5 | |||
lxvd2x 43,9,6 | |||
lxvd2x 0,9,5 | |||
xxpermdi 45,45,45,2 | |||
xxpermdi 33,33,33,2 | |||
xxpermdi 32,43,43,2 | |||
xxpermdi 43,0,0,2 | |||
vperm 12,1,13,10 | |||
vperm 1,1,13,9 | |||
vperm 13,11,0,10 | |||
vperm 11,11,0,9 | |||
xvmulsp 0,11,44 | |||
xvmulsp 32,11,33 | |||
xvmaddmsp 33,12,0 | |||
xvmsubasp 32,12,44 | |||
xvaddsp 45,32,45 | |||
xvaddsp 32,33,43 | |||
vmrglw 1,0,13 | |||
vmrghw 0,0,13 | |||
#endif | |||
xxpermdi 0,33,33,2 | |||
xxpermdi 32,32,32,2 | |||
stxvd2x 0,9,6 | |||
addi 6,6,32 | |||
stxvd2x 32,9,5 | |||
addi 5,5,32 | |||
bdnz .L11 | |||
rldicr 0,0,0,61 | |||
ld 30,-16(1) | |||
sldi 9,0,1 | |||
add 4,4,0 | |||
add 11,11,9 | |||
.L10: | |||
sldi 6,11,2 | |||
addi 9,4,1 | |||
addi 5,6,4 | |||
cmpd 7,7,9 | |||
lfsx 12,8,6 | |||
lfsx 0,10,6 | |||
addi 9,11,2 | |||
lfsx 11,8,5 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,12,1,11 | |||
#else | |||
fmsubs 12,12,1,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,6 | |||
lfsx 11,8,6 | |||
lfsx 12,8,5 | |||
lfsx 0,10,5 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,12,1,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,12,1,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfsx 0,10,5 | |||
ble 7,.L39 | |||
/*
 * Scalar tail, second leftover element (unit-stride path).
 * r9 holds the float index of the element; it is scaled to a byte offset
 * and r5 addresses the imaginary part 4 bytes further on.
 *
 * Fix: the imaginary-part update below was emitted only in its CONJ form
 * (fmsubs/fsubs) with no preprocessor guard, unlike the first, third and
 * fourth tail elements and the POWER9 version of this code. The non-CONJ
 * build needs fmadds/fadds here.
 */
    sldi 9,9,2
    addi 6,4,2
    addi 5,9,4
    cmpd 7,7,6              /* more elements left after this one? */
    lfsx 12,8,9             /* f12 = x real */
    lfsx 0,10,9             /* f0  = y real */
    addi 6,11,4
    lfsx 11,8,5             /* f11 = x imag */
    fmuls 11,2,11           /* f11 = f2 * x imag */
#ifdef CONJ
    fmadds 12,1,12,11       /* f12 = f1 * x real + f11 */
#else
    fmsubs 12,1,12,11       /* f12 = f1 * x real - f11 */
#endif
    fadds 0,0,12
    stfsx 0,10,9            /* store updated y real */
    lfsx 11,8,9             /* f11 = x real */
    lfsx 12,8,5             /* f12 = x imag */
    lfsx 0,10,5             /* f0  = y imag */
    fmuls 11,2,11           /* f11 = f2 * x real */
#ifdef CONJ
    fmsubs 12,1,12,11       /* f12 = f1 * x imag - f11 */
    fsubs 0,0,12            /* y imag -= f12 */
#else
    fmadds 12,1,12,11       /* f12 = f1 * x imag + f11 */
    fadds 0,0,12            /* y imag += f12 */
#endif
    stfsx 0,10,5            /* store updated y imag */
    ble 7,.L39
sldi 6,6,2 | |||
addi 4,4,3 | |||
addi 5,6,4 | |||
cmpd 7,7,4 | |||
lfsx 12,8,6 | |||
lfsx 0,10,6 | |||
addi 9,11,6 | |||
lfsx 11,8,5 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,1,12,11 | |||
#else | |||
fmsubs 12,1,12,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,6 | |||
lfsx 11,8,6 | |||
lfsx 12,8,5 | |||
lfsx 0,10,5 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,1,12,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,1,12,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfsx 0,10,5 | |||
ble 7,.L39 | |||
sldi 9,9,2 | |||
ld 31,-8(1) | |||
addi 7,9,4 | |||
lfsx 12,8,9 | |||
lfsx 0,10,9 | |||
lfsx 11,8,7 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,1,12,11 | |||
#else | |||
fmsubs 12,1,12,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,9 | |||
lfsx 11,8,9 | |||
lfsx 12,8,7 | |||
lfsx 0,10,7 | |||
fmuls 2,2,11 | |||
#ifdef CONJ | |||
fmsubs 1,1,12,2 | |||
fsubs 1,0,1 | |||
#else | |||
fmadds 1,1,12,2 | |||
fadds 1,0,1 | |||
#endif | |||
stfsx 1,10,7 | |||
b .L33 | |||
.L43: | |||
mr 6,0 | |||
b .L9 | |||
.L7: | |||
addi 10,4,1 | |||
cmpd 7,10,7 | |||
subf 10,4,7 | |||
mtctr 10 | |||
bgt 7,.L26 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,7,10 | |||
beq 7,.L26 | |||
.p2align 4,,15 | |||
.L13: | |||
lfs 10,4(3) | |||
lfs 11,0(3) | |||
addi 9,9,8 | |||
addi 3,3,8 | |||
lfs 12,-8(9) | |||
lfs 0,-4(9) | |||
fmuls 10,2,10 | |||
#ifdef CONJ | |||
fmadds 11,1,11,10 | |||
#else | |||
fmsubs 11,1,11,10 | |||
#endif | |||
fadds 12,12,11 | |||
stfs 12,-8(9) | |||
lfs 11,-8(3) | |||
lfs 12,-4(3) | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,1,12,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,1,12,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfs 0,-4(9) | |||
bdnz .L13 | |||
.L39: | |||
ld 31,-8(1) | |||
b .L33 | |||
.L42: | |||
#ifdef CONJ | |||
fneg 0,1 | |||
xxpermdi 32,1,1,0 | |||
addis 9,2,.LANCHOR0@toc@ha | |||
std 28,-32(1) | |||
sradi. 28,4,1 | |||
addi 9,9,.LANCHOR0@toc@l | |||
xscvdpspn 5,2 | |||
xvcvdpsp 32,32 | |||
lxvd2x 12,0,9 | |||
xxpermdi 39,0,0,0 | |||
xxspltw 5,5,0 | |||
xvcvdpsp 39,39 | |||
#else | |||
fneg 0,2 | |||
xxpermdi 39,2,2,0 | |||
addis 9,2,.LANCHOR0@toc@ha | |||
std 28,-32(1) | |||
sradi. 28,4,1 | |||
addi 9,9,.LANCHOR0@toc@l | |||
xscvdpspn 5,1 | |||
xvcvdpsp 39,39 | |||
lxvd2x 12,0,9 | |||
xxpermdi 32,0,0,0 | |||
xxspltw 5,5,0 | |||
xvcvdpsp 32,32 | |||
#endif | |||
xxpermdi 12,12,12,2 | |||
vmrgew 7,7,0 | |||
beq 0,.L5 | |||
xxlnor 38,12,12 | |||
std 29,-24(1) | |||
std 30,-16(1) | |||
mr 6,8 | |||
mr 9,10 | |||
li 29,0 | |||
li 30,16 | |||
li 31,32 | |||
li 12,48 | |||
li 0,64 | |||
li 11,80 | |||
li 3,96 | |||
li 5,112 | |||
.p2align 4,,15 | |||
.L6: | |||
lxvd2x 6,0,9 | |||
lxvd2x 40,0,6 | |||
addi 29,29,8 | |||
lxvd2x 41,6,30 | |||
lxvd2x 42,6,31 | |||
cmpd 7,28,29 | |||
lxvd2x 43,6,12 | |||
lxvd2x 44,6,0 | |||
lxvd2x 45,6,11 | |||
lxvd2x 33,6,3 | |||
lxvd2x 32,6,5 | |||
lxvd2x 7,9,30 | |||
addi 6,6,128 | |||
lxvd2x 8,9,31 | |||
lxvd2x 9,9,12 | |||
xxpermdi 40,40,40,2 | |||
xxpermdi 6,6,6,2 | |||
lxvd2x 10,9,0 | |||
lxvd2x 11,9,11 | |||
xxpermdi 41,41,41,2 | |||
xxpermdi 42,42,42,2 | |||
lxvd2x 12,9,3 | |||
lxvd2x 0,9,5 | |||
xxpermdi 43,43,43,2 | |||
xxpermdi 44,44,44,2 | |||
xxpermdi 45,45,45,2 | |||
xxpermdi 33,33,33,2 | |||
xxpermdi 32,32,32,2 | |||
xxpermdi 7,7,7,2 | |||
xxpermdi 8,8,8,2 | |||
xxpermdi 9,9,9,2 | |||
xxpermdi 10,10,10,2 | |||
xxpermdi 11,11,11,2 | |||
xxpermdi 12,12,12,2 | |||
xxpermdi 0,0,0,2 | |||
#ifndef CONJ | |||
xvmaddasp 6,5,40 | |||
xvmaddasp 7,5,41 | |||
xvmaddasp 8,5,42 | |||
xvmaddasp 9,5,43 | |||
xvmaddasp 10,5,44 | |||
xvmaddasp 11,5,45 | |||
xvmaddasp 12,5,33 | |||
xvmaddasp 0,5,32 | |||
vperm 8,8,8,6 | |||
vperm 9,9,9,6 | |||
vperm 10,10,10,6 | |||
vperm 11,11,11,6 | |||
vperm 12,12,12,6 | |||
vperm 13,13,13,6 | |||
vperm 1,1,1,6 | |||
vperm 0,0,0,6 | |||
#endif | |||
xvmaddasp 6,39,40 | |||
xvmaddasp 7,39,41 | |||
xvmaddasp 8,39,42 | |||
xvmaddasp 9,39,43 | |||
xvmaddasp 10,39,44 | |||
xvmaddasp 11,39,45 | |||
xvmaddasp 12,39,33 | |||
xvmaddasp 0,39,32 | |||
#ifdef CONJ | |||
vperm 8,8,8,6 | |||
vperm 9,9,9,6 | |||
vperm 10,10,10,6 | |||
vperm 11,11,11,6 | |||
vperm 12,12,12,6 | |||
vperm 13,13,13,6 | |||
vperm 1,1,1,6 | |||
vperm 0,0,0,6 | |||
xvmaddasp 6,5,40 | |||
xvmaddasp 7,5,41 | |||
xvmaddasp 8,5,42 | |||
xvmaddasp 9,5,43 | |||
xvmaddasp 10,5,44 | |||
xvmaddasp 11,5,45 | |||
xvmaddasp 12,5,33 | |||
xvmaddasp 0,5,32 | |||
#endif | |||
xxpermdi 6,6,6,2 | |||
xxpermdi 7,7,7,2 | |||
xxpermdi 8,8,8,2 | |||
xxpermdi 9,9,9,2 | |||
stxvd2x 6,0,9 | |||
xxpermdi 10,10,10,2 | |||
stxvd2x 7,9,30 | |||
xxpermdi 11,11,11,2 | |||
stxvd2x 8,9,31 | |||
xxpermdi 12,12,12,2 | |||
stxvd2x 9,9,12 | |||
xxpermdi 0,0,0,2 | |||
stxvd2x 10,9,0 | |||
stxvd2x 11,9,11 | |||
stxvd2x 12,9,3 | |||
stxvd2x 0,9,5 | |||
addi 9,9,128 | |||
bgt 7,.L6 | |||
ld 29,-24(1) | |||
ld 30,-16(1) | |||
.L5: | |||
cmpd 7,7,4 | |||
ble 7,.L36 | |||
sldi 11,4,1 | |||
ld 28,-32(1) | |||
b .L4 | |||
.L36: | |||
ld 28,-32(1) | |||
ld 31,-8(1) | |||
b .L33 | |||
.L44: | |||
li 31,1 | |||
mtctr 31 | |||
b .L11 | |||
.L26: | |||
li 10,1 | |||
mtctr 10 | |||
b .L13 | |||
.long 0 | |||
.byte 0,0,0,0,0,4,0,0 | |||
.size caxpy_k,.-caxpy_k | |||
.section .rodata | |||
.align 4 | |||
.set .LANCHOR0,. + 0 | |||
.type swap_mask_arr, @object | |||
.size swap_mask_arr, 16 | |||
swap_mask_arr: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
.LC2: | |||
.byte 31 | |||
.byte 30 | |||
.byte 29 | |||
.byte 28 | |||
.byte 23 | |||
.byte 22 | |||
.byte 21 | |||
.byte 20 | |||
.byte 15 | |||
.byte 14 | |||
.byte 13 | |||
.byte 12 | |||
.byte 7 | |||
.byte 6 | |||
.byte 5 | |||
.byte 4 | |||
.LC3: | |||
.byte 27 | |||
.byte 26 | |||
.byte 25 | |||
.byte 24 | |||
.byte 19 | |||
.byte 18 | |||
.byte 17 | |||
.byte 16 | |||
.byte 11 | |||
.byte 10 | |||
.byte 9 | |||
.byte 8 | |||
.byte 3 | |||
.byte 2 | |||
.byte 1 | |||
.byte 0 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.gnu_attribute 4, 1 | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,538 @@ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
/* | |||
.file "caxpy.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl caxpy_k | |||
.type caxpy_k, @function | |||
*/ | |||
PROLOGUE | |||
caxpy_k: | |||
.LCF0: | |||
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry caxpy_k,.-caxpy_k | |||
mr. 7,3 | |||
ble 0,.L33 | |||
cmpdi 7,9,1 | |||
beq 7,.L37 | |||
.L3: | |||
mtctr 7 | |||
ld 7,96(1) | |||
sldi 9,9,3 | |||
sldi 7,7,3 | |||
.p2align 4,,15 | |||
.L14: | |||
/*
 * Strided scalar loop: processes one pair of floats (offsets 0 and 4) from
 * r8 and r10 per iteration; the iteration count was moved into CTR from r7
 * before the loop, and r9/r7 were shifted left by 3 to serve as byte strides.
 * NOTE(review): presumably r8 = x, r10 = y, f1 = real(alpha),
 * f2 = imag(alpha) -- confirm against the caxpy_k prototype.
 */
lfs 10,4(8) | |||
lfs 11,0(8) | |||
lfs 12,0(10) | |||
lfs 0,4(10) | |||
/* f10 = f2 * (word at r8+4) */
fmuls 10,2,10 | |||
/* First result word: CONJ adds the f2 product, the plain build subtracts it. */
#ifdef CONJ | |||
fmadds 11,11,1,10 | |||
#else | |||
fmsubs 11,11,1,10 | |||
#endif | |||
fadds 12,12,11 | |||
stfs 12,0(10) | |||
/* Both source words are reloaded after the store above. */
lfs 11,0(8) | |||
lfs 12,4(8) | |||
add 8,8,9 | |||
/* f11 = f2 * (word at r8+0) */
fmuls 11,2,11 | |||
/* Second result word: CONJ subtracts (f1*src1 - f11), plain adds (f1*src1 + f11). */
#ifdef CONJ | |||
fmsubs 12,12,1,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,12,1,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfs 0,4(10) | |||
add 10,10,7 | |||
bdnz .L14 | |||
.L33: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
.L37: | |||
ld 6,96(1) | |||
cmpdi 7,6,1 | |||
bne 7,.L3 | |||
rldicr. 4,7,0,59 | |||
li 11,0 | |||
bne 0,.L38 | |||
.L4: | |||
addi 6,11,8 | |||
subf 0,4,7 | |||
sldi 6,6,2 | |||
addi 9,6,-32 | |||
add 5,10,6 | |||
add 6,8,6 | |||
add 3,8,9 | |||
add 9,10,9 | |||
subfc 5,5,3 | |||
subfe 5,5,5 | |||
subfc 6,6,9 | |||
subfe 12,12,12 | |||
addi 6,5,1 | |||
addi 5,12,1 | |||
or 6,6,5 | |||
rlwinm 6,6,0,0xff | |||
cmpwi 7,6,0 | |||
beq 7,.L7 | |||
sradi 6,4,63 | |||
srdi 5,7,63 | |||
subfc 12,7,4 | |||
adde 6,5,6 | |||
subfic 12,0,4 | |||
subfe 12,12,12 | |||
xori 6,6,0x1 | |||
neg 12,12 | |||
and 6,6,12 | |||
rlwinm 6,6,0,0xff | |||
cmpwi 7,6,0 | |||
beq 7,.L7 | |||
cmpd 7,4,7 | |||
li 6,1 | |||
blt 7,.L39 | |||
.L9: | |||
addi 0,7,-1 | |||
subf 0,4,0 | |||
subfic 0,0,3 | |||
subfe 12,12,12 | |||
addi 0,12,1 | |||
rlwinm 0,0,0,0xff | |||
cmpwi 7,0,0 | |||
bne 7,.L10 | |||
sradi 0,4,63 | |||
subfc 12,7,4 | |||
adde 5,5,0 | |||
rlwinm 5,5,0,0xff | |||
cmpwi 7,5,0 | |||
bne 7,.L10 | |||
xscvdpspn 0,1 | |||
xscvdpspn 12,2 | |||
addi 0,6,-1 | |||
std 31,-8(1) | |||
addis 12,2,.LC2@toc@ha | |||
addis 6,2,.LC3@toc@ha | |||
li 5,16 | |||
srdi. 31,0,2 | |||
addi 6,6,.LC3@toc@l | |||
addi 12,12,.LC2@toc@l | |||
mtctr 31 | |||
lxv 41,0(6) | |||
lxv 42,0(12) | |||
li 6,0 | |||
xxspltw 0,0,0 | |||
xxspltw 12,12,0 | |||
beq 0,.L40 | |||
.p2align 4,,15 | |||
.L11: | |||
#ifdef CONJ | |||
lxvx 33,3,5 | |||
lxvx 44,3,6 | |||
lxvx 43,9,6 | |||
lxvx 32,9,5 | |||
vperm 13,1,12,10 | |||
vperm 12,1,12,9 | |||
vperm 8,0,11,10 | |||
vperm 0,0,11,9 | |||
xvmulsp 33,12,44 | |||
xvmulsp 11,12,45 | |||
xvmaddasp 33,0,45 | |||
xvmsubmsp 44,0,11 | |||
xvaddsp 33,33,40 | |||
xvsubsp 32,32,44 | |||
#else | |||
lxvx 33,3,6 | |||
lxvx 32,3,5 | |||
lxvx 43,9,6 | |||
lxvx 44,9,5 | |||
vperm 13,0,1,10 | |||
vperm 0,0,1,9 | |||
vperm 8,12,11,10 | |||
vperm 12,12,11,9 | |||
xvmulsp 33,12,32 | |||
xvmulsp 11,12,45 | |||
xvmsubasp 33,0,45 | |||
xvmaddmsp 32,0,11 | |||
xvaddsp 33,33,40 | |||
xvaddsp 32,32,44 | |||
#endif | |||
vmrglw 13,0,1 | |||
vmrghw 0,0,1 | |||
stxvx 45,9,6 | |||
stxvx 32,9,5 | |||
addi 6,6,32 | |||
addi 5,5,32 | |||
bdnz .L11 | |||
rldicr 0,0,0,61 | |||
ld 31,-8(1) | |||
sldi 9,0,1 | |||
add 4,4,0 | |||
add 11,11,9 | |||
.L10: | |||
sldi 5,11,2 | |||
addi 6,4,1 | |||
addi 9,11,2 | |||
addi 3,5,4 | |||
lfsx 12,8,5 | |||
cmpd 7,7,6 | |||
lfsx 0,10,5 | |||
lfsx 11,8,3 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,12,1,11 | |||
#else | |||
fmsubs 12,12,1,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,5 | |||
lfsx 11,8,5 | |||
lfsx 12,8,3 | |||
lfsx 0,10,3 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,12,1,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,12,1,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfsx 0,10,3 | |||
ble 7,.L33 | |||
sldi 9,9,2 | |||
addi 5,4,2 | |||
addi 6,11,4 | |||
addi 3,9,4 | |||
lfsx 12,8,9 | |||
cmpd 7,7,5 | |||
lfsx 0,10,9 | |||
lfsx 11,8,3 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,1,12,11 | |||
#else | |||
fmsubs 12,1,12,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,9 | |||
lfsx 11,8,9 | |||
lfsx 12,8,3 | |||
lfsx 0,10,3 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,1,12,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,1,12,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfsx 0,10,3 | |||
ble 7,.L33 | |||
sldi 6,6,2 | |||
addi 4,4,3 | |||
addi 9,11,6 | |||
addi 5,6,4 | |||
lfsx 12,8,6 | |||
cmpd 7,7,4 | |||
lfsx 0,10,6 | |||
lfsx 11,8,5 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,1,12,11 | |||
#else | |||
fmsubs 12,1,12,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,6 | |||
lfsx 11,8,6 | |||
lfsx 12,8,5 | |||
lfsx 0,10,5 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,1,12,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,1,12,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfsx 0,10,5 | |||
ble 7,.L33 | |||
sldi 9,9,2 | |||
addi 7,9,4 | |||
lfsx 12,8,9 | |||
lfsx 0,10,9 | |||
lfsx 11,8,7 | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmadds 12,1,12,11 | |||
#else | |||
fmsubs 12,1,12,11 | |||
#endif | |||
fadds 0,0,12 | |||
stfsx 0,10,9 | |||
lfsx 11,8,9 | |||
lfsx 12,8,7 | |||
lfsx 0,10,7 | |||
fmuls 2,2,11 | |||
#ifdef CONJ | |||
fmsubs 1,1,12,2 | |||
fsubs 1,0,1 | |||
#else | |||
fmadds 1,1,12,2 | |||
fadds 1,0,1 | |||
#endif | |||
stfsx 1,10,7 | |||
b .L33 | |||
.L39: | |||
mr 6,0 | |||
b .L9 | |||
.L38: | |||
#ifdef CONJ | |||
fneg 0,1 | |||
xxpermdi 45,1,1,0 | |||
xscvdpspn 12,2 | |||
addis 9,2,.LANCHOR0@toc@ha | |||
sradi. 3,4,1 | |||
xxpermdi 44,0,0,0 | |||
addi 9,9,.LANCHOR0@toc@l | |||
xvcvdpsp 45,45 | |||
lxv 33,0(9) | |||
xvcvdpsp 32,44 | |||
xxspltw 12,12,0 | |||
#else | |||
fneg 12,2 | |||
xxpermdi 32,2,2,0 | |||
xscvdpspn 0,1 | |||
addis 9,2,.LANCHOR0@toc@ha | |||
sradi. 3,4,1 | |||
xxpermdi 45,12,12,0 | |||
addi 9,9,.LANCHOR0@toc@l | |||
xvcvdpsp 32,32 | |||
lxv 33,0(9) | |||
xvcvdpsp 45,45 | |||
xxspltw 0,0,0 | |||
#endif | |||
vmrgew 0,0,13 | |||
beq 0,.L5 | |||
mr 6,8 | |||
mr 9,10 | |||
li 5,0 | |||
.p2align 4,,15 | |||
.L6: | |||
lxv 38,16(6) | |||
lxv 11,16(9) | |||
addi 5,5,8 | |||
addi 6,6,128 | |||
addi 9,9,128 | |||
lxv 39,-96(6) | |||
lxv 40,-80(6) | |||
lxv 41,-64(6) | |||
lxv 42,-48(6) | |||
cmpd 7,3,5 | |||
lxv 43,-32(6) | |||
lxv 45,-128(6) | |||
lxv 44,-16(6) | |||
#ifdef CONJ | |||
lxv 0,-128(9) | |||
vpermr 17,6,6,1 | |||
xvmaddmsp 38,32,11 | |||
lxv 11,-96(9) | |||
vpermr 18,7,7,1 | |||
vpermr 19,8,8,1 | |||
vpermr 2,9,9,1 | |||
vpermr 3,10,10,1 | |||
vpermr 4,11,11,1 | |||
xvmaddasp 0,32,45 | |||
vpermr 5,12,12,1 | |||
xvmaddmsp 39,32,11 | |||
lxv 11,-80(9) | |||
vpermr 13,13,13,1 | |||
xvmaddasp 38,12,49 | |||
xvmaddmsp 40,32,11 | |||
lxv 11,-64(9) | |||
xvmaddmsp 45,12,0 | |||
xvmaddasp 39,12,50 | |||
stxv 38,-112(9) | |||
xvmaddmsp 41,32,11 | |||
lxv 11,-48(9) | |||
xvmaddasp 40,12,51 | |||
stxv 45,-128(9) | |||
stxv 39,-96(9) | |||
xvmaddmsp 42,32,11 | |||
lxv 11,-32(9) | |||
xvmaddasp 41,12,34 | |||
stxv 40,-80(9) | |||
xvmaddmsp 43,32,11 | |||
lxv 11,-16(9) | |||
xvmaddasp 42,12,35 | |||
stxv 41,-64(9) | |||
xvmaddmsp 44,32,11 | |||
xvmaddasp 43,12,36 | |||
stxv 42,-48(9) | |||
xvmaddasp 44,12,37 | |||
#else | |||
lxv 12,-128(9) | |||
vpermr 17,6,6,1 | |||
xvmaddmsp 38,0,11 | |||
lxv 11,-96(9) | |||
vpermr 18,7,7,1 | |||
vpermr 19,8,8,1 | |||
vpermr 2,9,9,1 | |||
vpermr 3,10,10,1 | |||
vpermr 4,11,11,1 | |||
xvmaddasp 12,0,45 | |||
vpermr 5,12,12,1 | |||
xvmaddmsp 39,0,11 | |||
lxv 11,-80(9) | |||
vpermr 13,13,13,1 | |||
xvmaddasp 38,32,49 | |||
xvmaddmsp 40,0,11 | |||
lxv 11,-64(9) | |||
xvmaddmsp 45,32,12 | |||
xvmaddasp 39,32,50 | |||
stxv 38,-112(9) | |||
xvmaddmsp 41,0,11 | |||
lxv 11,-48(9) | |||
xvmaddasp 40,32,51 | |||
stxv 45,-128(9) | |||
stxv 39,-96(9) | |||
xvmaddmsp 42,0,11 | |||
lxv 11,-32(9) | |||
xvmaddasp 41,32,34 | |||
stxv 40,-80(9) | |||
xvmaddmsp 43,0,11 | |||
lxv 11,-16(9) | |||
xvmaddasp 42,32,35 | |||
stxv 41,-64(9) | |||
xvmaddmsp 44,0,11 | |||
xvmaddasp 43,32,36 | |||
stxv 42,-48(9) | |||
xvmaddasp 44,32,37 | |||
#endif | |||
stxv 43,-32(9) | |||
stxv 44,-16(9) | |||
bgt 7,.L6 | |||
.L5: | |||
cmpd 7,7,4 | |||
ble 7,.L33 | |||
sldi 11,4,1 | |||
b .L4 | |||
.L7: | |||
addi 10,4,1 | |||
subf 8,4,7 | |||
cmpd 7,10,7 | |||
mtctr 8 | |||
bgt 7,.L26 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,7,10 | |||
beq 7,.L26 | |||
.p2align 4,,15 | |||
.L13: | |||
lfs 10,4(3) | |||
lfs 11,0(3) | |||
lfs 12,0(9) | |||
lfs 0,4(9) | |||
addi 3,3,8 | |||
addi 9,9,8 | |||
fmuls 10,2,10 | |||
#ifdef CONJ | |||
fmadds 11,1,11,10 | |||
#else | |||
fmsubs 11,1,11,10 | |||
#endif | |||
fadds 12,12,11 | |||
stfs 12,-8(9) | |||
lfs 11,-8(3) | |||
lfs 12,-4(3) | |||
fmuls 11,2,11 | |||
#ifdef CONJ | |||
fmsubs 12,1,12,11 | |||
fsubs 0,0,12 | |||
#else | |||
fmadds 12,1,12,11 | |||
fadds 0,0,12 | |||
#endif | |||
stfs 0,-4(9) | |||
bdnz .L13 | |||
b .L33 | |||
.L40: | |||
li 31,1 | |||
mtctr 31 | |||
b .L11 | |||
.L26: | |||
li 10,1 | |||
mtctr 10 | |||
b .L13 | |||
.long 0 | |||
.byte 0,0,0,0,0,1,0,0 | |||
.size caxpy_k,.-caxpy_k | |||
.section .rodata | |||
.align 4 | |||
.set .LANCHOR0,. + 0 | |||
.type swap_mask_arr, @object | |||
.size swap_mask_arr, 16 | |||
swap_mask_arr: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
.LC2: | |||
.byte 31 | |||
.byte 30 | |||
.byte 29 | |||
.byte 28 | |||
.byte 23 | |||
.byte 22 | |||
.byte 21 | |||
.byte 20 | |||
.byte 15 | |||
.byte 14 | |||
.byte 13 | |||
.byte 12 | |||
.byte 7 | |||
.byte 6 | |||
.byte 5 | |||
.byte 4 | |||
.LC3: | |||
.byte 27 | |||
.byte 26 | |||
.byte 25 | |||
.byte 24 | |||
.byte 19 | |||
.byte 18 | |||
.byte 17 | |||
.byte 16 | |||
.byte 11 | |||
.byte 10 | |||
.byte 9 | |||
.byte 8 | |||
.byte 3 | |||
.byte 2 | |||
.byte 1 | |||
.byte 0 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.gnu_attribute 4, 1 | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,242 @@ | |||
#define ASSEMBLER
#include "common.h"
/*
 * The compiler-emitted symbol directives are commented out and replaced by
 * PROLOGUE, matching the other gcc-generated kernels in this change set
 * (caxpy_power8.S, caxpy_power9.S, icamax_power8.S). This file is built both
 * with and without CONJ, so exporting a fixed global "cdot_k" would not give
 * each build its own entry-point name.
 * NOTE(review): assumes PROLOGUE from common.h emits the section, .globl and
 * .type directives for the build-specific name -- confirm.
.file "cdot.c"
.abiversion 2
.section ".text"
.align 2
.p2align 4,,15
.globl cdot_k
.type cdot_k, @function
*/
PROLOGUE
cdot_k:
.LCF0: | |||
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry cdot_k,.-cdot_k | |||
mr. 9,3 | |||
ble 0,.L10 | |||
cmpdi 7,5,1 | |||
beq 7,.L18 | |||
.L3: | |||
mtctr 9 | |||
xxlxor 2,2,2 | |||
sldi 5,5,3 | |||
sldi 7,7,3 | |||
#ifdef CONJ | |||
fmr 12,2 | |||
#endif | |||
fmr 8,2 | |||
#ifndef CONJ | |||
fmr 9,2 | |||
#endif | |||
fmr 1,2 | |||
.p2align 4,,15 | |||
.L9: | |||
/*
 * Strided scalar loop: each iteration loads the float pairs at offsets 0/4
 * from r4 and r6, advances the pointers by r5 and r7 (both shifted left by 3
 * before the loop), and accumulates the four cross products into separate
 * registers. CTR was loaded from r9 before the loop. The four partial sums
 * are combined after the loop at .L7.
 * NOTE(review): presumably r4 = x and r6 = y -- confirm against the cdot_k
 * prototype.
 */
#ifdef CONJ | |||
/* CONJ build accumulators: f1 += a0*b0, f12 += a0*b1, f8 += a1*b1, f2 += b0*a1
   (a0/a1 = words at r4+0/r4+4, b0/b1 = words at r6+0/r6+4). */
lfs 9,0(4) | |||
lfs 11,0(6) | |||
lfs 10,4(6) | |||
lfs 0,4(4) | |||
add 6,6,7 | |||
add 4,4,5 | |||
fmadds 1,9,11,1 | |||
fmadds 12,9,10,12 | |||
fmadds 8,0,10,8 | |||
fmadds 2,11,0,2 | |||
#else | |||
/* Plain build accumulators: f1 += a0*b0, f8 += a0*b1, f9 += a1*b1, f2 += b0*a1. */
lfs 10,0(4) | |||
lfs 12,0(6) | |||
lfs 11,4(6) | |||
lfs 0,4(4) | |||
add 6,6,7 | |||
add 4,4,5 | |||
fmadds 1,10,12,1 | |||
fmadds 8,10,11,8 | |||
fmadds 9,0,11,9 | |||
fmadds 2,12,0,2 | |||
#endif | |||
bdnz .L9 | |||
.L7: | |||
#ifdef CONJ | |||
fsubs 2,12,2 | |||
fadds 1,1,8 | |||
#else | |||
fadds 2,2,8 | |||
fsubs 1,1,9 | |||
#endif | |||
blr | |||
.p2align 4,,15 | |||
.L18: | |||
cmpdi 7,7,1 | |||
bne 7,.L3 | |||
rldicr. 10,9,0,60 | |||
bne 0,.L19 | |||
xxlxor 2,2,2 | |||
li 8,0 | |||
#ifdef CONJ | |||
fmr 12,2 | |||
#endif | |||
fmr 8,2 | |||
#ifndef CONJ | |||
fmr 9,2 | |||
#endif | |||
fmr 1,2 | |||
.L4: | |||
addi 7,10,1 | |||
sldi 8,8,2 | |||
subf 10,10,9 | |||
cmpd 7,7,9 | |||
mtctr 10 | |||
add 4,4,8 | |||
add 6,6,8 | |||
bgt 7,.L16 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L16 | |||
.p2align 4,,15 | |||
.L8: | |||
#ifdef CONJ | |||
lfs 9,0(4) | |||
lfs 11,0(6) | |||
lfs 10,4(6) | |||
lfs 0,4(4) | |||
addi 6,6,8 | |||
addi 4,4,8 | |||
fmadds 1,9,11,1 | |||
fmadds 12,9,10,12 | |||
fmadds 8,0,10,8 | |||
fmadds 2,11,0,2 | |||
#else | |||
lfs 10,0(4) | |||
lfs 12,0(6) | |||
lfs 11,4(6) | |||
lfs 0,4(4) | |||
addi 6,6,8 | |||
addi 4,4,8 | |||
fmadds 1,10,12,1 | |||
fmadds 8,10,11,8 | |||
fmadds 9,0,11,9 | |||
fmadds 2,12,0,2 | |||
#endif | |||
bdnz .L8 | |||
b .L7 | |||
.p2align 4,,15 | |||
.L10: | |||
xxlxor 1,1,1 | |||
fmr 2,1 | |||
blr | |||
.L19: | |||
addis 8,2,.LANCHOR0@toc@ha | |||
sradi. 3,10,1 | |||
xxspltib 42,0 | |||
addi 8,8,.LANCHOR0@toc@l | |||
lxv 32,0(8) | |||
beq 0,.L12 | |||
xxlor 6,42,42 | |||
xxlor 4,42,42 | |||
xxlor 0,42,42 | |||
xxlor 7,42,42 | |||
xxlor 5,42,42 | |||
xxlor 3,42,42 | |||
xxlor 12,42,42 | |||
mr 7,4 | |||
mr 8,6 | |||
li 5,0 | |||
.p2align 4,,15 | |||
.L6: | |||
lxv 43,0(8) | |||
lxv 44,16(8) | |||
addi 5,5,4 | |||
addi 8,8,64 | |||
addi 7,7,64 | |||
lxv 45,-32(8) | |||
lxv 33,-16(8) | |||
lxv 8,-64(7) | |||
lxv 9,-48(7) | |||
cmpd 7,3,5 | |||
lxv 10,-32(7) | |||
lxv 11,-16(7) | |||
vpermr 6,11,11,0 | |||
vpermr 7,12,12,0 | |||
vpermr 8,13,13,0 | |||
vpermr 9,1,1,0 | |||
xvmaddasp 12,43,8 | |||
xvmaddasp 3,44,9 | |||
xvmaddasp 0,8,38 | |||
xvmaddasp 4,9,39 | |||
xvmaddasp 6,10,40 | |||
xvmaddasp 5,45,10 | |||
xvmaddasp 42,11,41 | |||
xvmaddasp 7,33,11 | |||
bgt 7,.L6 | |||
xvaddsp 12,12,3 | |||
xvaddsp 0,0,4 | |||
xvaddsp 12,12,5 | |||
xvaddsp 0,0,6 | |||
xvaddsp 12,12,7 | |||
xvaddsp 42,0,42 | |||
.L5: | |||
#ifdef CONJ | |||
xxpermdi 8,12,12,2 | |||
xxpermdi 0,42,42,2 | |||
cmpd 7,9,10 | |||
sldi 8,10,1 | |||
xvaddsp 8,8,12 | |||
xvaddsp 0,0,42 | |||
xxsldwi 1,8,8,3 | |||
xxsldwi 12,0,0,3 | |||
xxsldwi 8,8,8,2 | |||
xxsldwi 0,0,0,2 | |||
xscvspdp 1,1 | |||
xscvspdp 12,12 | |||
xscvspdp 8,8 | |||
#else | |||
xxpermdi 9,12,12,2 | |||
xxpermdi 0,42,42,2 | |||
cmpd 7,9,10 | |||
sldi 8,10,1 | |||
xvaddsp 9,9,12 | |||
xvaddsp 0,0,42 | |||
xxsldwi 1,9,9,3 | |||
xxsldwi 2,0,0,3 | |||
xxsldwi 9,9,9,2 | |||
xxsldwi 0,0,0,2 | |||
xscvspdp 8,2 | |||
xscvspdp 1,1 | |||
xscvspdp 9,9 | |||
#endif | |||
xscvspdp 2,0 | |||
bgt 7,.L4 | |||
b .L7 | |||
.L12: | |||
xxlor 12,42,42 | |||
b .L5 | |||
.L16: | |||
li 9,1 | |||
mtctr 9 | |||
b .L8 | |||
.long 0 | |||
.byte 0,0,0,0,0,0,0,0 | |||
.size cdot_k,.-cdot_k | |||
.section .rodata | |||
.align 4 | |||
.set .LANCHOR0,. + 0 | |||
.type swap_mask_arr, @object | |||
.size swap_mask_arr, 16 | |||
swap_mask_arr: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,458 @@ | |||
/*
 * icamax_k, lxvd2x/vperm variant (compiler output, kept verbatim).
 *
 * As read from the code below:
 *   r3 = element count, r4 = address of the first element,
 *   r5 = stride counted in elements (one element = two adjacent 4-byte
 *        floats, so the byte stride is r5 * 8).
 *   NOTE(review): presumably these are n, x and inc_x of icamax.c -- confirm.
 * Result in r3: 1-based position of the element with the largest
 * fabs(first float) + fabs(second float); 0 when count <= 0 or stride <= 0.
 * Updates happen only on a strictly greater value, and lane merges keep the
 * smaller index on equal values.
 */
/* .file "icamax.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl icamax_k | |||
.type icamax_k, @function | |||
*/ | |||
/* PROLOGUE is defined outside this file (common.h); NOTE(review): presumably
   it emits the exported symbol in place of the commented-out .globl/.type
   directives above -- confirm against common.h. */
#define ASSEMBLER | |||
#include "common.h" | |||
PROLOGUE | |||
icamax_k: | |||
.LCF0: | |||
/* Global entry: compute the TOC pointer r2 from r12; .localentry marks the
   local entry point that follows. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry icamax_k,.-icamax_k | |||
/* r9 = count; return 0 when count <= 0 (.L25) or stride <= 0. */
mr. 9,3 | |||
ble 0,.L25 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/* Stride 1 takes the contiguous path at .L54. */
cmpdi 7,5,1 | |||
beq 7,.L54 | |||
/* Strided path: f11 = fabs(x[0] first) + fabs(x[0] second) seeds the maximum. */
lfs 11,0(4) | |||
lfs 0,4(4) | |||
cmpdi 7,9,1 | |||
fabs 11,11 | |||
fabs 0,0 | |||
fadds 11,11,0 | |||
/* Exactly one element: the answer is 1. */
beq 7,.L29 | |||
/* ctr = count - 1, r5 = stride in bytes, r4 -> second element,
   r3 = best index so far (0-based), r9 = index of the current element. */
addi 9,9,-1 | |||
sldi 5,5,3 | |||
mtctr 9 | |||
add 4,4,5 | |||
li 3,0 | |||
li 9,1 | |||
.p2align 4,,15 | |||
/* Strided scalar loop: replace the maximum only when strictly greater. */
.L24: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
add 4,4,5 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bng 7,.L23 | |||
fmr 11,0 | |||
mr 3,9 | |||
.L23: | |||
addi 9,9,1 | |||
bdnz .L24 | |||
/* Convert the 0-based index into the 1-based return value. */
.L52: | |||
addi 3,3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* count <= 0: return 0. */
.L25: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Contiguous path: r8 = count with the low five bits cleared (multiple of 32).
   A non-zero r8 selects the vector path at .L55. */
.L54: | |||
rldicr. 8,9,0,58 | |||
bne 0,.L55 | |||
/* Fewer than 32 elements: scalar loop over all of them, maximum seeded with
   0.0 (xxlxor), best index 0, ctr = count - r8. */
addi 7,8,1 | |||
li 10,0 | |||
xxlxor 11,11,11 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
add 4,4,10 | |||
subf 10,8,9 | |||
mtctr 10 | |||
li 3,0 | |||
/* Compiler-generated guards: use a trip count of 1 (.L43) when r8 + 1 > count
   or when count equals 0x8000000000000000. */
bgt 7,.L43 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L43 | |||
.p2align 4,,15 | |||
/* Contiguous scalar loop (8 bytes per element); r8 is the current index. */
.L44: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bng 7,.L46 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L46: | |||
addi 8,8,1 | |||
bdnz .L44 | |||
b .L52 | |||
.p2align 4,,15 | |||
/* Vector path set-up. r31 and v24-v31 are stored at negative offsets from r1
   (no frame is allocated here). v18 and v19 are zeroed and v17 is a copy of
   v18. .LC2/.LC3 are loaded into v12/v13, doubleword-swapped and complemented
   (xxlnand x,x,x). .LC4-.LC7 go to v31, v14, v15, v16 and .LC8 to v30.
   v29 = 8 + 8 = 16 in every word. r10 = read pointer, r6 = elements done. */
.L55: | |||
li 0,-144 | |||
std 31,-8(1) | |||
addis 5,2,.LC2@toc@ha | |||
vspltisw 18,0 | |||
vspltisw 19,0 | |||
addis 6,2,.LC3@toc@ha | |||
addi 5,5,.LC2@toc@l | |||
stvx 24,1,0 | |||
li 0,-128 | |||
addi 6,6,.LC3@toc@l | |||
xxlor 49,50,50 | |||
addis 7,2,.LC4@toc@ha | |||
lxvd2x 44,0,5 | |||
addis 10,2,.LC5@toc@ha | |||
stvx 25,1,0 | |||
li 0,-112 | |||
addi 7,7,.LC4@toc@l | |||
lxvd2x 45,0,6 | |||
addis 5,2,.LC6@toc@ha | |||
addis 6,2,.LC7@toc@ha | |||
stvx 26,1,0 | |||
li 0,-96 | |||
addi 10,10,.LC5@toc@l | |||
addi 6,6,.LC7@toc@l | |||
addi 5,5,.LC6@toc@l | |||
stvx 27,1,0 | |||
li 0,-80 | |||
lxvd2x 46,0,10 | |||
xxpermdi 44,44,44,2 | |||
mr 10,4 | |||
lxvd2x 48,0,6 | |||
lxvd2x 47,0,5 | |||
xxpermdi 45,45,45,2 | |||
li 6,0 | |||
stvx 28,1,0 | |||
li 0,-64 | |||
xxlnand 44,44,44 | |||
xxlnand 45,45,45 | |||
stvx 29,1,0 | |||
li 0,-48 | |||
vspltisw 29,8 | |||
vadduwm 29,29,29 | |||
xxpermdi 46,46,46,2 | |||
stvx 30,1,0 | |||
li 0,-32 | |||
xxpermdi 47,47,47,2 | |||
xxpermdi 48,48,48,2 | |||
stvx 31,1,0 | |||
lxvd2x 63,0,7 | |||
addis 7,2,.LC8@toc@ha | |||
addi 7,7,.LC8@toc@l | |||
lxvd2x 62,0,7 | |||
xxpermdi 63,63,63,2 | |||
.p2align 4,,15 | |||
/* Main loop: one iteration reads 256 bytes (32 elements) starting at r10.
   Each 16-byte load is an lxvd2x followed by a doubleword swap (xxpermdi ..,2). */
.L5: | |||
addi 3,10,16 | |||
addi 5,10,32 | |||
lxvd2x 34,0,10 | |||
addi 7,10,64 | |||
addi 31,10,48 | |||
addi 12,10,80 | |||
addi 11,10,96 | |||
lxvd2x 36,0,3 | |||
lxvd2x 37,0,5 | |||
addi 3,10,112 | |||
addi 5,10,128 | |||
lxvd2x 38,0,7 | |||
lxvd2x 7,0,31 | |||
addi 7,10,160 | |||
addi 31,10,144 | |||
lxvd2x 33,0,12 | |||
lxvd2x 39,0,11 | |||
addi 12,10,176 | |||
addi 11,10,192 | |||
lxvd2x 8,0,3 | |||
lxvd2x 40,0,5 | |||
xxpermdi 34,34,34,2 | |||
addi 3,10,208 | |||
addi 5,10,224 | |||
lxvd2x 41,0,7 | |||
lxvd2x 9,0,31 | |||
addi 7,10,240 | |||
lxvd2x 10,0,12 | |||
lxvd2x 42,0,11 | |||
xxpermdi 37,37,37,2 | |||
xxpermdi 36,36,36,2 | |||
addi 6,6,32 | |||
lxvd2x 32,0,3 | |||
lxvd2x 43,0,5 | |||
xxpermdi 7,7,7,2 | |||
xxpermdi 38,38,38,2 | |||
cmpd 7,8,6 | |||
addi 10,10,256 | |||
lxvd2x 11,0,7 | |||
xxpermdi 39,39,39,2 | |||
xxpermdi 33,33,33,2 | |||
xxpermdi 40,40,40,2 | |||
xxpermdi 8,8,8,2 | |||
xxpermdi 41,41,41,2 | |||
xxpermdi 9,9,9,2 | |||
xxpermdi 10,10,10,2 | |||
xxpermdi 42,42,42,2 | |||
xxpermdi 43,43,43,2 | |||
xxpermdi 32,32,32,2 | |||
xxpermdi 11,11,11,2 | |||
/* Absolute value of every loaded float. */
xvabssp 57,37 | |||
xvabssp 58,39 | |||
xvabssp 35,40 | |||
xvabssp 59,41 | |||
xvabssp 34,34 | |||
xvabssp 33,33 | |||
xvabssp 32,32 | |||
xvabssp 60,43 | |||
xvabssp 36,36 | |||
xvabssp 37,7 | |||
xvabssp 38,38 | |||
xvabssp 39,8 | |||
xvabssp 40,9 | |||
xvabssp 41,10 | |||
xvabssp 42,42 | |||
xvabssp 43,11 | |||
/* Permute vector pairs with the v12 and v13 selectors.
   NOTE(review): presumably this separates first and second floats of the
   elements so the xvaddsp below forms one sum per element -- confirm. */
vperm 24,4,2,12 | |||
vperm 4,4,2,13 | |||
vperm 2,5,25,12 | |||
vperm 5,5,25,13 | |||
vperm 25,1,6,12 | |||
vperm 6,1,6,13 | |||
vperm 1,7,26,12 | |||
vperm 7,7,26,13 | |||
vperm 26,8,3,12 | |||
vperm 8,8,3,13 | |||
vperm 3,9,27,12 | |||
vperm 9,9,27,13 | |||
vperm 27,0,10,12 | |||
vperm 10,0,10,13 | |||
vperm 0,11,28,12 | |||
vperm 11,11,28,13 | |||
xvaddsp 12,33,39 | |||
xvaddsp 38,57,38 | |||
xvaddsp 0,32,43 | |||
xvaddsp 42,59,42 | |||
xvaddsp 36,56,36 | |||
xvaddsp 37,34,37 | |||
xvaddsp 40,58,40 | |||
xvaddsp 41,35,41 | |||
/* Compare/select tree: xvcmpgtsp produces per-lane masks; xxsel uses each mask
   twice, once on the sums and once on the index constants (.LC4-.LC7). */
xvcmpgtsp 32,12,38 | |||
xvcmpgtsp 33,0,42 | |||
xvcmpgtsp 43,37,36 | |||
xvcmpgtsp 39,41,40 | |||
xxsel 12,38,12,32 | |||
xxsel 38,47,48,32 | |||
xxsel 0,42,0,33 | |||
xxsel 42,47,48,33 | |||
xxsel 37,36,37,43 | |||
xxsel 43,63,46,43 | |||
xxsel 41,40,41,39 | |||
xxsel 39,63,46,39 | |||
xvcmpgtsp 32,12,37 | |||
xvcmpgtsp 33,0,41 | |||
xxsel 12,37,12,32 | |||
xxsel 43,43,38,32 | |||
xxsel 0,41,0,33 | |||
xxsel 33,39,42,33 | |||
xvcmpgtsp 32,0,12 | |||
vadduwm 1,1,29 | |||
xxsel 0,12,0,32 | |||
xxsel 32,43,33,32 | |||
/* Fold into the running result: vs51 holds the per-lane maximum and vs50 the
   matching indices; v17 is added to the block-local indices and then
   advanced by v30 (32 per word). */
xvcmpgtsp 33,0,51 | |||
vadduwm 0,17,0 | |||
vadduwm 17,17,30 | |||
xxsel 50,50,32,33 | |||
xxsel 51,51,0,33 | |||
bgt 7,.L5 | |||
/* Horizontal reduction: move the four lanes of vs51 into scalar FP registers
   and the four lanes of vs50 into GPRs (zero-extended by rldicl). */
xxsldwi 11,51,51,3 | |||
xxsldwi 12,51,51,2 | |||
vspltw 0,18,3 | |||
xxsldwi 0,51,51,1 | |||
xscvspdp 11,11 | |||
xscvspdp 12,12 | |||
mfvsrwz 6,32 | |||
vspltw 0,18,2 | |||
xscvspdp 0,0 | |||
mfvsrwz 7,50 | |||
mfvsrwz 5,32 | |||
vspltw 0,18,0 | |||
xscvspdp 51,51 | |||
mfvsrwz 10,32 | |||
fcmpu 7,11,12 | |||
rldicl 3,6,0,32 | |||
fmr 10,0 | |||
rldicl 11,7,0,32 | |||
rldicl 31,5,0,32 | |||
rldicl 0,10,0,32 | |||
/* First lane pair (f11/r3 against f12/r31): equal values go to .L56. */
beq 7,.L56 | |||
bnl 7,.L8 | |||
fmr 11,12 | |||
mr 3,31 | |||
/* Second lane pair (f0/r7 against vs51/r10): on equality keep the smaller
   index (unsigned compare); otherwise .L11 decides. */
.L8: | |||
xscmpudp 7,0,51 | |||
bne 7,.L11 | |||
cmplw 7,7,10 | |||
ble 7,.L12 | |||
mr 7,10 | |||
.L12: | |||
rldicl 11,7,0,32 | |||
/* Merge the two pair winners (f11/r3 against f10/r11). */
.L13: | |||
fcmpu 7,11,10 | |||
beq 7,.L57 | |||
blt 7,.L58 | |||
/* Tail: when count > r8, run the scalar loop over the remaining count - r8
   elements, starting r8 * 8 bytes past the base; same guards as above. */
.L17: | |||
cmpd 7,9,8 | |||
ble 7,.L19 | |||
addi 7,8,1 | |||
sldi 10,8,1 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
add 4,4,10 | |||
subf 10,8,9 | |||
mtctr 10 | |||
bgt 7,.L37 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L37 | |||
.p2align 4,,15 | |||
.L21: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bng 7,.L20 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L20: | |||
addi 8,8,1 | |||
bdnz .L21 | |||
/* Vector-path epilogue: restore r31 and v24-v31, return index + 1. */
.L19: | |||
li 0,-144 | |||
ld 31,-8(1) | |||
addi 3,3,1 | |||
lvx 24,1,0 | |||
li 0,-128 | |||
lvx 25,1,0 | |||
li 0,-112 | |||
lvx 26,1,0 | |||
li 0,-96 | |||
lvx 27,1,0 | |||
li 0,-80 | |||
lvx 28,1,0 | |||
li 0,-64 | |||
lvx 29,1,0 | |||
li 0,-48 | |||
lvx 30,1,0 | |||
li 0,-32 | |||
lvx 31,1,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Equal values in the first lane pair: keep the smaller index (unsigned). */
.L56: | |||
cmplw 7,6,5 | |||
ble 7,.L7 | |||
mr 6,5 | |||
.L7: | |||
rldicl 3,6,0,32 | |||
b .L8 | |||
.p2align 4,,15 | |||
/* Strided path with a single element: return 1. */
.L29: | |||
li 3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* Second lane pair differs: when f0 is smaller, take vs51 and its index r0. */
.L11: | |||
bnl 7,.L13 | |||
xscpsgndp 10,51,51 | |||
mr 11,0 | |||
b .L13 | |||
.p2align 4,,15 | |||
/* Pair winners equal: keep the smaller index. */
.L57: | |||
cmpd 7,3,11 | |||
ble 7,.L17 | |||
mr 3,11 | |||
b .L17 | |||
.p2align 4,,15 | |||
/* Second pair winner is larger: adopt its value and index. */
.L58: | |||
fmr 11,10 | |||
mr 3,11 | |||
b .L17 | |||
/* Guard targets: force ctr = 1 and enter the corresponding scalar loop. */
.L43: | |||
li 9,1 | |||
mtctr 9 | |||
b .L44 | |||
.L37: | |||
li 9,1 | |||
mtctr 9 | |||
b .L21 | |||
/* Data words emitted by the compiler after the code.
   NOTE(review): presumably the traceback table -- confirm. */
.long 0 | |||
.byte 0,0,0,0,0,1,0,0 | |||
.size icamax_k,.-icamax_k | |||
/*
 * 16-byte constants addressed through the TOC by the icamax_k vector path.
 * The section is mergeable ("aM") with 16-byte entries and 16-byte alignment.
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
/* .LC2: byte indices 0-3, 8-11, 16-19, 24-27, i.e. 4-byte words 0, 2, 4, 6
   of a 32-byte permute source. */
.LC2: | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.byte 16 | |||
.byte 17 | |||
.byte 18 | |||
.byte 19 | |||
.byte 24 | |||
.byte 25 | |||
.byte 26 | |||
.byte 27 | |||
/* .LC3: byte indices 4-7, 12-15, 20-23, 28-31, i.e. words 1, 3, 5, 7. */
.LC3: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 20 | |||
.byte 21 | |||
.byte 22 | |||
.byte 23 | |||
.byte 28 | |||
.byte 29 | |||
.byte 30 | |||
.byte 31 | |||
/* .LC4-.LC7: the word values 0-3, 4-7, 8-11 and 12-15.
   NOTE(review): these look like per-lane element offsets chosen by xxsel in
   the main loop -- confirm. */
.LC4: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC5: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC6: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC7: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
/* .LC8: the value 32 in every word; the main loop consumes 32 elements per
   iteration and adds this vector to its running base. */
.LC8: | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,387 @@ | |||
/*
 * icamax_k, lxv/vpermr variant (compiler output, kept verbatim).
 *
 * As read from the code below:
 *   r3 = element count, r4 = address of the first element,
 *   r5 = stride counted in elements (one element = two adjacent 4-byte
 *        floats, so the byte stride is r5 * 8).
 *   NOTE(review): presumably these are n, x and inc_x of icamax.c -- confirm.
 * Result in r3: 1-based position of the element with the largest
 * fabs(first float) + fabs(second float); 0 when count <= 0 or stride <= 0.
 * Updates happen only on a strictly greater value, and lane merges keep the
 * smaller index on equal values.
 */
.file "icamax.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl icamax_k | |||
.type icamax_k, @function | |||
icamax_k: | |||
.LCF0: | |||
/* Global entry: compute the TOC pointer r2 from r12; .localentry marks the
   local entry point that follows. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry icamax_k,.-icamax_k | |||
/* r9 = count; return 0 when count <= 0 (.L25) or stride <= 0. */
mr. 9,3 | |||
ble 0,.L25 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/* Stride 1 takes the contiguous path at .L53. */
cmpdi 7,5,1 | |||
beq 7,.L53 | |||
/* Strided path: f11 = fabs(x[0] first) + fabs(x[0] second) seeds the maximum. */
lfs 11,0(4) | |||
lfs 0,4(4) | |||
cmpdi 7,9,1 | |||
fabs 11,11 | |||
fabs 0,0 | |||
fadds 11,11,0 | |||
/* Exactly one element: the answer is 1. */
beq 7,.L29 | |||
/* ctr = count - 1, r5 = stride in bytes, r4 -> second element,
   r3 = best index so far (0-based), r9 = index of the current element. */
addi 9,9,-1 | |||
sldi 5,5,3 | |||
li 3,0 | |||
mtctr 9 | |||
add 4,4,5 | |||
li 9,1 | |||
.p2align 4,,15 | |||
/* Strided scalar loop: replace the maximum only when strictly greater. */
.L24: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
add 4,4,5 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bng 7,.L23 | |||
fmr 11,0 | |||
mr 3,9 | |||
.L23: | |||
addi 9,9,1 | |||
bdnz .L24 | |||
/* Convert the 0-based index into the 1-based return value. */
.L51: | |||
addi 3,3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* count <= 0: return 0. */
.L25: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Contiguous path: r8 = count with the low five bits cleared (multiple of 32).
   A non-zero r8 selects the vector path at .L54. */
.L53: | |||
rldicr. 8,9,0,58 | |||
bne 0,.L54 | |||
/* Fewer than 32 elements: scalar loop over all of them, maximum seeded with
   0.0 (xxlxor), best index 0, ctr = count - r8. */
addi 7,8,1 | |||
li 10,0 | |||
subf 6,8,9 | |||
li 3,0 | |||
xxlxor 11,11,11 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
mtctr 6 | |||
add 4,4,10 | |||
/* Compiler-generated guards: use a trip count of 1 (.L43) when r8 + 1 > count
   or when count equals 0x8000000000000000. */
bgt 7,.L43 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L43 | |||
.p2align 4,,15 | |||
/* Contiguous scalar loop (8 bytes per element); r8 is the current index. */
.L44: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bng 7,.L46 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L46: | |||
addi 8,8,1 | |||
bdnz .L44 | |||
b .L51 | |||
.p2align 4,,15 | |||
/* Vector path set-up. vs56-vs63 are stored at negative offsets from r1 (no
   frame is allocated here). vs47 and vs48 are zeroed and vs46 is a copy of
   vs47. .LC2/.LC3 are loaded into vs44/vs45 and .LC4-.LC7 into vs60-vs63.
   v26 and v27 become 16 and 32 in every word (xxspltib + vextsb2w).
   r10 = read pointer, r7 = elements done. */
.L54: | |||
addis 11,2,.LC2@toc@ha | |||
addis 3,2,.LC3@toc@ha | |||
addis 5,2,.LC6@toc@ha | |||
addis 6,2,.LC7@toc@ha | |||
xxspltib 47,0 | |||
addis 7,2,.LC4@toc@ha | |||
addis 10,2,.LC5@toc@ha | |||
stxv 58,-96(1) | |||
stxv 59,-80(1) | |||
addi 11,11,.LC2@toc@l | |||
addi 3,3,.LC3@toc@l | |||
addi 5,5,.LC6@toc@l | |||
addi 6,6,.LC7@toc@l | |||
stxv 62,-32(1) | |||
stxv 63,-16(1) | |||
xxspltib 58,16 | |||
addi 7,7,.LC4@toc@l | |||
addi 10,10,.LC5@toc@l | |||
xxspltib 59,32 | |||
lxv 44,0(11) | |||
lxv 45,0(3) | |||
xxspltib 48,0 | |||
lxv 62,0(5) | |||
xxlor 46,47,47 | |||
lxv 63,0(6) | |||
stxv 60,-64(1) | |||
stxv 61,-48(1) | |||
lxv 60,0(7) | |||
lxv 61,0(10) | |||
li 7,0 | |||
mr 10,4 | |||
vextsb2w 26,26 | |||
vextsb2w 27,27 | |||
stxv 56,-128(1) | |||
stxv 57,-112(1) | |||
.p2align 4,,15 | |||
/* Main loop: one iteration reads 256 bytes (32 elements). r10 is advanced
   first, so the remaining loads use negative offsets. Loads (lxv), absolute
   values (xvabssp), permutes with the vs44/vs45 selectors (vpermr), sums
   (xvaddsp) and the compare/select tree (xvcmpgtsp/xxsel) are interleaved.
   NOTE(review): presumably the permutes separate first and second floats of
   the elements so each xvaddsp lane is one element's sum -- confirm. */
.L5: | |||
lxv 0,0(10) | |||
addi 7,7,32 | |||
addi 10,10,256 | |||
cmpd 7,8,7 | |||
xvabssp 34,0 | |||
lxv 0,-240(10) | |||
xvabssp 42,0 | |||
lxv 0,-224(10) | |||
xvabssp 49,0 | |||
lxv 0,-208(10) | |||
vpermr 25,10,2,12 | |||
vpermr 2,10,2,13 | |||
xvabssp 35,0 | |||
lxv 0,-192(10) | |||
xvaddsp 34,57,34 | |||
xvabssp 36,0 | |||
lxv 0,-176(10) | |||
vpermr 10,3,17,12 | |||
vpermr 3,3,17,13 | |||
xvabssp 33,0 | |||
lxv 0,-160(10) | |||
xvaddsp 10,42,35 | |||
xvabssp 50,0 | |||
lxv 0,-144(10) | |||
vpermr 17,1,4,12 | |||
vpermr 4,1,4,13 | |||
xvabssp 37,0 | |||
lxv 0,-128(10) | |||
xvaddsp 36,49,36 | |||
xvabssp 38,0 | |||
lxv 0,-112(10) | |||
vpermr 1,5,18,12 | |||
vpermr 5,5,18,13 | |||
xvabssp 43,0 | |||
lxv 0,-96(10) | |||
xvaddsp 12,33,37 | |||
xvabssp 51,0 | |||
lxv 0,-80(10) | |||
vpermr 18,11,6,12 | |||
vpermr 6,11,6,13 | |||
xvabssp 39,0 | |||
lxv 0,-64(10) | |||
xvaddsp 38,50,38 | |||
xvabssp 40,0 | |||
lxv 0,-48(10) | |||
vpermr 11,7,19,12 | |||
vpermr 7,7,19,13 | |||
xvabssp 32,0 | |||
lxv 0,-32(10) | |||
xvaddsp 11,43,39 | |||
xvcmpgtsp 39,10,34 | |||
xvcmpgtsp 43,12,36 | |||
xvabssp 56,0 | |||
lxv 0,-16(10) | |||
vpermr 19,0,8,12 | |||
vpermr 8,0,8,13 | |||
xxsel 10,34,10,39 | |||
xxsel 12,36,12,43 | |||
xxsel 39,60,61,39 | |||
xxsel 43,62,63,43 | |||
xvabssp 41,0 | |||
xvaddsp 40,51,40 | |||
vpermr 0,9,24,12 | |||
vpermr 9,9,24,13 | |||
xvaddsp 0,32,41 | |||
xvcmpgtsp 41,11,38 | |||
xvcmpgtsp 32,12,10 | |||
xvcmpgtsp 42,0,40 | |||
xxsel 11,38,11,41 | |||
xxsel 12,10,12,32 | |||
xxsel 43,39,43,32 | |||
xxsel 41,60,61,41 | |||
xxsel 0,40,0,42 | |||
xxsel 42,62,63,42 | |||
xvcmpgtsp 33,0,11 | |||
xxsel 0,11,0,33 | |||
xxsel 33,41,42,33 | |||
xvcmpgtsp 32,0,12 | |||
vadduwm 1,1,26 | |||
xxsel 0,12,0,32 | |||
xxsel 32,43,33,32 | |||
/* Fold into the running result: vs48 holds the per-lane maximum and vs47 the
   matching indices; v14 is added to the block-local indices and then
   advanced by v27 (32 per word). */
xvcmpgtsp 33,0,48 | |||
vadduwm 0,14,0 | |||
vadduwm 14,14,27 | |||
xxsel 47,47,32,33 | |||
xxsel 48,48,0,33 | |||
bgt 7,.L5 | |||
/* Horizontal reduction: move the four lanes of vs48 into scalar FP registers
   and extract the four words of v15 (vs47) at byte offsets 0, 4, 8 and 12
   with vextuwrx; rldicl zero-extends them. */
xxsldwi 11,48,48,3 | |||
xxsldwi 12,48,48,2 | |||
li 10,0 | |||
li 3,12 | |||
xxsldwi 0,48,48,1 | |||
xscvspdp 48,48 | |||
vextuwrx 6,10,15 | |||
li 10,4 | |||
xscvspdp 11,11 | |||
xscvspdp 12,12 | |||
xscvspdp 0,0 | |||
vextuwrx 5,10,15 | |||
li 10,8 | |||
vextuwrx 7,10,15 | |||
vextuwrx 10,3,15 | |||
rldicl 12,5,0,32 | |||
rldicl 3,6,0,32 | |||
rldicl 11,7,0,32 | |||
rldicl 0,10,0,32 | |||
fcmpu 7,11,12 | |||
fmr 10,0 | |||
/* First lane pair (f11/r3 against f12/r12): equal values go to .L55. */
beq 7,.L55 | |||
bnl 7,.L8 | |||
mr 3,12 | |||
fmr 11,12 | |||
/* Second lane pair (f0/r7 against vs48/r10): on equality keep the smaller
   index (unsigned compare); otherwise .L11 decides. */
.L8: | |||
xscmpudp 7,0,48 | |||
bne 7,.L11 | |||
cmplw 7,7,10 | |||
ble 7,.L12 | |||
mr 7,10 | |||
.L12: | |||
rldicl 11,7,0,32 | |||
/* Merge the two pair winners (f11/r3 against f10/r11). */
.L13: | |||
fcmpu 7,11,10 | |||
beq 7,.L56 | |||
bnl 7,.L17 | |||
mr 3,11 | |||
fmr 11,10 | |||
/* Tail: when count > r8, run the scalar loop over the remaining count - r8
   elements, starting r8 * 8 bytes past the base; same guards as above. */
.L17: | |||
cmpd 7,9,8 | |||
ble 7,.L19 | |||
addi 7,8,1 | |||
sldi 10,8,1 | |||
subf 6,8,9 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
mtctr 6 | |||
add 4,4,10 | |||
bgt 7,.L37 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L37 | |||
.p2align 4,,15 | |||
.L21: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bng 7,.L20 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L20: | |||
addi 8,8,1 | |||
bdnz .L21 | |||
/* Vector-path epilogue: restore vs56-vs63, return index + 1. */
.L19: | |||
lxv 56,-128(1) | |||
lxv 57,-112(1) | |||
addi 3,3,1 | |||
lxv 58,-96(1) | |||
lxv 59,-80(1) | |||
lxv 60,-64(1) | |||
lxv 61,-48(1) | |||
lxv 62,-32(1) | |||
lxv 63,-16(1) | |||
blr | |||
.p2align 4,,15 | |||
/* Equal values in the first lane pair: keep the smaller index (unsigned). */
.L55: | |||
cmplw 7,6,5 | |||
ble 7,.L7 | |||
mr 6,5 | |||
.L7: | |||
rldicl 3,6,0,32 | |||
b .L8 | |||
.p2align 4,,15 | |||
/* Strided path with a single element: return 1. */
.L29: | |||
li 3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* Second lane pair differs: when f0 is smaller, take vs48 and its index r0. */
.L11: | |||
bnl 7,.L13 | |||
mr 11,0 | |||
xscpsgndp 10,48,48 | |||
b .L13 | |||
.p2align 4,,15 | |||
/* Pair winners equal: keep the smaller index. */
.L56: | |||
cmpd 7,3,11 | |||
ble 7,.L17 | |||
mr 3,11 | |||
b .L17 | |||
/* Guard targets: force ctr = 1 and enter the corresponding scalar loop. */
.L37: | |||
li 9,1 | |||
mtctr 9 | |||
b .L21 | |||
.L43: | |||
li 9,1 | |||
mtctr 9 | |||
b .L44 | |||
/* Data words emitted by the compiler after the code.
   NOTE(review): presumably the traceback table -- confirm. */
.long 0 | |||
.byte 0,0,0,0,0,0,0,0 | |||
.size icamax_k,.-icamax_k | |||
/*
 * 16-byte constants addressed through the TOC by the icamax_k vector path.
 * The section is mergeable ("aM") with 16-byte entries and 16-byte alignment.
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
/* .LC2: byte indices 0-3, 8-11, 16-19, 24-27, i.e. 4-byte words 0, 2, 4, 6
   of a 32-byte permute source. */
.LC2: | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.byte 16 | |||
.byte 17 | |||
.byte 18 | |||
.byte 19 | |||
.byte 24 | |||
.byte 25 | |||
.byte 26 | |||
.byte 27 | |||
/* .LC3: byte indices 4-7, 12-15, 20-23, 28-31, i.e. words 1, 3, 5, 7. */
.LC3: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 20 | |||
.byte 21 | |||
.byte 22 | |||
.byte 23 | |||
.byte 28 | |||
.byte 29 | |||
.byte 30 | |||
.byte 31 | |||
/* .LC4-.LC7: the word values 0-3, 4-7, 8-11 and 12-15.
   NOTE(review): these look like per-lane element offsets chosen by xxsel in
   the main loop -- confirm. */
.LC4: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC5: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC6: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC7: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,454 @@ | |||
/*
 * icamin_k, lxvd2x/vperm variant (compiler output, kept verbatim).
 *
 * As read from the code below:
 *   r3 = element count, r4 = address of the first element,
 *   r5 = stride counted in elements (one element = two adjacent 4-byte
 *        floats, so the byte stride is r5 * 8).
 *   NOTE(review): presumably these are n, x and inc_x of icamin.c -- confirm.
 * Result in r3: 1-based position of the element with the smallest
 * fabs(first float) + fabs(second float); 0 when count <= 0 or stride <= 0.
 * The minimum is seeded with the first element's sum; updates happen only on
 * a strictly smaller value, and lane merges keep the smaller index on equal
 * values.
 */
/* .file "icamin.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl icamin_k | |||
.type icamin_k, @function | |||
*/ | |||
/* PROLOGUE is defined outside this file (common.h); NOTE(review): presumably
   it emits the exported symbol in place of the commented-out .globl/.type
   directives above -- confirm against common.h. */
#define ASSEMBLER | |||
#include "common.h" | |||
PROLOGUE | |||
icamin_k: | |||
.LCF0: | |||
/* Global entry: compute the TOC pointer r2 from r12; .localentry marks the
   local entry point that follows. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry icamin_k,.-icamin_k | |||
/* r9 = count; return 0 when count <= 0 (.L25) or stride <= 0. */
mr. 9,3 | |||
ble 0,.L25 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/* f11 = fabs(x[0] first) + fabs(x[0] second): seed for both paths. */
lfs 11,0(4) | |||
lfs 0,4(4) | |||
cmpdi 7,5,1 | |||
fabs 11,11 | |||
fabs 0,0 | |||
fadds 11,11,0 | |||
/* Stride 1 takes the contiguous path at .L54. */
beq 7,.L54 | |||
/* Strided path; exactly one element returns 1 (.L29). */
cmpdi 7,9,1 | |||
beq 7,.L29 | |||
/* ctr = count - 1, r5 = stride in bytes, r4 -> second element,
   r3 = best index so far (0-based), r9 = index of the current element. */
addi 9,9,-1 | |||
sldi 5,5,3 | |||
mtctr 9 | |||
add 4,4,5 | |||
li 3,0 | |||
li 9,1 | |||
.p2align 4,,15 | |||
/* Strided scalar loop: replace the minimum only when strictly smaller. */
.L24: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
add 4,4,5 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bnl 7,.L23 | |||
fmr 11,0 | |||
mr 3,9 | |||
.L23: | |||
addi 9,9,1 | |||
bdnz .L24 | |||
/* Convert the 0-based index into the 1-based return value. */
.L52: | |||
addi 3,3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* count <= 0: return 0. */
.L25: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Contiguous path: r8 = count with the low five bits cleared (multiple of 32).
   A non-zero r8 selects the vector path at .L55. */
.L54: | |||
rldicr. 8,9,0,58 | |||
bne 0,.L55 | |||
/* Fewer than 32 elements: scalar loop over all of them, keeping the f11 seed
   computed above, best index 0, ctr = count - r8. */
addi 7,8,1 | |||
li 10,0 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
add 4,4,10 | |||
subf 10,8,9 | |||
mtctr 10 | |||
li 3,0 | |||
/* Compiler-generated guards: use a trip count of 1 (.L43) when r8 + 1 > count
   or when count equals 0x8000000000000000. */
bgt 7,.L43 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L43 | |||
.p2align 4,,15 | |||
/* Contiguous scalar loop (8 bytes per element); r8 is the current index.
   The update is taken when f11 > new sum. */
.L44: | |||
lfs 0,0(4) | |||
lfs 12,4(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,11,0 | |||
bng 7,.L46 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L46: | |||
addi 8,8,1 | |||
bdnz .L44 | |||
b .L52 | |||
.p2align 4,,15 | |||
/* Vector path set-up. r31 and v25-v31 are stored at negative offsets from r1
   (no frame is allocated here). The f11 seed is converted to single
   precision and replicated into every lane of vs5 (xscvdpspn + xxspltw).
   v19 is zeroed and v18 is a copy of it. .LC2/.LC3 are loaded into v12/v13,
   doubleword-swapped and complemented (xxlnand x,x,x). .LC4-.LC7 go to v30,
   v15, v16, v17 and .LC8 to v14. v31 = 8 + 8 = 16 in every word.
   r10 = read pointer, r6 = elements done. */
.L55: | |||
li 0,-128 | |||
std 31,-8(1) | |||
addis 5,2,.LC2@toc@ha | |||
xscvdpspn 11,11 | |||
vspltisw 19,0 | |||
addis 6,2,.LC3@toc@ha | |||
addi 5,5,.LC2@toc@l | |||
stvx 25,1,0 | |||
li 0,-112 | |||
addi 6,6,.LC3@toc@l | |||
xxlor 50,51,51 | |||
addis 7,2,.LC4@toc@ha | |||
lxvd2x 44,0,5 | |||
addis 10,2,.LC5@toc@ha | |||
stvx 26,1,0 | |||
li 0,-96 | |||
addi 7,7,.LC4@toc@l | |||
lxvd2x 45,0,6 | |||
addis 5,2,.LC6@toc@ha | |||
addis 6,2,.LC7@toc@ha | |||
stvx 27,1,0 | |||
li 0,-80 | |||
addi 10,10,.LC5@toc@l | |||
xxspltw 5,11,0 | |||
addi 6,6,.LC7@toc@l | |||
addi 5,5,.LC6@toc@l | |||
stvx 28,1,0 | |||
li 0,-64 | |||
lxvd2x 47,0,10 | |||
xxpermdi 44,44,44,2 | |||
mr 10,4 | |||
lxvd2x 49,0,6 | |||
lxvd2x 48,0,5 | |||
xxpermdi 45,45,45,2 | |||
li 6,0 | |||
stvx 29,1,0 | |||
li 0,-48 | |||
xxlnand 44,44,44 | |||
xxlnand 45,45,45 | |||
stvx 30,1,0 | |||
lxvd2x 62,0,7 | |||
addis 7,2,.LC8@toc@ha | |||
li 0,-32 | |||
addi 7,7,.LC8@toc@l | |||
xxpermdi 47,47,47,2 | |||
stvx 31,1,0 | |||
vspltisw 31,8 | |||
xxpermdi 48,48,48,2 | |||
lxvd2x 46,0,7 | |||
vadduwm 31,31,31 | |||
xxpermdi 49,49,49,2 | |||
xxpermdi 62,62,62,2 | |||
.p2align 4,,15 | |||
/* Main loop: one iteration reads 256 bytes (32 elements) starting at r10.
   Each 16-byte load is an lxvd2x followed by a doubleword swap (xxpermdi ..,2). */
.L5: | |||
addi 3,10,16 | |||
addi 5,10,32 | |||
lxvd2x 34,0,10 | |||
addi 7,10,64 | |||
addi 31,10,48 | |||
addi 12,10,80 | |||
addi 11,10,96 | |||
lxvd2x 36,0,3 | |||
lxvd2x 37,0,5 | |||
addi 3,10,112 | |||
addi 5,10,128 | |||
lxvd2x 38,0,7 | |||
lxvd2x 6,0,31 | |||
addi 7,10,160 | |||
addi 31,10,144 | |||
lxvd2x 33,0,12 | |||
lxvd2x 39,0,11 | |||
addi 12,10,176 | |||
addi 11,10,192 | |||
lxvd2x 7,0,3 | |||
lxvd2x 40,0,5 | |||
xxpermdi 34,34,34,2 | |||
addi 3,10,208 | |||
addi 5,10,224 | |||
lxvd2x 41,0,7 | |||
lxvd2x 8,0,31 | |||
addi 7,10,240 | |||
lxvd2x 9,0,12 | |||
lxvd2x 42,0,11 | |||
xxpermdi 37,37,37,2 | |||
xxpermdi 36,36,36,2 | |||
addi 6,6,32 | |||
lxvd2x 32,0,3 | |||
lxvd2x 43,0,5 | |||
xxpermdi 6,6,6,2 | |||
xxpermdi 38,38,38,2 | |||
cmpd 7,8,6 | |||
addi 10,10,256 | |||
lxvd2x 10,0,7 | |||
xxpermdi 39,39,39,2 | |||
xxpermdi 33,33,33,2 | |||
xxpermdi 40,40,40,2 | |||
xxpermdi 7,7,7,2 | |||
xxpermdi 41,41,41,2 | |||
xxpermdi 8,8,8,2 | |||
xxpermdi 9,9,9,2 | |||
xxpermdi 42,42,42,2 | |||
xxpermdi 43,43,43,2 | |||
xxpermdi 32,32,32,2 | |||
xxpermdi 10,10,10,2 | |||
/* Absolute value of every loaded float. */
xvabssp 58,37 | |||
xvabssp 59,39 | |||
xvabssp 35,40 | |||
xvabssp 60,41 | |||
xvabssp 34,34 | |||
xvabssp 33,33 | |||
xvabssp 32,32 | |||
xvabssp 61,43 | |||
xvabssp 36,36 | |||
xvabssp 37,6 | |||
xvabssp 38,38 | |||
xvabssp 39,7 | |||
xvabssp 40,8 | |||
xvabssp 41,9 | |||
xvabssp 42,42 | |||
xvabssp 43,10 | |||
/* Permute vector pairs with the v12 and v13 selectors.
   NOTE(review): presumably this separates first and second floats of the
   elements so the xvaddsp below forms one sum per element -- confirm. */
vperm 25,4,2,12 | |||
vperm 4,4,2,13 | |||
vperm 2,5,26,12 | |||
vperm 5,5,26,13 | |||
vperm 26,1,6,12 | |||
vperm 6,1,6,13 | |||
vperm 1,7,27,12 | |||
vperm 7,7,27,13 | |||
vperm 27,8,3,12 | |||
vperm 8,8,3,13 | |||
vperm 3,9,28,12 | |||
vperm 9,9,28,13 | |||
vperm 28,0,10,12 | |||
vperm 10,0,10,13 | |||
vperm 0,11,29,12 | |||
vperm 11,11,29,13 | |||
xvaddsp 12,33,39 | |||
xvaddsp 38,58,38 | |||
xvaddsp 0,32,43 | |||
xvaddsp 42,60,42 | |||
xvaddsp 36,57,36 | |||
xvaddsp 37,34,37 | |||
xvaddsp 40,59,40 | |||
xvaddsp 41,35,41 | |||
/* Compare/select tree: xvcmpgtsp produces per-lane masks; xxsel uses each mask
   twice, once on the sums and once on the index constants (.LC4-.LC7). */
xvcmpgtsp 32,38,12 | |||
xvcmpgtsp 33,42,0 | |||
xvcmpgtsp 43,36,37 | |||
xvcmpgtsp 39,40,41 | |||
xxsel 12,38,12,32 | |||
xxsel 38,48,49,32 | |||
xxsel 0,42,0,33 | |||
xxsel 42,48,49,33 | |||
xxsel 37,36,37,43 | |||
xxsel 43,62,47,43 | |||
xxsel 41,40,41,39 | |||
xxsel 39,62,47,39 | |||
xvcmpgtsp 32,37,12 | |||
xvcmpgtsp 33,41,0 | |||
xxsel 12,37,12,32 | |||
xxsel 43,43,38,32 | |||
xxsel 0,41,0,33 | |||
xxsel 33,39,42,33 | |||
xvcmpgtsp 32,12,0 | |||
vadduwm 1,1,31 | |||
xxsel 0,12,0,32 | |||
xxsel 32,43,33,32 | |||
/* Fold into the running result: vs5 holds the per-lane minimum and vs51 the
   matching indices; v18 is added to the block-local indices and then
   advanced by v14 (32 per word). */
xvcmpgtsp 33,5,0 | |||
vadduwm 0,0,18 | |||
vadduwm 18,18,14 | |||
xxsel 51,51,32,33 | |||
xxsel 5,5,0,33 | |||
bgt 7,.L5 | |||
/* Horizontal reduction: move the four lanes of vs5 into scalar FP registers
   and the four lanes of vs51 into GPRs (zero-extended by rldicl). */
xxsldwi 11,5,5,3 | |||
xxsldwi 12,5,5,2 | |||
vspltw 0,19,3 | |||
xxsldwi 0,5,5,1 | |||
xscvspdp 11,11 | |||
xscvspdp 12,12 | |||
mfvsrwz 6,32 | |||
vspltw 0,19,2 | |||
xscvspdp 0,0 | |||
mfvsrwz 7,51 | |||
mfvsrwz 5,32 | |||
vspltw 0,19,0 | |||
xscvspdp 5,5 | |||
mfvsrwz 10,32 | |||
fcmpu 7,11,12 | |||
rldicl 3,6,0,32 | |||
fmr 10,0 | |||
rldicl 11,7,0,32 | |||
rldicl 31,5,0,32 | |||
rldicl 0,10,0,32 | |||
/* First lane pair (f11/r3 against f12/r31): equal values go to .L56. */
beq 7,.L56 | |||
bng 7,.L8 | |||
fmr 11,12 | |||
mr 3,31 | |||
/* Second lane pair (f0/r7 against f5/r10): on equality keep the smaller
   index (unsigned compare); otherwise .L11 decides. */
.L8: | |||
fcmpu 7,0,5 | |||
bne 7,.L11 | |||
cmplw 7,7,10 | |||
ble 7,.L12 | |||
mr 7,10 | |||
.L12: | |||
rldicl 11,7,0,32 | |||
/* Merge the two pair winners (f11/r3 against f10/r11). */
.L13: | |||
fcmpu 7,11,10 | |||
beq 7,.L57 | |||
bgt 7,.L58 | |||
/* Tail: when count > r8, run the scalar loop over the remaining count - r8
   elements, starting r8 * 8 bytes past the base; same guards as above. */
.L17: | |||
cmpd 7,9,8 | |||
ble 7,.L19 | |||
addi 7,8,1 | |||
sldi 10,8,1 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
add 4,4,10 | |||
subf 10,8,9 | |||
mtctr 10 | |||
bgt 7,.L37 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L37 | |||
.p2align 4,,15 | |||
.L21: | |||
lfs 0,0(4) | |||
lfs 12,4(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,11,0 | |||
bng 7,.L20 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L20: | |||
addi 8,8,1 | |||
bdnz .L21 | |||
/* Vector-path epilogue: restore r31 and v25-v31, return index + 1. */
.L19: | |||
li 0,-128 | |||
ld 31,-8(1) | |||
addi 3,3,1 | |||
lvx 25,1,0 | |||
li 0,-112 | |||
lvx 26,1,0 | |||
li 0,-96 | |||
lvx 27,1,0 | |||
li 0,-80 | |||
lvx 28,1,0 | |||
li 0,-64 | |||
lvx 29,1,0 | |||
li 0,-48 | |||
lvx 30,1,0 | |||
li 0,-32 | |||
lvx 31,1,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Equal values in the first lane pair: keep the smaller index (unsigned). */
.L56: | |||
cmplw 7,6,5 | |||
ble 7,.L7 | |||
mr 6,5 | |||
.L7: | |||
rldicl 3,6,0,32 | |||
b .L8 | |||
.p2align 4,,15 | |||
/* Strided path with a single element: return 1. */
.L29: | |||
li 3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* Second lane pair differs: when f0 is larger, take f5 and its index r0. */
.L11: | |||
bng 7,.L13 | |||
fmr 10,5 | |||
mr 11,0 | |||
b .L13 | |||
.p2align 4,,15 | |||
/* Pair winners equal: keep the smaller index. */
.L57: | |||
cmpd 7,3,11 | |||
ble 7,.L17 | |||
mr 3,11 | |||
b .L17 | |||
.p2align 4,,15 | |||
/* Second pair winner is smaller: adopt its value and index. */
.L58: | |||
fmr 11,10 | |||
mr 3,11 | |||
b .L17 | |||
/* Guard targets: force ctr = 1 and enter the corresponding scalar loop. */
.L43: | |||
li 9,1 | |||
mtctr 9 | |||
b .L44 | |||
.L37: | |||
li 9,1 | |||
mtctr 9 | |||
b .L21 | |||
/* Data words emitted by the compiler after the code.
   NOTE(review): presumably the traceback table -- confirm. */
.long 0 | |||
.byte 0,0,0,0,0,1,0,0 | |||
.size icamin_k,.-icamin_k | |||
/*
 * 16-byte constants addressed through the TOC by the icamin_k vector path.
 * The section is mergeable ("aM") with 16-byte entries and 16-byte alignment.
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
/* .LC2: byte indices 0-3, 8-11, 16-19, 24-27, i.e. 4-byte words 0, 2, 4, 6
   of a 32-byte permute source. */
.LC2: | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.byte 16 | |||
.byte 17 | |||
.byte 18 | |||
.byte 19 | |||
.byte 24 | |||
.byte 25 | |||
.byte 26 | |||
.byte 27 | |||
/* .LC3: byte indices 4-7, 12-15, 20-23, 28-31, i.e. words 1, 3, 5, 7. */
.LC3: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 20 | |||
.byte 21 | |||
.byte 22 | |||
.byte 23 | |||
.byte 28 | |||
.byte 29 | |||
.byte 30 | |||
.byte 31 | |||
/* .LC4-.LC7: the word values 0-3, 4-7, 8-11 and 12-15.
   NOTE(review): these look like per-lane element offsets chosen by xxsel in
   the main loop -- confirm. */
.LC4: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC5: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC6: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC7: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
/* .LC8: the value 32 in every word; the main loop consumes 32 elements per
   iteration and adds this vector to its running base. */
.LC8: | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,385 @@ | |||
/*
 * icamin_k, lxv/vpermr variant (compiler output, kept verbatim).
 *
 * As read from the code below:
 *   r3 = element count, r4 = address of the first element,
 *   r5 = stride counted in elements (one element = two adjacent 4-byte
 *        floats, so the byte stride is r5 * 8).
 *   NOTE(review): presumably these are n, x and inc_x of icamin.c -- confirm.
 * Result in r3: 1-based position of the element with the smallest
 * fabs(first float) + fabs(second float); 0 when count <= 0 or stride <= 0.
 * The minimum is seeded with the first element's sum; updates happen only on
 * a strictly smaller value, and lane merges keep the smaller index on equal
 * values.
 */
.file "icamin.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl icamin_k | |||
.type icamin_k, @function | |||
icamin_k: | |||
.LCF0: | |||
/* Global entry: compute the TOC pointer r2 from r12; .localentry marks the
   local entry point that follows. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry icamin_k,.-icamin_k | |||
/* r9 = count; return 0 when count <= 0 (.L25) or stride <= 0. */
mr. 9,3 | |||
ble 0,.L25 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/* f11 = fabs(x[0] first) + fabs(x[0] second): seed for both paths. */
lfs 11,0(4) | |||
lfs 0,4(4) | |||
cmpdi 7,5,1 | |||
fabs 11,11 | |||
fabs 0,0 | |||
fadds 11,11,0 | |||
/* Stride 1 takes the contiguous path at .L53. */
beq 7,.L53 | |||
/* Strided path; exactly one element returns 1 (.L29). */
cmpdi 7,9,1 | |||
beq 7,.L29 | |||
/* ctr = count - 1, r5 = stride in bytes, r4 -> second element,
   r3 = best index so far (0-based), r9 = index of the current element. */
addi 9,9,-1 | |||
sldi 5,5,3 | |||
li 3,0 | |||
mtctr 9 | |||
add 4,4,5 | |||
li 9,1 | |||
.p2align 4,,15 | |||
/* Strided scalar loop: replace the minimum only when strictly smaller. */
.L24: | |||
lfs 0,4(4) | |||
lfs 12,0(4) | |||
add 4,4,5 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,0,11 | |||
bnl 7,.L23 | |||
fmr 11,0 | |||
mr 3,9 | |||
.L23: | |||
addi 9,9,1 | |||
bdnz .L24 | |||
/* Convert the 0-based index into the 1-based return value. */
.L51: | |||
addi 3,3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* count <= 0: return 0. */
.L25: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Contiguous path: r8 = count with the low five bits cleared (multiple of 32).
   A non-zero r8 selects the vector path at .L54. */
.L53: | |||
rldicr. 8,9,0,58 | |||
bne 0,.L54 | |||
/* Fewer than 32 elements: scalar loop over all of them, keeping the f11 seed
   computed above, best index 0, ctr = count - r8. */
addi 7,8,1 | |||
li 10,0 | |||
subf 6,8,9 | |||
li 3,0 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
mtctr 6 | |||
add 4,4,10 | |||
/* Compiler-generated guards: use a trip count of 1 (.L43) when r8 + 1 > count
   or when count equals 0x8000000000000000. */
bgt 7,.L43 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L43 | |||
.p2align 4,,15 | |||
/* Contiguous scalar loop (8 bytes per element); r8 is the current index.
   The update is taken when f11 > new sum. */
.L44: | |||
lfs 0,0(4) | |||
lfs 12,4(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,11,0 | |||
bng 7,.L46 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L46: | |||
addi 8,8,1 | |||
bdnz .L44 | |||
b .L51 | |||
.p2align 4,,15 | |||
/* Vector path set-up. vs57-vs63 are stored at negative offsets from r1 (no
   frame is allocated here). The f11 seed is converted to single precision
   and replicated into every lane of vs9 (xscvdpspn + xxspltw). vs48 is
   zeroed and vs47 is a copy of it. .LC2/.LC3 are loaded into vs44/vs45,
   .LC4/.LC5 into vs61/vs62, .LC6/.LC7 into vs63/vs46. v27 and v28 become 16
   and 32 in every word (xxspltib + vextsb2w).
   r10 = read pointer, r7 = elements done. */
.L54: | |||
xscvdpspn 9,11 | |||
addis 11,2,.LC2@toc@ha | |||
addis 3,2,.LC3@toc@ha | |||
addis 5,2,.LC6@toc@ha | |||
addis 6,2,.LC7@toc@ha | |||
addis 7,2,.LC4@toc@ha | |||
addis 10,2,.LC5@toc@ha | |||
xxspltib 48,0 | |||
addi 11,11,.LC2@toc@l | |||
addi 3,3,.LC3@toc@l | |||
addi 5,5,.LC6@toc@l | |||
stxv 59,-80(1) | |||
addi 6,6,.LC7@toc@l | |||
stxv 60,-64(1) | |||
stxv 63,-16(1) | |||
addi 7,7,.LC4@toc@l | |||
xxspltib 59,16 | |||
lxv 44,0(11) | |||
xxspltib 60,32 | |||
lxv 45,0(3) | |||
lxv 63,0(5) | |||
xxlor 47,48,48 | |||
lxv 46,0(6) | |||
addi 10,10,.LC5@toc@l | |||
stxv 61,-48(1) | |||
stxv 62,-32(1) | |||
xxspltw 9,9,0 | |||
lxv 61,0(7) | |||
lxv 62,0(10) | |||
li 7,0 | |||
mr 10,4 | |||
vextsb2w 27,27 | |||
vextsb2w 28,28 | |||
stxv 57,-112(1) | |||
stxv 58,-96(1) | |||
.p2align 4,,15 | |||
/* Main loop: one iteration reads 256 bytes (32 elements). r10 is advanced
   first, so the remaining loads use negative offsets. Loads (lxv), absolute
   values (xvabssp), permutes with the vs44/vs45 selectors (vpermr), sums
   (xvaddsp) and the compare/select tree (xvcmpgtsp/xxsel) are interleaved.
   NOTE(review): presumably the permutes separate first and second floats of
   the elements so each xvaddsp lane is one element's sum -- confirm. */
.L5: | |||
lxv 0,0(10) | |||
addi 7,7,32 | |||
addi 10,10,256 | |||
cmpd 7,8,7 | |||
xvabssp 34,0 | |||
lxv 0,-240(10) | |||
xvabssp 42,0 | |||
lxv 0,-224(10) | |||
xvabssp 49,0 | |||
lxv 0,-208(10) | |||
vpermr 26,10,2,12 | |||
vpermr 2,10,2,13 | |||
xvabssp 35,0 | |||
lxv 0,-192(10) | |||
xvaddsp 34,58,34 | |||
xvabssp 36,0 | |||
lxv 0,-176(10) | |||
vpermr 10,3,17,12 | |||
vpermr 3,3,17,13 | |||
xvabssp 33,0 | |||
lxv 0,-160(10) | |||
xvaddsp 10,42,35 | |||
xvabssp 50,0 | |||
lxv 0,-144(10) | |||
vpermr 17,1,4,12 | |||
vpermr 4,1,4,13 | |||
xvabssp 37,0 | |||
lxv 0,-128(10) | |||
xvaddsp 36,49,36 | |||
xvabssp 38,0 | |||
lxv 0,-112(10) | |||
vpermr 1,5,18,12 | |||
vpermr 5,5,18,13 | |||
xvabssp 43,0 | |||
lxv 0,-96(10) | |||
xvaddsp 12,33,37 | |||
xvabssp 51,0 | |||
lxv 0,-80(10) | |||
vpermr 18,11,6,12 | |||
vpermr 6,11,6,13 | |||
xvabssp 39,0 | |||
lxv 0,-64(10) | |||
xvaddsp 38,50,38 | |||
xvabssp 40,0 | |||
lxv 0,-48(10) | |||
vpermr 11,7,19,12 | |||
vpermr 7,7,19,13 | |||
xvabssp 32,0 | |||
lxv 0,-32(10) | |||
xvaddsp 11,43,39 | |||
xvcmpgtsp 39,34,10 | |||
xvcmpgtsp 43,36,12 | |||
xvabssp 57,0 | |||
lxv 0,-16(10) | |||
vpermr 19,0,8,12 | |||
vpermr 8,0,8,13 | |||
xxsel 10,34,10,39 | |||
xxsel 12,36,12,43 | |||
xxsel 39,61,62,39 | |||
xxsel 43,63,46,43 | |||
xvabssp 41,0 | |||
xvaddsp 40,51,40 | |||
vpermr 0,9,25,12 | |||
vpermr 9,9,25,13 | |||
xvaddsp 0,32,41 | |||
xvcmpgtsp 41,38,11 | |||
xvcmpgtsp 32,10,12 | |||
xvcmpgtsp 42,40,0 | |||
xxsel 11,38,11,41 | |||
xxsel 12,10,12,32 | |||
xxsel 43,39,43,32 | |||
xxsel 41,61,62,41 | |||
xxsel 0,40,0,42 | |||
xxsel 42,63,46,42 | |||
xvcmpgtsp 33,11,0 | |||
xxsel 0,11,0,33 | |||
xxsel 33,41,42,33 | |||
xvcmpgtsp 32,12,0 | |||
vadduwm 1,1,27 | |||
xxsel 0,12,0,32 | |||
xxsel 32,43,33,32 | |||
/* Fold into the running result: vs9 holds the per-lane minimum and vs48 the
   matching indices; v15 is added to the block-local indices and then
   advanced by v28 (32 per word). */
xvcmpgtsp 33,9,0 | |||
vadduwm 0,0,15 | |||
vadduwm 15,15,28 | |||
xxsel 48,48,32,33 | |||
xxsel 9,9,0,33 | |||
bgt 7,.L5 | |||
/* Horizontal reduction: move the four lanes of vs9 into scalar FP registers
   and extract the four words of v16 (vs48) at byte offsets 0, 4, 8 and 12
   with vextuwrx; rldicl zero-extends them. */
xxsldwi 11,9,9,3 | |||
xxsldwi 12,9,9,2 | |||
li 10,0 | |||
li 3,12 | |||
xxsldwi 0,9,9,1 | |||
xscvspdp 9,9 | |||
vextuwrx 6,10,16 | |||
li 10,4 | |||
xscvspdp 11,11 | |||
xscvspdp 12,12 | |||
xscvspdp 0,0 | |||
vextuwrx 5,10,16 | |||
li 10,8 | |||
vextuwrx 7,10,16 | |||
vextuwrx 10,3,16 | |||
rldicl 12,5,0,32 | |||
rldicl 3,6,0,32 | |||
rldicl 11,7,0,32 | |||
rldicl 0,10,0,32 | |||
fcmpu 7,11,12 | |||
fmr 10,0 | |||
/* First lane pair (f11/r3 against f12/r12): equal values go to .L55. */
beq 7,.L55 | |||
bng 7,.L8 | |||
mr 3,12 | |||
fmr 11,12 | |||
/* Second lane pair (f0/r7 against f9/r10): on equality keep the smaller
   index (unsigned compare); otherwise .L11 decides. */
.L8: | |||
fcmpu 7,0,9 | |||
bne 7,.L11 | |||
cmplw 7,7,10 | |||
ble 7,.L12 | |||
mr 7,10 | |||
.L12: | |||
rldicl 11,7,0,32 | |||
/* Merge the two pair winners (f11/r3 against f10/r11). */
.L13: | |||
fcmpu 7,11,10 | |||
beq 7,.L56 | |||
bng 7,.L17 | |||
mr 3,11 | |||
fmr 11,10 | |||
/* Tail: when count > r8, run the scalar loop over the remaining count - r8
   elements, starting r8 * 8 bytes past the base; same guards as above. */
.L17: | |||
cmpd 7,9,8 | |||
ble 7,.L19 | |||
addi 7,8,1 | |||
sldi 10,8,1 | |||
subf 6,8,9 | |||
cmpd 7,7,9 | |||
sldi 10,10,2 | |||
mtctr 6 | |||
add 4,4,10 | |||
bgt 7,.L37 | |||
li 10,-1 | |||
rldicr 10,10,0,0 | |||
cmpd 7,9,10 | |||
beq 7,.L37 | |||
.p2align 4,,15 | |||
.L21: | |||
lfs 0,0(4) | |||
lfs 12,4(4) | |||
addi 4,4,8 | |||
fabs 0,0 | |||
fabs 12,12 | |||
fadds 0,0,12 | |||
fcmpu 7,11,0 | |||
bng 7,.L20 | |||
fmr 11,0 | |||
mr 3,8 | |||
.L20: | |||
addi 8,8,1 | |||
bdnz .L21 | |||
/* Vector-path epilogue: restore vs57-vs63, return index + 1. */
.L19: | |||
lxv 57,-112(1) | |||
lxv 58,-96(1) | |||
addi 3,3,1 | |||
lxv 59,-80(1) | |||
lxv 60,-64(1) | |||
lxv 61,-48(1) | |||
lxv 62,-32(1) | |||
lxv 63,-16(1) | |||
blr | |||
.p2align 4,,15 | |||
/* Equal values in the first lane pair: keep the smaller index (unsigned). */
.L55: | |||
cmplw 7,6,5 | |||
ble 7,.L7 | |||
mr 6,5 | |||
.L7: | |||
rldicl 3,6,0,32 | |||
b .L8 | |||
.p2align 4,,15 | |||
/* Strided path with a single element: return 1. */
.L29: | |||
li 3,1 | |||
blr | |||
.p2align 4,,15 | |||
/* Second lane pair differs: when f0 is larger, take f9 and its index r0. */
.L11: | |||
bng 7,.L13 | |||
mr 11,0 | |||
fmr 10,9 | |||
b .L13 | |||
.p2align 4,,15 | |||
/* Pair winners equal: keep the smaller index. */
.L56: | |||
cmpd 7,3,11 | |||
ble 7,.L17 | |||
mr 3,11 | |||
b .L17 | |||
/* Guard targets: force ctr = 1 and enter the corresponding scalar loop. */
.L37: | |||
li 9,1 | |||
mtctr 9 | |||
b .L21 | |||
.L43: | |||
li 9,1 | |||
mtctr 9 | |||
b .L44 | |||
/* Data words emitted by the compiler after the code.
   NOTE(review): presumably the traceback table -- confirm. */
.long 0 | |||
.byte 0,0,0,0,0,0,0,0 | |||
.size icamin_k,.-icamin_k | |||
/*
 * 16-byte constants addressed through the TOC by the icamin_k vector path.
 * The section is mergeable ("aM") with 16-byte entries and 16-byte alignment.
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
/* .LC2: byte indices 0-3, 8-11, 16-19, 24-27, i.e. 4-byte words 0, 2, 4, 6
   of a 32-byte permute source. */
.LC2: | |||
.byte 0 | |||
.byte 1 | |||
.byte 2 | |||
.byte 3 | |||
.byte 8 | |||
.byte 9 | |||
.byte 10 | |||
.byte 11 | |||
.byte 16 | |||
.byte 17 | |||
.byte 18 | |||
.byte 19 | |||
.byte 24 | |||
.byte 25 | |||
.byte 26 | |||
.byte 27 | |||
/* .LC3: byte indices 4-7, 12-15, 20-23, 28-31, i.e. words 1, 3, 5, 7. */
.LC3: | |||
.byte 4 | |||
.byte 5 | |||
.byte 6 | |||
.byte 7 | |||
.byte 12 | |||
.byte 13 | |||
.byte 14 | |||
.byte 15 | |||
.byte 20 | |||
.byte 21 | |||
.byte 22 | |||
.byte 23 | |||
.byte 28 | |||
.byte 29 | |||
.byte 30 | |||
.byte 31 | |||
/* .LC4-.LC7: the word values 0-3, 4-7, 8-11 and 12-15.
   NOTE(review): these look like per-lane element offsets chosen by xxsel in
   the main loop -- confirm. */
.LC4: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC5: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC6: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC7: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,434 @@ | |||
/* .file "isamax.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl isamax_k | |||
.type isamax_k, @function | |||
*/ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
PROLOGUE | |||
/*
 * isamax_k, POWER8 variant (gcc 7.3.1 output of isamax.c; the original
 * section/symbol directives above are commented out and replaced by the
 * PROLOGUE macro from common.h).
 *
 * Inputs as read by the code below:
 *   r3 = element count n        (n <= 0 returns 0)
 *   r4 = address of the single-precision input vector
 *   r5 = stride in elements     (<= 0 returns 0; 1 selects the unit-stride path)
 * Result in r3: (0-based index of an element with the largest absolute
 * value) + 1.  The running maximum starts at 0.0 and is only replaced on a
 * strictly greater value.
 */
isamax_k: | |||
.LCF0: | |||
/* Global entry point: compute r2 from r12 and the .TOC. offset. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry isamax_k,.-isamax_k | |||
/* r11 = n; return 0 when n <= 0 or stride <= 0. */
mr. 11,3 | |||
ble 0,.L36 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/* stride == 1 -> unit-stride code at .L69. */
cmpdi 7,5,1 | |||
beq 7,.L69 | |||
/* Strided path: r7 = n with the low two bits cleared; zero -> .L40. */
rldicr. 7,11,0,61 | |||
beq 0,.L40 | |||
/*
 * Byte offsets derived from the stride: r6 = 4*inc, r5 = 8*inc,
 * r3 = 12*inc, r0 = 16*inc.  f0 = running max (0.0), r8 = best index,
 * r10 = current index, r9 = current element address.
 */
sldi 3,5,1 | |||
xxlxor 0,0,0 | |||
sldi 6,5,2 | |||
add 3,3,5 | |||
sldi 0,5,4 | |||
sldi 3,3,2 | |||
sldi 5,5,3 | |||
mr 9,4 | |||
li 8,0 | |||
li 10,0 | |||
.p2align 4,,15 | |||
/* 4x unrolled strided loop: each step takes |x| and updates f0/r8 if greater. */
.L31: | |||
lfs 12,0(9) | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L23 | |||
fmr 0,12 | |||
mr 8,10 | |||
.L23: | |||
lfsx 12,9,6 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L25 | |||
fmr 0,12 | |||
addi 8,10,1 | |||
.L25: | |||
lfsx 12,9,5 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L27 | |||
fmr 0,12 | |||
addi 8,10,2 | |||
.L27: | |||
lfsx 12,9,3 | |||
add 9,9,0 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L29 | |||
fmr 0,12 | |||
addi 8,10,3 | |||
.L29: | |||
addi 10,10,4 | |||
cmpd 7,7,10 | |||
bgt 7,.L31 | |||
/*
 * r7 = number of unrolled iterations, r9 = elements consumed (4 per
 * iteration), then r7 = r6 * iterations (scaled by 4 again at .L22 to
 * become the byte offset of the first unprocessed element).
 */
addi 7,7,-1 | |||
srdi 7,7,2 | |||
addi 7,7,1 | |||
sldi 9,7,2 | |||
mulld 7,6,7 | |||
cmpd 7,11,9 | |||
ble 7,.L67 | |||
/* Strided remainder: ctr = n - r9, r4 advanced past the processed part. */
.L22: | |||
addi 10,9,1 | |||
sldi 7,7,2 | |||
cmpd 7,10,11 | |||
subf 10,9,11 | |||
mtctr 10 | |||
add 4,4,7 | |||
bgt 7,.L54 | |||
/* r3 = 0x8000000000000000; compiler-generated guard, .L54 forces ctr = 1. */
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L54 | |||
.p2align 4,,15 | |||
.L35: | |||
lfs 12,0(4) | |||
add 4,4,6 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L33 | |||
fmr 0,12 | |||
mr 8,9 | |||
.L33: | |||
addi 9,9,1 | |||
bdnz .L35 | |||
/* Common exit for the paths that saved nothing: return best index + 1. */
.L67: | |||
addi 3,8,1 | |||
blr | |||
.p2align 4,,15 | |||
/* n <= 0. */
.L36: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Unit stride: r10 = n with the low six bits cleared; non-zero -> vector path. */
.L69: | |||
rldicr. 10,11,0,57 | |||
bne 0,.L70 | |||
/* Fewer than 64 elements: plain scalar loop, f12 = running max, r8 = best index. */
addi 7,10,1 | |||
sldi 9,10,2 | |||
xxlxor 12,12,12 | |||
cmpd 7,7,11 | |||
add 4,4,9 | |||
subf 9,10,11 | |||
li 8,0 | |||
mtctr 9 | |||
bgt 7,.L60 | |||
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L60 | |||
.p2align 4,,15 | |||
.L61: | |||
lfs 0,0(4) | |||
addi 4,4,4 | |||
fabs 0,0 | |||
fcmpu 7,0,12 | |||
bng 7,.L63 | |||
fmr 12,0 | |||
mr 8,10 | |||
.L63: | |||
addi 10,10,1 | |||
bdnz .L61 | |||
b .L67 | |||
.p2align 4,,15 | |||
/*
 * Vector path setup.  r31 and v29-v31 are stored at negative offsets from
 * r1 (no stack frame is created).  Register roles in the loop:
 *   vs44 (v12) = running per-lane maximum, starts at 0
 *   vs50 (v18) = running per-lane index,   starts at 0
 *   vs35 (v3)  = index base of the current 64-element chunk
 *   vs51/vs34  = .LC2/.LC3, vs36/vs37 = .LC4/.LC5 (lane index constants)
 *   vs47 (v15) = .LC6, vs48 (v16) = .LC7, v17 = 8 + 8 = 16 in every word
 *   r7 = elements processed so far, r9 = current load address
 */
.L70: | |||
li 0,-64 | |||
std 31,-8(1) | |||
addis 3,2,.LC2@toc@ha | |||
vspltisw 18,0 | |||
vspltisw 12,0 | |||
addis 5,2,.LC3@toc@ha | |||
addis 6,2,.LC6@toc@ha | |||
stvx 29,1,0 | |||
li 0,-48 | |||
addis 8,2,.LC7@toc@ha | |||
xxlor 35,50,50 | |||
addi 3,3,.LC2@toc@l | |||
addi 5,5,.LC3@toc@l | |||
stvx 30,1,0 | |||
addi 6,6,.LC6@toc@l | |||
li 0,-32 | |||
addi 8,8,.LC7@toc@l | |||
lxvd2x 51,0,3 | |||
lxvd2x 34,0,5 | |||
addis 7,2,.LC4@toc@ha | |||
stvx 31,1,0 | |||
lxvd2x 47,0,6 | |||
addis 9,2,.LC5@toc@ha | |||
addi 7,7,.LC4@toc@l | |||
lxvd2x 48,0,8 | |||
addi 9,9,.LC5@toc@l | |||
vspltisw 17,8 | |||
vadduwm 17,17,17 | |||
lxvd2x 36,0,7 | |||
li 7,0 | |||
lxvd2x 37,0,9 | |||
mr 9,4 | |||
.p2align 4,,15 | |||
/*
 * Main vector loop: 16 vector loads (256 bytes) per iteration.  Absolute
 * values are taken with xvabssp; xvcmpgtsp/xxsel pairs reduce the values
 * and, with the same masks, the matching index vectors down to a single
 * candidate (vs0 value, vs32 index), which then replaces vs44/vs50 in the
 * lanes where it is strictly greater.  v3 advances by v16 each iteration.
 */
.L5: | |||
addi 5,9,16 | |||
addi 6,9,32 | |||
lxvd2x 41,0,9 | |||
vadduwm 31,3,15 | |||
addi 8,9,64 | |||
addi 31,9,48 | |||
addi 12,9,80 | |||
addi 3,9,96 | |||
lxvd2x 5,0,5 | |||
lxvd2x 43,0,6 | |||
addi 5,9,112 | |||
addi 6,9,128 | |||
lxvd2x 1,0,8 | |||
lxvd2x 9,0,31 | |||
addi 8,9,160 | |||
addi 31,9,144 | |||
lxvd2x 6,0,12 | |||
lxvd2x 13,0,3 | |||
addi 12,9,176 | |||
addi 3,9,192 | |||
lxvd2x 11,0,5 | |||
lxvd2x 2,0,6 | |||
xvabssp 41,41 | |||
addi 5,9,208 | |||
addi 6,9,224 | |||
lxvd2x 3,0,8 | |||
lxvd2x 7,0,31 | |||
addi 8,9,240 | |||
lxvd2x 10,0,12 | |||
lxvd2x 4,0,3 | |||
xvabssp 43,43 | |||
xvabssp 5,5 | |||
addi 7,7,64 | |||
lxvd2x 8,0,5 | |||
lxvd2x 0,0,6 | |||
xvabssp 9,9 | |||
xvabssp 1,1 | |||
cmpd 7,10,7 | |||
addi 9,9,256 | |||
lxvd2x 12,0,8 | |||
xvabssp 6,6 | |||
xvabssp 13,13 | |||
xvabssp 11,11 | |||
xvabssp 2,2 | |||
xvabssp 7,7 | |||
xvabssp 3,3 | |||
xvabssp 10,10 | |||
xvabssp 4,4 | |||
xvabssp 8,8 | |||
xvabssp 0,0 | |||
xvabssp 12,12 | |||
xvcmpgtsp 32,5,41 | |||
xvcmpgtsp 61,9,43 | |||
xvcmpgtsp 45,6,1 | |||
xvcmpgtsp 62,11,13 | |||
xvcmpgtsp 38,7,2 | |||
xvcmpgtsp 46,10,3 | |||
xvcmpgtsp 40,8,4 | |||
xvcmpgtsp 39,12,0 | |||
xxsel 5,41,5,32 | |||
xxsel 32,51,34,32 | |||
xxsel 9,43,9,61 | |||
xxsel 6,1,6,45 | |||
xxsel 11,13,11,62 | |||
xxsel 43,51,34,45 | |||
xxsel 7,2,7,38 | |||
xvcmpgtsp 41,9,5 | |||
xxsel 10,3,10,46 | |||
xvcmpgtsp 45,11,6 | |||
xxsel 8,4,8,40 | |||
xxsel 62,36,37,62 | |||
xxsel 0,0,12,39 | |||
xvcmpgtsp 42,10,7 | |||
xxsel 61,36,37,61 | |||
xxsel 40,51,34,40 | |||
xvcmpgtsp 33,0,8 | |||
xxsel 39,36,37,39 | |||
xxsel 38,51,34,38 | |||
xxsel 46,36,37,46 | |||
xxsel 9,5,9,41 | |||
xxsel 41,32,61,41 | |||
xxsel 12,6,11,45 | |||
xxsel 45,43,62,45 | |||
xxsel 11,7,10,42 | |||
xvcmpgtsp 32,12,9 | |||
vadduwm 13,13,17 | |||
xxsel 42,38,46,42 | |||
xxsel 0,8,0,33 | |||
xxsel 33,40,39,33 | |||
xvcmpgtsp 43,0,11 | |||
vadduwm 1,1,17 | |||
xxsel 12,9,12,32 | |||
xxsel 32,41,45,32 | |||
vadduwm 0,3,0 | |||
vadduwm 3,3,16 | |||
xxsel 0,11,0,43 | |||
xxsel 33,42,33,43 | |||
xvcmpgtsp 45,0,12 | |||
vadduwm 1,31,1 | |||
xxsel 0,12,0,45 | |||
xxsel 32,32,33,45 | |||
xvcmpgtsp 33,0,44 | |||
xxsel 50,50,32,33 | |||
xxsel 44,44,0,33 | |||
bgt 7,.L5 | |||
/*
 * Horizontal reduction: the four lanes of vs44 are converted to double
 * (xscvspdp) and the four lanes of v18 are moved to GPRs (vspltw +
 * mfvsrwz).  The lanes are compared pairwise and then pair against pair;
 * on equal values the smaller index is kept (.L71, .L12, .L72).
 * Result: f12 = maximum, r8 = its index.
 */
xxsldwi 12,44,44,1 | |||
xscvspdp 10,44 | |||
vspltw 0,18,0 | |||
xxsldwi 0,44,44,3 | |||
xscvspdp 12,12 | |||
mfvsrwz 3,50 | |||
mfvsrwz 6,32 | |||
vspltw 0,18,3 | |||
xscvspdp 0,0 | |||
xxsldwi 44,44,44,2 | |||
mfvsrwz 7,32 | |||
vspltw 0,18,2 | |||
xscvspdp 44,44 | |||
mfvsrwz 9,32 | |||
fcmpu 7,12,10 | |||
rldicl 8,3,0,32 | |||
rldicl 31,6,0,32 | |||
fmr 11,0 | |||
rldicl 0,7,0,32 | |||
rldicl 5,9,0,32 | |||
beq 7,.L71 | |||
bnl 7,.L8 | |||
fmr 12,10 | |||
mr 8,31 | |||
.L8: | |||
xscmpudp 7,0,44 | |||
bne 7,.L11 | |||
cmplw 7,7,9 | |||
ble 7,.L12 | |||
mr 7,9 | |||
.L12: | |||
rldicl 5,7,0,32 | |||
.L13: | |||
fcmpu 7,12,11 | |||
beq 7,.L72 | |||
bnl 7,.L17 | |||
fmr 12,11 | |||
mr 8,5 | |||
/* Scalar tail for the elements after the last full 64-element chunk (index r10 .. n-1). */
.L17: | |||
cmpd 7,11,10 | |||
ble 7,.L16 | |||
addi 7,10,1 | |||
sldi 9,10,2 | |||
cmpd 7,7,11 | |||
add 4,4,9 | |||
subf 9,10,11 | |||
mtctr 9 | |||
bgt 7,.L53 | |||
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L53 | |||
.p2align 4,,15 | |||
.L21: | |||
lfs 0,0(4) | |||
addi 4,4,4 | |||
fabs 0,0 | |||
fcmpu 7,0,12 | |||
bng 7,.L19 | |||
fmr 12,0 | |||
mr 8,10 | |||
.L19: | |||
addi 10,10,1 | |||
bdnz .L21 | |||
/* Vector-path exit: reload r31 and v29-v31, return best index + 1. */
.L16: | |||
li 0,-64 | |||
ld 31,-8(1) | |||
addi 3,8,1 | |||
lvx 29,1,0 | |||
li 0,-48 | |||
lvx 30,1,0 | |||
li 0,-32 | |||
lvx 31,1,0 | |||
blr | |||
.p2align 4,,15 | |||
/* First lane pair compared equal: keep the smaller of the two indices. */
.L71: | |||
cmplw 7,3,6 | |||
ble 7,.L7 | |||
mr 3,6 | |||
.L7: | |||
rldicl 8,3,0,32 | |||
b .L8 | |||
.p2align 4,,15 | |||
/* Strided path with fewer than 4 elements: go straight to the remainder loop. */
.L40: | |||
xxlxor 0,0,0 | |||
sldi 6,5,2 | |||
li 8,0 | |||
li 9,0 | |||
b .L22 | |||
.p2align 4,,15 | |||
/* Second lane pair differs: pick the value/index of the larger lane. */
.L11: | |||
blt 7,.L39 | |||
mr 5,0 | |||
b .L13 | |||
.p2align 4,,15 | |||
/* The two pair winners compared equal: keep the smaller index. */
.L72: | |||
cmpd 7,8,5 | |||
ble 7,.L17 | |||
mr 8,5 | |||
b .L17 | |||
.p2align 4,,15 | |||
.L39: | |||
xscpsgndp 11,44,44 | |||
b .L13 | |||
/* Guard targets: reload ctr with 1 before entering the respective scalar loop. */
.L53: | |||
li 9,1 | |||
mtctr 9 | |||
b .L21 | |||
.L54: | |||
li 10,1 | |||
mtctr 10 | |||
b .L35 | |||
.L60: | |||
li 9,1 | |||
mtctr 9 | |||
b .L61 | |||
.long 0 | |||
.byte 0,0,0,0,0,1,0,0 | |||
.size isamax_k,.-isamax_k | |||
/*
 * 16-byte constants loaded by the vector path of isamax_k (label .L70):
 *   .LC2 - .LC5 : 32-bit lane indices 0..3, 4..7, 8..11, 12..15
 *   .LC6        : 32 in every word
 *   .LC7        : 64 in every word (added to the chunk base once per loop
 *                 iteration)
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
.LC2: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC3: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC4: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC5: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
.LC6: | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.LC7: | |||
.long 64 | |||
.long 64 | |||
.long 64 | |||
.long 64 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,397 @@ | |||
/*
 * isamax_k, POWER9 variant (gcc 7.3.1 output of isamax.c).
 *
 * Inputs as read by the code below:
 *   r3 = element count n        (n <= 0 returns 0)
 *   r4 = address of the single-precision input vector
 *   r5 = stride in elements     (<= 0 returns 0; 1 selects the unit-stride path)
 * Result in r3: (0-based index of an element with the largest absolute
 * value) + 1.  The running maximum starts at 0.0 and is only replaced on a
 * strictly greater value.
 */
.file "isamax.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl isamax_k | |||
.type isamax_k, @function | |||
isamax_k: | |||
.LCF0: | |||
/* Global entry point: compute r2 from r12 and the .TOC. offset. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry isamax_k,.-isamax_k | |||
/* r11 = n; return 0 when n <= 0 or stride <= 0. */
mr. 11,3 | |||
ble 0,.L36 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/* stride == 1 -> unit-stride code at .L69. */
cmpdi 7,5,1 | |||
beq 7,.L69 | |||
/* Strided path: r7 = n with the low two bits cleared; zero -> .L40. */
rldicr. 7,11,0,61 | |||
beq 0,.L40 | |||
/*
 * Byte offsets derived from the stride: r6 = 4*inc, r3 = 8*inc,
 * r5 = 12*inc, r0 = 16*inc.  f0 = running max (0.0), r8 = best index,
 * r10 = current index, r9 = current element address.
 */
sldi 10,5,1 | |||
sldi 6,5,2 | |||
sldi 0,5,4 | |||
sldi 3,5,3 | |||
mr 9,4 | |||
xxlxor 0,0,0 | |||
li 8,0 | |||
add 5,10,5 | |||
li 10,0 | |||
sldi 5,5,2 | |||
.p2align 4,,15 | |||
/* 4x unrolled strided loop: each step takes |x| and updates f0/r8 if greater. */
.L31: | |||
lfs 12,0(9) | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L23 | |||
fmr 0,12 | |||
mr 8,10 | |||
.L23: | |||
lfsx 12,9,6 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L25 | |||
fmr 0,12 | |||
addi 8,10,1 | |||
.L25: | |||
lfsx 12,9,3 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L27 | |||
fmr 0,12 | |||
addi 8,10,2 | |||
.L27: | |||
lfsx 12,9,5 | |||
add 9,9,0 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L29 | |||
fmr 0,12 | |||
addi 8,10,3 | |||
.L29: | |||
addi 10,10,4 | |||
cmpd 7,7,10 | |||
bgt 7,.L31 | |||
/*
 * r7 = number of unrolled iterations, r9 = elements consumed (4 per
 * iteration), then r7 = r6 * iterations (scaled by 4 again at .L22 to
 * become the byte offset of the first unprocessed element).
 */
addi 7,7,-1 | |||
srdi 7,7,2 | |||
addi 7,7,1 | |||
sldi 9,7,2 | |||
mulld 7,6,7 | |||
cmpd 7,11,9 | |||
ble 7,.L67 | |||
/* Strided remainder: ctr = n - r9, r4 advanced past the processed part. */
.L22: | |||
addi 10,9,1 | |||
sldi 7,7,2 | |||
subf 5,9,11 | |||
cmpd 7,10,11 | |||
mtctr 5 | |||
add 4,4,7 | |||
bgt 7,.L54 | |||
/* r3 = 0x8000000000000000; compiler-generated guard, .L54 forces ctr = 1. */
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L54 | |||
.p2align 4,,15 | |||
.L35: | |||
lfs 12,0(4) | |||
add 4,4,6 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bng 7,.L33 | |||
fmr 0,12 | |||
mr 8,9 | |||
.L33: | |||
addi 9,9,1 | |||
bdnz .L35 | |||
/* Common exit for the paths that saved nothing: return best index + 1. */
.L67: | |||
addi 3,8,1 | |||
blr | |||
.p2align 4,,15 | |||
/* n <= 0. */
.L36: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Unit stride: r10 = n with the low six bits cleared; non-zero -> vector path. */
.L69: | |||
rldicr. 10,11,0,57 | |||
bne 0,.L70 | |||
/* Fewer than 64 elements: plain scalar loop, f12 = running max, r8 = best index. */
addi 7,10,1 | |||
sldi 9,10,2 | |||
subf 6,10,11 | |||
li 8,0 | |||
xxlxor 12,12,12 | |||
cmpd 7,7,11 | |||
mtctr 6 | |||
add 4,4,9 | |||
bgt 7,.L60 | |||
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L60 | |||
.p2align 4,,15 | |||
.L61: | |||
lfs 0,0(4) | |||
addi 4,4,4 | |||
fabs 0,0 | |||
fcmpu 7,0,12 | |||
bng 7,.L63 | |||
fmr 12,0 | |||
mr 8,10 | |||
.L63: | |||
addi 10,10,1 | |||
bdnz .L61 | |||
b .L67 | |||
.p2align 4,,15 | |||
/*
 * Vector path setup.  vs59-vs63 (v27-v31) are stored at -80..-16 from r1
 * (no stack frame is created).  Register roles in the loop:
 *   vs34 (v2)  = running per-lane maximum, starts at 0
 *   vs46 (v14) = running per-lane index,   starts at 0
 *   vs51 (v19) = index base of the current 64-element chunk
 *   vs47..vs50 = .LC2..LC5 (lane index constants 0..15)
 *   v29 = 32, v31 = 16, v30 = 64 in every word (xxspltib + vextsb2w)
 *   r8 = elements processed so far, r9 = current load address
 */
.L70: | |||
addis 6,2,.LC2@toc@ha | |||
addis 7,2,.LC3@toc@ha | |||
addis 8,2,.LC4@toc@ha | |||
addis 9,2,.LC5@toc@ha | |||
xxspltib 46,0 | |||
stxv 61,-48(1) | |||
stxv 62,-32(1) | |||
addi 6,6,.LC2@toc@l | |||
addi 7,7,.LC3@toc@l | |||
stxv 63,-16(1) | |||
xxspltib 61,32 | |||
xxspltib 63,16 | |||
xxspltib 62,64 | |||
addi 8,8,.LC4@toc@l | |||
addi 9,9,.LC5@toc@l | |||
lxv 47,0(6) | |||
xxspltib 34,0 | |||
lxv 48,0(7) | |||
xxlor 51,46,46 | |||
lxv 49,0(8) | |||
lxv 50,0(9) | |||
li 8,0 | |||
mr 9,4 | |||
vextsb2w 29,29 | |||
vextsb2w 31,31 | |||
vextsb2w 30,30 | |||
stxv 59,-80(1) | |||
stxv 60,-64(1) | |||
.p2align 4,,15 | |||
/*
 * Main vector loop: 16 vector loads (256 bytes) per iteration; r9 is
 * advanced early, so most loads use negative displacements.  Absolute
 * values are taken with xvabssp; xvcmpgtsp/xxsel pairs reduce the values
 * and, with the same masks, the matching index vectors down to a single
 * candidate (vs0 value, vs32 index), which then replaces vs34/vs46 in the
 * lanes where it is strictly greater.  v19 advances by v30 each iteration.
 */
.L5: | |||
lxv 0,0(9) | |||
vadduwm 27,19,29 | |||
lxv 12,240(9) | |||
addi 8,8,64 | |||
addi 9,9,256 | |||
cmpd 7,10,8 | |||
xvabssp 44,0 | |||
lxv 0,-240(9) | |||
xvabssp 12,12 | |||
xvabssp 5,0 | |||
lxv 0,-224(9) | |||
xvabssp 32,0 | |||
lxv 0,-208(9) | |||
xvcmpgtsp 35,5,44 | |||
xvabssp 9,0 | |||
lxv 0,-192(9) | |||
xxsel 5,44,5,35 | |||
xxsel 35,47,48,35 | |||
xvabssp 1,0 | |||
lxv 0,-176(9) | |||
xvcmpgtsp 60,9,32 | |||
xvabssp 6,0 | |||
lxv 0,-160(9) | |||
xxsel 9,32,9,60 | |||
xxsel 60,49,50,60 | |||
xvabssp 13,0 | |||
lxv 0,-144(9) | |||
xvcmpgtsp 42,9,5 | |||
xvcmpgtsp 37,6,1 | |||
xvabssp 11,0 | |||
lxv 0,-128(9) | |||
xxsel 9,5,9,42 | |||
xxsel 42,35,60,42 | |||
xxsel 6,1,6,37 | |||
xxsel 37,47,48,37 | |||
xvabssp 2,0 | |||
lxv 0,-112(9) | |||
xvcmpgtsp 36,11,13 | |||
xvabssp 7,0 | |||
lxv 0,-96(9) | |||
xxsel 11,13,11,36 | |||
xxsel 36,49,50,36 | |||
xvabssp 3,0 | |||
lxv 0,-80(9) | |||
xvcmpgtsp 45,11,6 | |||
xvcmpgtsp 39,7,2 | |||
xvabssp 10,0 | |||
lxv 0,-64(9) | |||
xxsel 7,2,7,39 | |||
xxsel 39,47,48,39 | |||
xvabssp 4,0 | |||
lxv 0,-48(9) | |||
xvcmpgtsp 38,10,3 | |||
xvabssp 8,0 | |||
lxv 0,-32(9) | |||
xxsel 10,3,10,38 | |||
xxsel 38,49,50,38 | |||
xvabssp 0,0 | |||
xvcmpgtsp 43,10,7 | |||
xvcmpgtsp 41,8,4 | |||
xvcmpgtsp 40,12,0 | |||
xxsel 8,4,8,41 | |||
xxsel 41,47,48,41 | |||
xxsel 0,0,12,40 | |||
xxsel 12,6,11,45 | |||
xxsel 11,7,10,43 | |||
xxsel 45,37,36,45 | |||
xvcmpgtsp 33,0,8 | |||
xvcmpgtsp 32,12,9 | |||
vadduwm 13,13,31 | |||
xxsel 40,49,50,40 | |||
xxsel 43,39,38,43 | |||
xxsel 0,8,0,33 | |||
xxsel 12,9,12,32 | |||
xxsel 33,41,40,33 | |||
xxsel 32,42,45,32 | |||
xvcmpgtsp 44,0,11 | |||
vadduwm 1,1,31 | |||
vadduwm 0,19,0 | |||
vadduwm 19,19,30 | |||
xxsel 0,11,0,44 | |||
xxsel 33,43,33,44 | |||
xvcmpgtsp 45,0,12 | |||
vadduwm 1,27,1 | |||
xxsel 0,12,0,45 | |||
xxsel 32,32,33,45 | |||
xvcmpgtsp 33,0,34 | |||
xxsel 46,46,32,33 | |||
xxsel 34,34,0,33 | |||
bgt 7,.L5 | |||
/*
 * Horizontal reduction: the four lanes of vs34 are converted to double
 * (xscvspdp) and the four words of v14 are extracted with vextuwrx at byte
 * offsets 0, 4, 8 and 12.  The lanes are compared pairwise and then pair
 * against pair; on equal values the smaller index is kept (.L71, .L12,
 * .L72).  Result: f12 = maximum, r8 = its index.
 */
xxsldwi 12,34,34,3 | |||
xxsldwi 11,34,34,2 | |||
li 9,0 | |||
li 8,12 | |||
xxsldwi 0,34,34,1 | |||
xscvspdp 34,34 | |||
vextuwrx 3,9,14 | |||
li 9,4 | |||
xscvspdp 12,12 | |||
xscvspdp 11,11 | |||
xscvspdp 0,0 | |||
vextuwrx 6,9,14 | |||
li 9,8 | |||
vextuwrx 7,9,14 | |||
vextuwrx 9,8,14 | |||
rldicl 12,6,0,32 | |||
rldicl 8,3,0,32 | |||
rldicl 0,7,0,32 | |||
rldicl 5,9,0,32 | |||
fcmpu 7,12,11 | |||
fmr 10,0 | |||
beq 7,.L71 | |||
bnl 7,.L8 | |||
mr 8,12 | |||
fmr 12,11 | |||
.L8: | |||
xscmpudp 7,0,34 | |||
bne 7,.L11 | |||
cmplw 7,7,9 | |||
ble 7,.L12 | |||
mr 7,9 | |||
.L12: | |||
rldicl 5,7,0,32 | |||
.L13: | |||
fcmpu 7,12,10 | |||
beq 7,.L72 | |||
bnl 7,.L17 | |||
mr 8,5 | |||
fmr 12,10 | |||
/* Scalar tail for the elements after the last full 64-element chunk (index r10 .. n-1). */
.L17: | |||
cmpd 7,11,10 | |||
ble 7,.L16 | |||
addi 7,10,1 | |||
sldi 9,10,2 | |||
subf 6,10,11 | |||
cmpd 7,7,11 | |||
mtctr 6 | |||
add 4,4,9 | |||
bgt 7,.L53 | |||
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L53 | |||
.p2align 4,,15 | |||
.L21: | |||
lfs 0,0(4) | |||
addi 4,4,4 | |||
fabs 0,0 | |||
fcmpu 7,0,12 | |||
bng 7,.L19 | |||
fmr 12,0 | |||
mr 8,10 | |||
.L19: | |||
addi 10,10,1 | |||
bdnz .L21 | |||
/* Vector-path exit: reload vs59-vs63, return best index + 1. */
.L16: | |||
lxv 59,-80(1) | |||
lxv 60,-64(1) | |||
addi 3,8,1 | |||
lxv 61,-48(1) | |||
lxv 62,-32(1) | |||
lxv 63,-16(1) | |||
blr | |||
.p2align 4,,15 | |||
/* First lane pair compared equal: keep the smaller of the two indices. */
.L71: | |||
cmplw 7,3,6 | |||
ble 7,.L7 | |||
mr 3,6 | |||
.L7: | |||
rldicl 8,3,0,32 | |||
b .L8 | |||
.p2align 4,,15 | |||
/* Strided path with fewer than 4 elements: go straight to the remainder loop. */
.L40: | |||
sldi 6,5,2 | |||
li 8,0 | |||
li 9,0 | |||
xxlxor 0,0,0 | |||
b .L22 | |||
.p2align 4,,15 | |||
/* Second lane pair differs: pick the value/index of the larger lane. */
.L11: | |||
blt 7,.L39 | |||
mr 5,0 | |||
b .L13 | |||
.p2align 4,,15 | |||
/* The two pair winners compared equal: keep the smaller index. */
.L72: | |||
cmpd 7,8,5 | |||
ble 7,.L17 | |||
mr 8,5 | |||
b .L17 | |||
.p2align 4,,15 | |||
.L39: | |||
xscpsgndp 10,34,34 | |||
b .L13 | |||
/* Guard targets: reload ctr with 1 before entering the respective scalar loop. */
.L53: | |||
li 9,1 | |||
mtctr 9 | |||
b .L21 | |||
.L54: | |||
li 10,1 | |||
mtctr 10 | |||
b .L35 | |||
.L60: | |||
li 9,1 | |||
mtctr 9 | |||
b .L61 | |||
.long 0 | |||
.byte 0,0,0,0,0,0,0,0 | |||
.size isamax_k,.-isamax_k | |||
/*
 * 16-byte constants loaded by the vector path of isamax_k (label .L70):
 * 32-bit lane indices 0..3, 4..7, 8..11 and 12..15.  The 16/32/64 step
 * values are built in registers in this variant, so no further constants
 * are needed here.
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
.LC2: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC3: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC4: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC5: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,417 @@ | |||
/* .file "isamin.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl isamin_k | |||
.type isamin_k, @function | |||
*/ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
PROLOGUE | |||
/*
 * isamin_k, POWER8 variant (gcc 7.3.1 output of isamin.c; the original
 * section/symbol directives above are commented out and replaced by the
 * PROLOGUE macro from common.h).
 *
 * Inputs as read by the code below:
 *   r3 = element count n        (n <= 0 returns 0)
 *   r4 = address of the single-precision input vector
 *   r5 = stride in elements     (<= 0 returns 0; 1 selects the unit-stride path)
 * Result in r3: (0-based index of an element with the smallest absolute
 * value) + 1.  The running minimum is seeded with |x[0]| and is only
 * replaced on a strictly smaller value.
 */
isamin_k: | |||
.LCF0: | |||
/* Global entry point: compute r2 from r12 and the .TOC. offset. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry isamin_k,.-isamin_k | |||
/* r11 = n; return 0 when n <= 0 or stride <= 0. */
mr. 11,3 | |||
ble 0,.L36 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/*
 * f0 = |x[0]| (initial minimum).  v30/v31 are stored at -48/-32 from r1
 * here, before the path split, and reloaded on every exit below.
 */
lfs 0,0(4) | |||
li 0,-48 | |||
cmpdi 7,5,1 | |||
stvx 30,1,0 | |||
li 0,-32 | |||
stvx 31,1,0 | |||
fabs 0,0 | |||
beq 7,.L62 | |||
/* Strided path: r6 = n with the low two bits cleared; zero -> .L40. */
rldicr. 6,11,0,61 | |||
beq 0,.L40 | |||
/*
 * Byte offsets derived from the stride: r12 = 4*inc, r5 = 8*inc,
 * r0 = 12*inc, r3 = 16*inc, r31 = -4*inc (r31 is saved at -8(r1) first).
 * r7 = address of element 1, r9 = current group index, r10 = best index.
 */
sldi 0,5,1 | |||
sldi 12,5,2 | |||
std 31,-8(1) | |||
add 0,0,5 | |||
neg 31,5 | |||
sldi 3,5,4 | |||
sldi 0,0,2 | |||
add 7,4,12 | |||
sldi 31,31,2 | |||
sldi 5,5,3 | |||
li 9,0 | |||
li 10,0 | |||
b .L24 | |||
.p2align 4,,15 | |||
/* Head element of the next group was smaller: record its index (r9). */
.L41: | |||
mr 10,9 | |||
.L25: | |||
fmr 0,12 | |||
add 7,7,3 | |||
/*
 * Rotated 4x loop: on entry r7 points at element r9+1.  Elements r9+1,
 * r9+2 and r9+3 are tested here; the head of the following group
 * (element r9+4) is tested at the bottom before looping back, since
 * element 0 already seeded f0.
 */
.L24: | |||
lfs 12,0(7) | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L26 | |||
fmr 0,12 | |||
addi 10,9,1 | |||
.L26: | |||
add 8,31,7 | |||
lfsx 12,8,5 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L28 | |||
fmr 0,12 | |||
addi 10,9,2 | |||
.L28: | |||
lfsx 12,8,0 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L30 | |||
fmr 0,12 | |||
addi 10,9,3 | |||
.L30: | |||
addi 9,9,4 | |||
cmpd 7,6,9 | |||
ble 7,.L63 | |||
lfsx 12,8,3 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
blt 7,.L41 | |||
fmr 12,0 | |||
b .L25 | |||
.p2align 4,,15 | |||
/* n <= 0. */
.L36: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/*
 * End of the unrolled part: reload r31; r6 = number of groups,
 * r9 = elements consumed, r6 = r12 * groups (scaled by 4 again at .L23 to
 * become a byte offset).
 */
.L63: | |||
addi 6,6,-1 | |||
ld 31,-8(1) | |||
srdi 6,6,2 | |||
addi 6,6,1 | |||
sldi 9,6,2 | |||
mulld 6,12,6 | |||
cmpd 7,11,9 | |||
ble 7,.L33 | |||
/* Strided remainder: ctr = n - r9, r4 advanced past the processed part. */
.L23: | |||
addi 8,9,1 | |||
sldi 6,6,2 | |||
cmpd 7,8,11 | |||
subf 8,9,11 | |||
mtctr 8 | |||
add 4,4,6 | |||
bgt 7,.L52 | |||
/* r3 = 0x8000000000000000; compiler-generated guard, .L52 forces ctr = 1. */
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L52 | |||
.p2align 4,,15 | |||
.L35: | |||
lfs 12,0(4) | |||
add 4,4,12 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L34 | |||
fmr 0,12 | |||
mr 10,9 | |||
.L34: | |||
addi 9,9,1 | |||
bdnz .L35 | |||
/* Exit: reload v30/v31, return best index + 1. */
.L33: | |||
li 0,-48 | |||
addi 3,10,1 | |||
lvx 30,1,0 | |||
li 0,-32 | |||
lvx 31,1,0 | |||
blr | |||
.p2align 4,,15 | |||
/* Unit stride: r8 = n with the low six bits cleared; non-zero -> vector path. */
.L62: | |||
rldicr. 8,11,0,57 | |||
li 10,0 | |||
bne 0,.L64 | |||
/*
 * Unit-stride scalar loop over elements r8 .. n-1.  Entered directly when
 * n < 64 (r8 = 0) and again from .L19 as the tail after the vector loop.
 */
.L4: | |||
addi 7,8,1 | |||
sldi 9,8,2 | |||
cmpd 7,7,11 | |||
add 4,4,9 | |||
subf 9,8,11 | |||
mtctr 9 | |||
bgt 7,.L51 | |||
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L51 | |||
.p2align 4,,15 | |||
.L22: | |||
lfs 12,0(4) | |||
addi 4,4,4 | |||
fabs 12,12 | |||
fcmpu 7,0,12 | |||
bng 7,.L21 | |||
fmr 0,12 | |||
mr 10,8 | |||
.L21: | |||
addi 8,8,1 | |||
bdnz .L22 | |||
li 0,-48 | |||
addi 3,10,1 | |||
lvx 30,1,0 | |||
li 0,-32 | |||
lvx 31,1,0 | |||
blr | |||
.p2align 4,,15 | |||
/*
 * Vector path setup (r31 saved at -8(r1)).  Register roles in the loop:
 *   vs4        = running per-lane minimum, seeded with |x[0..3]|
 *   vs51 (v19) = running per-lane index,   seeded with .LC2
 *   vs40       = |first vector| of the current chunk
 *   vs38/vs35  = .LC2/.LC3, vs36/vs37 = .LC4/.LC5 (lane index constants)
 *   vs48 (v16) = .LC6, vs49 (v17) = .LC7, v18 = 8 + 8 = 16 in every word
 *   v2 = index base of the current 64-element chunk (starts at 0)
 *   r7 = elements processed so far, r9 = current load address
 */
.L64: | |||
lxvd2x 4,0,4 | |||
addis 10,2,.LC2@toc@ha | |||
addis 5,2,.LC3@toc@ha | |||
std 31,-8(1) | |||
vspltisw 2,0 | |||
addi 10,10,.LC2@toc@l | |||
addis 7,2,.LC4@toc@ha | |||
addis 9,2,.LC5@toc@ha | |||
addis 6,2,.LC6@toc@ha | |||
lxvd2x 51,0,10 | |||
addis 10,2,.LC7@toc@ha | |||
addi 7,7,.LC4@toc@l | |||
addi 9,9,.LC5@toc@l | |||
addi 5,5,.LC3@toc@l | |||
xvabssp 4,4 | |||
addi 6,6,.LC6@toc@l | |||
addi 10,10,.LC7@toc@l | |||
lxvd2x 36,0,7 | |||
vspltisw 18,8 | |||
lxvd2x 37,0,9 | |||
lxvd2x 35,0,5 | |||
mr 9,4 | |||
li 7,0 | |||
lxvd2x 48,0,6 | |||
lxvd2x 49,0,10 | |||
vadduwm 18,18,18 | |||
xxlor 38,51,51 | |||
xxlor 40,4,4 | |||
b .L6 | |||
.p2align 4,,15 | |||
/* Loop back-edge: load and take |.| of the first vector of the next chunk. */
.L65: | |||
lxvd2x 5,0,9 | |||
xvabssp 40,5 | |||
/*
 * Main vector loop: 64 floats per iteration (first vector already in
 * vs40, 15 further loads).  xvcmpgtsp/xxsel pairs keep the smaller value
 * of each pair and select the matching index vector with the same mask,
 * reducing to one candidate (vs0 value, vs32 index) that replaces
 * vs4/vs51 in the lanes where the running minimum is strictly greater.
 * v2 advances by v17 each iteration.
 */
.L6: | |||
addi 5,9,16 | |||
addi 6,9,32 | |||
vadduwm 14,2,16 | |||
addi 10,9,64 | |||
addi 12,9,48 | |||
addi 31,9,80 | |||
addi 3,9,96 | |||
lxvd2x 5,0,5 | |||
lxvd2x 42,0,6 | |||
addi 5,9,112 | |||
addi 6,9,128 | |||
lxvd2x 44,0,10 | |||
lxvd2x 9,0,12 | |||
addi 10,9,160 | |||
addi 12,9,144 | |||
lxvd2x 6,0,31 | |||
lxvd2x 1,0,3 | |||
addi 31,9,176 | |||
addi 3,9,192 | |||
lxvd2x 11,0,5 | |||
lxvd2x 13,0,6 | |||
addi 5,9,208 | |||
addi 6,9,224 | |||
lxvd2x 2,0,10 | |||
lxvd2x 7,0,12 | |||
addi 10,9,240 | |||
lxvd2x 10,0,31 | |||
lxvd2x 3,0,3 | |||
xvabssp 42,42 | |||
xvabssp 5,5 | |||
addi 7,7,64 | |||
lxvd2x 8,0,5 | |||
lxvd2x 0,0,6 | |||
xvabssp 44,44 | |||
xvabssp 9,9 | |||
cmpd 7,8,7 | |||
addi 9,9,256 | |||
lxvd2x 12,0,10 | |||
xvabssp 6,6 | |||
xvabssp 1,1 | |||
xvabssp 11,11 | |||
xvabssp 13,13 | |||
xvabssp 7,7 | |||
xvabssp 2,2 | |||
xvabssp 10,10 | |||
xvabssp 3,3 | |||
xvabssp 8,8 | |||
xvabssp 0,0 | |||
xvabssp 12,12 | |||
xvcmpgtsp 32,40,5 | |||
xvcmpgtsp 62,42,9 | |||
xvcmpgtsp 45,44,6 | |||
xvcmpgtsp 63,1,11 | |||
xvcmpgtsp 39,13,7 | |||
xvcmpgtsp 47,2,10 | |||
xvcmpgtsp 41,3,8 | |||
xvcmpgtsp 33,0,12 | |||
xxsel 5,40,5,32 | |||
xxsel 32,38,35,32 | |||
xxsel 9,42,9,62 | |||
xxsel 6,44,6,45 | |||
xxsel 11,1,11,63 | |||
xxsel 44,38,35,45 | |||
xxsel 7,13,7,39 | |||
xvcmpgtsp 42,5,9 | |||
xxsel 10,2,10,47 | |||
xvcmpgtsp 45,6,11 | |||
xxsel 8,3,8,41 | |||
xxsel 63,36,37,63 | |||
xxsel 0,0,12,33 | |||
xvcmpgtsp 43,7,10 | |||
xxsel 40,36,37,33 | |||
xxsel 62,36,37,62 | |||
xvcmpgtsp 33,8,0 | |||
xxsel 41,38,35,41 | |||
xxsel 39,38,35,39 | |||
xxsel 47,36,37,47 | |||
xxsel 9,5,9,42 | |||
xxsel 42,32,62,42 | |||
xxsel 12,6,11,45 | |||
xxsel 45,44,63,45 | |||
xxsel 11,7,10,43 | |||
xvcmpgtsp 32,9,12 | |||
vadduwm 13,13,18 | |||
xxsel 43,39,47,43 | |||
xxsel 0,8,0,33 | |||
xxsel 33,41,40,33 | |||
xvcmpgtsp 44,11,0 | |||
vadduwm 1,1,18 | |||
xxsel 12,9,12,32 | |||
xxsel 32,42,45,32 | |||
vadduwm 0,2,0 | |||
vadduwm 2,2,17 | |||
xxsel 0,11,0,44 | |||
xxsel 33,43,33,44 | |||
xvcmpgtsp 45,12,0 | |||
vadduwm 1,14,1 | |||
xxsel 0,12,0,45 | |||
xxsel 32,32,33,45 | |||
xvcmpgtsp 33,4,0 | |||
xxsel 51,51,32,33 | |||
xxsel 4,4,0,33 | |||
bgt 7,.L65 | |||
/*
 * Horizontal reduction: the four lanes of vs4 are converted to double
 * (xscvspdp) and the four lanes of v19 are moved to GPRs (vspltw +
 * mfvsrwz).  The lanes are compared pairwise and then pair against pair;
 * on equal values the smaller index is kept (.L66, .L13, .L67).
 * Result: f0 = minimum, r10 = its index.
 */
xxsldwi 0,4,4,1 | |||
xscvspdp 10,4 | |||
vspltw 0,19,0 | |||
xxsldwi 12,4,4,3 | |||
xscvspdp 0,0 | |||
mfvsrwz 3,51 | |||
mfvsrwz 6,32 | |||
vspltw 0,19,3 | |||
xscvspdp 12,12 | |||
xxsldwi 4,4,4,2 | |||
mfvsrwz 7,32 | |||
vspltw 0,19,2 | |||
xscvspdp 4,4 | |||
mfvsrwz 9,32 | |||
fcmpu 7,0,10 | |||
rldicl 10,3,0,32 | |||
rldicl 31,6,0,32 | |||
fmr 11,12 | |||
rldicl 5,7,0,32 | |||
rldicl 0,9,0,32 | |||
beq 7,.L66 | |||
bng 7,.L9 | |||
fmr 0,10 | |||
mr 10,31 | |||
.L9: | |||
fcmpu 7,12,4 | |||
bne 7,.L12 | |||
cmplw 7,7,9 | |||
ble 7,.L13 | |||
mr 7,9 | |||
.L13: | |||
rldicl 5,7,0,32 | |||
.L14: | |||
fcmpu 7,0,11 | |||
beq 7,.L67 | |||
bng 7,.L19 | |||
fmr 0,11 | |||
mr 10,5 | |||
/* Reload r31; run the scalar tail at .L4 if elements remain past r8, else exit. */
.L19: | |||
cmpd 7,11,8 | |||
ld 31,-8(1) | |||
bgt 7,.L4 | |||
b .L33 | |||
.p2align 4,,15 | |||
/* First lane pair compared equal: keep the smaller of the two indices. */
.L66: | |||
cmplw 7,3,6 | |||
ble 7,.L8 | |||
mr 3,6 | |||
.L8: | |||
rldicl 10,3,0,32 | |||
b .L9 | |||
.p2align 4,,15 | |||
/* Strided path with fewer than 4 elements: go straight to the remainder loop. */
.L40: | |||
sldi 12,5,2 | |||
li 10,0 | |||
li 9,0 | |||
b .L23 | |||
.p2align 4,,15 | |||
/* Second lane pair differs: if the first of the two is greater, take the other lane's value and index. */
.L12: | |||
bng 7,.L14 | |||
fmr 11,4 | |||
mr 5,0 | |||
b .L14 | |||
.p2align 4,,15 | |||
/* The two pair winners compared equal: keep the smaller index. */
.L67: | |||
cmpd 7,10,5 | |||
ble 7,.L19 | |||
mr 10,5 | |||
b .L19 | |||
/* Guard targets: reload ctr with 1 before entering the respective scalar loop. */
.L51: | |||
li 9,1 | |||
mtctr 9 | |||
b .L22 | |||
.L52: | |||
li 8,1 | |||
mtctr 8 | |||
b .L35 | |||
.long 0 | |||
.byte 0,0,0,0,0,1,0,0 | |||
.size isamin_k,.-isamin_k | |||
/*
 * 16-byte constants loaded by the vector path of isamin_k (label .L64):
 *   .LC2 - .LC5 : 32-bit lane indices 0..3, 4..7, 8..11, 12..15
 *   .LC6        : 32 in every word
 *   .LC7        : 64 in every word (added to the chunk base once per loop
 *                 iteration)
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
.LC2: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC3: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC4: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC5: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
.LC6: | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.long 32 | |||
.LC7: | |||
.long 64 | |||
.long 64 | |||
.long 64 | |||
.long 64 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |
@@ -0,0 +1,382 @@ | |||
/*
 * isamin_k, POWER9 variant (gcc 7.3.1 output of isamin.c).
 *
 * Inputs as read by the code below:
 *   r3 = element count n        (n <= 0 returns 0)
 *   r4 = address of the single-precision input vector
 *   r5 = stride in elements     (<= 0 returns 0; 1 selects the unit-stride path)
 * Result in r3: (0-based index of an element with the smallest absolute
 * value) + 1.  The running minimum is seeded with |x[0]| and is only
 * replaced on a strictly smaller value.
 */
.file "isamin.c" | |||
.abiversion 2 | |||
.section ".text" | |||
.align 2 | |||
.p2align 4,,15 | |||
.globl isamin_k | |||
.type isamin_k, @function | |||
isamin_k: | |||
.LCF0: | |||
/* Global entry point: compute r2 from r12 and the .TOC. offset. */
0: addis 2,12,.TOC.-.LCF0@ha | |||
addi 2,2,.TOC.-.LCF0@l | |||
.localentry isamin_k,.-isamin_k | |||
/* r11 = n; return 0 when n <= 0 or stride <= 0. */
mr. 11,3 | |||
ble 0,.L36 | |||
cmpdi 7,5,0 | |||
li 3,0 | |||
blelr 7 | |||
/*
 * f0 = |x[0]| (initial minimum).  vs61-vs63 (v29-v31) are stored at
 * -64/-48/-32 from r1 here, before the path split, and reloaded on every
 * exit below.
 */
lfs 0,0(4) | |||
cmpdi 7,5,1 | |||
stxv 61,-64(1) | |||
stxv 62,-48(1) | |||
stxv 63,-32(1) | |||
fabs 0,0 | |||
beq 7,.L62 | |||
/* Strided path: r6 = n with the low two bits cleared; zero -> .L40. */
rldicr. 6,11,0,61 | |||
beq 0,.L40 | |||
/*
 * Byte offsets derived from the stride: r0 = 4*inc, r31 = 8*inc,
 * r5 = 12*inc, r3 = 16*inc, r12 = -4*inc (r31 is saved at -8(r1) first).
 * r7 = address of element 1, r9 = current group index, r10 = best index.
 */
sldi 8,5,1 | |||
sldi 0,5,2 | |||
neg 12,5 | |||
std 31,-8(1) | |||
sldi 3,5,4 | |||
sldi 31,5,3 | |||
li 9,0 | |||
li 10,0 | |||
add 5,8,5 | |||
add 7,4,0 | |||
sldi 12,12,2 | |||
sldi 5,5,2 | |||
b .L24 | |||
.p2align 4,,15 | |||
/* Head element of the next group was smaller: record its index (r9). */
.L41: | |||
mr 10,9 | |||
.L25: | |||
add 7,7,3 | |||
fmr 0,12 | |||
/*
 * Rotated 4x loop: on entry r7 points at element r9+1.  Elements r9+1,
 * r9+2 and r9+3 are tested here; the head of the following group
 * (element r9+4) is tested at the bottom before looping back, since
 * element 0 already seeded f0.
 */
.L24: | |||
lfs 12,0(7) | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L26 | |||
fmr 0,12 | |||
addi 10,9,1 | |||
.L26: | |||
add 8,7,12 | |||
lfsx 12,8,31 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L28 | |||
fmr 0,12 | |||
addi 10,9,2 | |||
.L28: | |||
lfsx 12,8,5 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L30 | |||
fmr 0,12 | |||
addi 10,9,3 | |||
.L30: | |||
addi 9,9,4 | |||
cmpd 7,6,9 | |||
ble 7,.L63 | |||
lfsx 12,8,3 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
blt 7,.L41 | |||
fmr 12,0 | |||
b .L25 | |||
.p2align 4,,15 | |||
/* n <= 0. */
.L36: | |||
li 3,0 | |||
blr | |||
.p2align 4,,15 | |||
/*
 * End of the unrolled part: reload r31; r6 = number of groups,
 * r9 = elements consumed, r6 = r0 * groups (scaled by 4 again at .L23 to
 * become a byte offset).
 */
.L63: | |||
addi 6,6,-1 | |||
ld 31,-8(1) | |||
srdi 6,6,2 | |||
addi 6,6,1 | |||
sldi 9,6,2 | |||
mulld 6,0,6 | |||
cmpd 7,11,9 | |||
ble 7,.L33 | |||
/* Strided remainder: ctr = n - r9, r4 advanced past the processed part. */
.L23: | |||
addi 8,9,1 | |||
sldi 6,6,2 | |||
subf 7,9,11 | |||
cmpd 7,8,11 | |||
mtctr 7 | |||
add 4,4,6 | |||
bgt 7,.L52 | |||
/* r3 = 0x8000000000000000; compiler-generated guard, .L52 forces ctr = 1. */
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L52 | |||
.p2align 4,,15 | |||
.L35: | |||
lfs 12,0(4) | |||
add 4,4,0 | |||
fabs 12,12 | |||
fcmpu 7,12,0 | |||
bnl 7,.L34 | |||
fmr 0,12 | |||
mr 10,9 | |||
.L34: | |||
addi 9,9,1 | |||
bdnz .L35 | |||
/* Exit: reload vs61-vs63, return best index + 1. */
.L33: | |||
lxv 61,-64(1) | |||
lxv 62,-48(1) | |||
addi 3,10,1 | |||
lxv 63,-32(1) | |||
blr | |||
.p2align 4,,15 | |||
/* Unit stride: r8 = n with the low six bits cleared; non-zero -> vector path. */
.L62: | |||
rldicr. 8,11,0,57 | |||
li 10,0 | |||
bne 0,.L64 | |||
/*
 * Unit-stride scalar loop over elements r8 .. n-1.  Entered directly when
 * n < 64 (r8 = 0) and again from .L19 as the tail after the vector loop.
 */
.L4: | |||
addi 7,8,1 | |||
sldi 9,8,2 | |||
subf 6,8,11 | |||
cmpd 7,7,11 | |||
mtctr 6 | |||
add 4,4,9 | |||
bgt 7,.L51 | |||
li 3,-1 | |||
rldicr 3,3,0,0 | |||
cmpd 7,11,3 | |||
beq 7,.L51 | |||
.p2align 4,,15 | |||
.L22: | |||
lfs 12,0(4) | |||
addi 4,4,4 | |||
fabs 12,12 | |||
fcmpu 7,0,12 | |||
bng 7,.L21 | |||
fmr 0,12 | |||
mr 10,8 | |||
.L21: | |||
addi 8,8,1 | |||
bdnz .L22 | |||
lxv 61,-64(1) | |||
lxv 62,-48(1) | |||
addi 3,10,1 | |||
lxv 63,-32(1) | |||
blr | |||
.p2align 4,,15 | |||
/*
 * Vector path setup (r31 saved at -8(r1)).  Register roles in the loop:
 *   vs4        = running per-lane minimum, seeded with |x[0..3]|
 *   vs48 (v16) = running per-lane index,   seeded with .LC2
 *   vs40       = |first vector| of the current chunk
 *   vs35/vs49  = .LC2/.LC3, vs51/vs34 = .LC4/.LC5 (lane index constants)
 *   v15 = 16, v31 = 32, v14 = 64 in every word (xxspltib + vextsb2w)
 *   v18 = index base of the current 64-element chunk (starts at 0)
 *   r10 = elements processed so far, r9 = current load address
 */
.L64: | |||
lxv 0,0(4) | |||
xxspltib 47,16 | |||
addis 6,2,.LC2@toc@ha | |||
addis 7,2,.LC3@toc@ha | |||
addis 10,2,.LC4@toc@ha | |||
addis 9,2,.LC5@toc@ha | |||
xxspltib 63,32 | |||
xxspltib 46,64 | |||
addi 6,6,.LC2@toc@l | |||
addi 10,10,.LC4@toc@l | |||
addi 7,7,.LC3@toc@l | |||
std 31,-8(1) | |||
addi 9,9,.LC5@toc@l | |||
xxspltib 50,0 | |||
vextsb2w 15,15 | |||
lxv 48,0(6) | |||
lxv 51,0(10) | |||
vextsb2w 31,31 | |||
vextsb2w 14,14 | |||
xvabssp 4,0 | |||
lxv 34,0(9) | |||
lxv 49,0(7) | |||
mr 9,4 | |||
li 10,0 | |||
xxlor 35,48,48 | |||
xxlor 40,4,4 | |||
b .L6 | |||
.p2align 4,,15 | |||
/* Loop back-edge: load and take |.| of the first vector of the next chunk. */
.L65: | |||
lxv 0,0(9) | |||
xvabssp 40,0 | |||
/*
 * Main vector loop: 64 floats per iteration (first vector already in
 * vs40, 15 further loads; r9 is advanced early, so most loads use negative
 * displacements).  xvcmpgtsp/xxsel pairs keep the smaller value of each
 * pair and select the matching index vector with the same mask, reducing
 * to one candidate (vs0 value, vs32 index) that replaces vs4/vs48 in the
 * lanes where the running minimum is strictly greater.  v18 advances by
 * v14 each iteration.
 */
.L6: | |||
lxv 0,16(9) | |||
vadduwm 29,18,31 | |||
lxv 12,240(9) | |||
addi 10,10,64 | |||
addi 9,9,256 | |||
cmpd 7,8,10 | |||
xvabssp 5,0 | |||
lxv 0,-224(9) | |||
xvabssp 12,12 | |||
xvabssp 32,0 | |||
lxv 0,-208(9) | |||
xvcmpgtsp 42,40,5 | |||
xvabssp 9,0 | |||
lxv 0,-192(9) | |||
xxsel 5,40,5,42 | |||
xvabssp 44,0 | |||
lxv 0,-176(9) | |||
xvcmpgtsp 62,32,9 | |||
xvabssp 6,0 | |||
lxv 0,-160(9) | |||
xxsel 9,32,9,62 | |||
xxsel 32,35,49,42 | |||
xvabssp 1,0 | |||
lxv 0,-144(9) | |||
xxsel 62,51,34,62 | |||
xvcmpgtsp 42,5,9 | |||
xvcmpgtsp 37,44,6 | |||
xvabssp 11,0 | |||
lxv 0,-128(9) | |||
xxsel 9,5,9,42 | |||
xxsel 42,32,62,42 | |||
xxsel 6,44,6,37 | |||
xxsel 37,35,49,37 | |||
xvabssp 13,0 | |||
lxv 0,-112(9) | |||
xvcmpgtsp 36,1,11 | |||
xvabssp 7,0 | |||
lxv 0,-96(9) | |||
xxsel 11,1,11,36 | |||
xxsel 36,51,34,36 | |||
xvabssp 2,0 | |||
lxv 0,-80(9) | |||
xvcmpgtsp 45,6,11 | |||
xvcmpgtsp 39,13,7 | |||
xvabssp 10,0 | |||
lxv 0,-64(9) | |||
xxsel 7,13,7,39 | |||
xxsel 39,35,49,39 | |||
xvabssp 3,0 | |||
lxv 0,-48(9) | |||
xvcmpgtsp 38,2,10 | |||
xvabssp 8,0 | |||
lxv 0,-32(9) | |||
xxsel 10,2,10,38 | |||
xxsel 38,51,34,38 | |||
xvabssp 0,0 | |||
xvcmpgtsp 43,7,10 | |||
xvcmpgtsp 41,3,8 | |||
xvcmpgtsp 33,0,12 | |||
xxsel 8,3,8,41 | |||
xxsel 41,35,49,41 | |||
xxsel 0,0,12,33 | |||
xxsel 40,51,34,33 | |||
xxsel 12,6,11,45 | |||
xxsel 11,7,10,43 | |||
xvcmpgtsp 33,8,0 | |||
xxsel 45,37,36,45 | |||
xvcmpgtsp 32,9,12 | |||
xxsel 43,39,38,43 | |||
vadduwm 13,13,15 | |||
xxsel 0,8,0,33 | |||
xxsel 33,41,40,33 | |||
xxsel 12,9,12,32 | |||
xxsel 32,42,45,32 | |||
xvcmpgtsp 44,11,0 | |||
vadduwm 1,1,15 | |||
vadduwm 0,18,0 | |||
vadduwm 18,18,14 | |||
xxsel 0,11,0,44 | |||
xxsel 33,43,33,44 | |||
xvcmpgtsp 45,12,0 | |||
vadduwm 1,29,1 | |||
xxsel 0,12,0,45 | |||
xxsel 32,32,33,45 | |||
xvcmpgtsp 33,4,0 | |||
xxsel 48,48,32,33 | |||
xxsel 4,4,0,33 | |||
bgt 7,.L65 | |||
/*
 * Horizontal reduction: the four lanes of vs4 are converted to double
 * (xscvspdp) and the four words of v16 are extracted with vextuwrx at byte
 * offsets 0, 4, 8 and 12.  The lanes are compared pairwise and then pair
 * against pair; on equal values the smaller index is kept (.L66, .L13,
 * .L67).  Result: f0 = minimum, r10 = its index.
 */
xxsldwi 0,4,4,3 | |||
xxsldwi 11,4,4,2 | |||
li 9,0 | |||
li 10,12 | |||
xxsldwi 12,4,4,1 | |||
xscvspdp 4,4 | |||
vextuwrx 3,9,16 | |||
li 9,4 | |||
xscvspdp 0,0 | |||
xscvspdp 11,11 | |||
xscvspdp 12,12 | |||
vextuwrx 6,9,16 | |||
li 9,8 | |||
vextuwrx 7,9,16 | |||
vextuwrx 9,10,16 | |||
rldicl 31,6,0,32 | |||
rldicl 10,3,0,32 | |||
rldicl 5,7,0,32 | |||
rldicl 0,9,0,32 | |||
fcmpu 7,0,11 | |||
fmr 10,12 | |||
beq 7,.L66 | |||
bng 7,.L9 | |||
mr 10,31 | |||
fmr 0,11 | |||
.L9: | |||
fcmpu 7,12,4 | |||
bne 7,.L12 | |||
cmplw 7,7,9 | |||
ble 7,.L13 | |||
mr 7,9 | |||
.L13: | |||
rldicl 5,7,0,32 | |||
.L14: | |||
fcmpu 7,0,10 | |||
beq 7,.L67 | |||
bng 7,.L19 | |||
mr 10,5 | |||
fmr 0,10 | |||
/* Reload r31; run the scalar tail at .L4 if elements remain past r8, else exit. */
.L19: | |||
cmpd 7,11,8 | |||
ld 31,-8(1) | |||
bgt 7,.L4 | |||
b .L33 | |||
.p2align 4,,15 | |||
/* First lane pair compared equal: keep the smaller of the two indices. */
.L66: | |||
cmplw 7,3,6 | |||
ble 7,.L8 | |||
mr 3,6 | |||
.L8: | |||
rldicl 10,3,0,32 | |||
b .L9 | |||
.p2align 4,,15 | |||
/* Strided path with fewer than 4 elements: go straight to the remainder loop. */
.L40: | |||
sldi 0,5,2 | |||
li 10,0 | |||
li 9,0 | |||
b .L23 | |||
.p2align 4,,15 | |||
/* Second lane pair differs: if the first of the two is greater, take the other lane's value and index. */
.L12: | |||
bng 7,.L14 | |||
mr 5,0 | |||
fmr 10,4 | |||
b .L14 | |||
.p2align 4,,15 | |||
/* The two pair winners compared equal: keep the smaller index. */
.L67: | |||
cmpd 7,10,5 | |||
ble 7,.L19 | |||
mr 10,5 | |||
b .L19 | |||
/* Guard targets: reload ctr with 1 before entering the respective scalar loop. */
.L51: | |||
li 9,1 | |||
mtctr 9 | |||
b .L22 | |||
.L52: | |||
li 8,1 | |||
mtctr 8 | |||
b .L35 | |||
.long 0 | |||
.byte 0,0,0,0,0,1,0,0 | |||
.size isamin_k,.-isamin_k | |||
/*
 * 16-byte constants loaded by the vector path of isamin_k (label .L64):
 * 32-bit lane indices 0..3, 4..7, 8..11 and 12..15.  The 16/32/64 step
 * values are built in registers in this variant, so no further constants
 * are needed here.
 */
.section .rodata.cst16,"aM",@progbits,16 | |||
.align 4 | |||
.LC2: | |||
.long 0 | |||
.long 1 | |||
.long 2 | |||
.long 3 | |||
.LC3: | |||
.long 4 | |||
.long 5 | |||
.long 6 | |||
.long 7 | |||
.LC4: | |||
.long 8 | |||
.long 9 | |||
.long 10 | |||
.long 11 | |||
.LC5: | |||
.long 12 | |||
.long 13 | |||
.long 14 | |||
.long 15 | |||
.ident "GCC: (SUSE Linux) 7.3.1 20180323 [gcc-7-branch revision 258812]" | |||
.section .note.GNU-stack,"",@progbits |