@@ -89,17 +89,23 @@ | |||
#endif | |||
#define STACKSIZE 16 | |||
#define M 4 + STACKSIZE(%esp) | |||
#define N 8 + STACKSIZE(%esp) | |||
#define ALPHA 16 + STACKSIZE(%esp) | |||
#define A 20 + STACKSIZE(%esp) | |||
#define STACK_LDA 24 + STACKSIZE(%esp) | |||
#define STACK_X 28 + STACKSIZE(%esp) | |||
#define STACK_INCX 32 + STACKSIZE(%esp) | |||
#define Y 36 + STACKSIZE(%esp) | |||
#define STACK_INCY 40 + STACKSIZE(%esp) | |||
#define BUFFER 44 + STACKSIZE(%esp) | |||
/*
 * Stack frame layout used by this kernel once the prologue has run
 * (subl $ARGS,%esp followed by the register pushes):
 *
 *   STACKSIZE + 0 .. STACKSIZE + ARGS - 1 (%esp)  ARGS bytes of local scratch
 *                                                 (MMM, NN, AA, LDAX below)
 *   4 + STACKSIZE + ARGS (%esp) and upwards       incoming stack arguments
 *
 * Every incoming-argument offset therefore carries the extra ARGS term.
 * STACKSIZE presumably covers the four 4-byte saved registers that the
 * epilogue pops -- TODO confirm against the full prologue.
 */
#define ARGS 16 | |||
#define M 4 + STACKSIZE+ARGS(%esp) | |||
#define N 8 + STACKSIZE+ARGS(%esp) | |||
#define ALPHA 16 + STACKSIZE+ARGS(%esp) | |||
#define A 20 + STACKSIZE+ARGS(%esp) | |||
#define STACK_LDA 24 + STACKSIZE+ARGS(%esp) | |||
#define STACK_X 28 + STACKSIZE+ARGS(%esp) | |||
#define STACK_INCX 32 + STACKSIZE+ARGS(%esp) | |||
#define Y 36 + STACKSIZE+ARGS(%esp) | |||
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp) | |||
#define BUFFER 44 + STACKSIZE+ARGS(%esp) | |||
/* Local scratch slots inside the ARGS area: backups of M, N, A and LDA. */
#define MMM 0+STACKSIZE(%esp) | |||
#define NN 4+STACKSIZE(%esp) | |||
#define AA 8+STACKSIZE(%esp) | |||
#define LDAX 12+STACKSIZE(%esp) | |||
#define I %eax | |||
#define J %ebx | |||
@@ -114,6 +120,7 @@ | |||
PROLOGUE | |||
subl $ARGS,%esp | |||
pushl %ebp | |||
pushl %edi | |||
pushl %esi | |||
@@ -122,6 +129,37 @@ | |||
PROFCODE | |||
/*
 * Chunked-M driver, setup part. The caller's LDA, N, A and M are copied
 * into the scratch slots LDAX, NN, AA and MMM so that every pass through
 * .L0t can restore them. MMM is the running count of M still to process.
 */
movl STACK_LDA, LDA | |||
movl LDA,LDAX # backup LDA | |||
movl N,J | |||
movl J,NN # backup N | |||
movl A,J | |||
movl J,AA # backup A | |||
movl M,J | |||
movl J,MMM # mov M to MMM | |||
/* Top of the per-chunk loop; .L999 jumps back here after each pass. */
.L0t: | |||
xorl J,J | |||
addl $1,J | |||
/* NOTE(review): the second kernel in this patch shifts by 22 at the same spot -- confirm 23 is intended here. */
sall $23,J # J = 1<<23 = 2^23, the per-pass limit on M | |||
subl J,MMM # MMM=MMM-J | |||
/* movl does not modify EFLAGS, so the jge below still tests the result of the subl above. */
movl J,M | |||
jge .L00t | |||
ALIGN_4 | |||
/*
 * MMM went negative, so fewer than J items were left before the subtraction.
 * MMM + J recovers that remainder; a remainder <= 0 means all work is done.
 */
movl MMM,%eax | |||
addl J,%eax | |||
jle .L999x | |||
movl %eax,M | |||
/* Restore the per-pass inputs from their backups before running the kernel body. */
.L00t: | |||
movl AA,%eax | |||
movl %eax,A # mov AA to A | |||
movl NN,%eax | |||
movl %eax,N # reset N | |||
movl LDAX, LDA # reset LDA | |||
movl STACK_X, X | |||
movl STACK_INCX, INCX | |||
movl STACK_INCY, INCY | |||
@@ -628,10 +666,19 @@ | |||
ALIGN_4 | |||
/*
 * End of one chunk: advance the saved A pointer (AA) by M elements,
 * i.e. M*SIZE bytes, then loop back for the next chunk.
 * NOTE(review): only AA is advanced here; nothing in the visible hunks
 * offsets Y by the amount already processed -- confirm the elided kernel
 * body accounts for this on the second and later chunks.
 */
.L999: | |||
movl M,J | |||
leal (,J,SIZE),%eax | |||
addl %eax,AA | |||
jmp .L0t | |||
ALIGN_4 | |||
/* All chunks done: restore the saved registers, release the ARGS-byte scratch area, return. */
.L999x: | |||
popl %ebx | |||
popl %esi | |||
popl %edi | |||
popl %ebp | |||
addl $ARGS,%esp | |||
ret | |||
EPILOGUE |
@@ -76,18 +76,24 @@ | |||
#endif | |||
#define STACKSIZE 16 | |||
/*
 * Stack frame layout used by this kernel once the prologue has run
 * (subl $ARGS,%esp followed by the register pushes):
 *
 *   STACKSIZE + 0 .. STACKSIZE + ARGS - 1 (%esp)  ARGS bytes of local scratch
 *                                                 (MMM, AA, LDAX, NN below)
 *   4 + STACKSIZE + ARGS (%esp) and upwards       incoming stack arguments
 *
 * Every incoming-argument offset therefore carries the extra ARGS term.
 * STACKSIZE presumably covers the four 4-byte saved registers that the
 * epilogue pops -- TODO confirm against the full prologue.
 */
#define ARGS 16 | |||
#define M 4 + STACKSIZE+ARGS(%esp) | |||
#define N 8 + STACKSIZE+ARGS(%esp) | |||
#define ALPHA 16 + STACKSIZE+ARGS(%esp) | |||
#define A 24 + STACKSIZE+ARGS(%esp) | |||
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp) | |||
#define STACK_X 32 + STACKSIZE+ARGS(%esp) | |||
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp) | |||
#define Y 40 + STACKSIZE+ARGS(%esp) | |||
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp) | |||
#define BUFFER 48 + STACKSIZE+ARGS(%esp) | |||
/* Local scratch slots inside the ARGS area: backups of M, A, LDA and N. */
#define MMM 0+STACKSIZE(%esp) | |||
#define AA 4+STACKSIZE(%esp) | |||
#define LDAX 8+STACKSIZE(%esp) | |||
#define NN 12+STACKSIZE(%esp) | |||
#define M 4 + STACKSIZE(%esp) | |||
#define N 8 + STACKSIZE(%esp) | |||
#define ALPHA 16 + STACKSIZE(%esp) | |||
#define A 24 + STACKSIZE(%esp) | |||
#define STACK_LDA 28 + STACKSIZE(%esp) | |||
#define STACK_X 32 + STACKSIZE(%esp) | |||
#define STACK_INCX 36 + STACKSIZE(%esp) | |||
#define Y 40 + STACKSIZE(%esp) | |||
#define STACK_INCY 44 + STACKSIZE(%esp) | |||
#define BUFFER 48 + STACKSIZE(%esp) | |||
#define I %eax | |||
#define J %ebx | |||
@@ -101,6 +107,8 @@ | |||
PROLOGUE | |||
subl $ARGS,%esp | |||
pushl %ebp | |||
pushl %edi | |||
pushl %esi | |||
@@ -108,7 +116,38 @@ | |||
PROFCODE | |||
/*
 * Chunked-M driver, setup part. The caller's LDA, N, A and M are copied
 * into the scratch slots LDAX, NN, AA and MMM so that every pass through
 * .L0t can restore them. MMM is the running count of M still to process.
 */
movl STACK_LDA, LDA | |||
movl LDA,LDAX # backup LDA | |||
movl N,J | |||
movl J,NN # backup N | |||
movl A,J | |||
movl J,AA # backup A | |||
movl M,J | |||
movl J,MMM # mov M to MMM | |||
/* Top of the per-chunk loop; .L999 jumps back here after each pass. */
.L0t: | |||
xorl J,J | |||
addl $1,J | |||
sall $22,J # J=2^22 | |||
subl J,MMM # MMM=MMM-J | |||
/* movl does not modify EFLAGS, so the jge below still tests the result of the subl above. */
movl J,M | |||
jge .L00t | |||
ALIGN_4 | |||
/*
 * MMM went negative, so fewer than J items were left before the subtraction.
 * MMM + J recovers that remainder; a remainder <= 0 means all work is done.
 */
movl MMM,%eax | |||
addl J,%eax | |||
jle .L999x | |||
movl %eax,M | |||
/* Restore the per-pass inputs from their backups before running the kernel body. */
.L00t: | |||
movl AA,%eax | |||
movl %eax,A # mov AA to A | |||
movl NN,%eax | |||
movl %eax,N # reset N | |||
movl LDAX, LDA # reset LDA | |||
movl STACK_X, X | |||
movl STACK_INCX, INCX | |||
movl STACK_INCY, INCY | |||
@@ -117,6 +156,7 @@ | |||
leal (,INCY, SIZE), INCY | |||
leal (,LDA, SIZE), LDA | |||
subl $-16 * SIZE, A | |||
cmpl $0, N | |||
@@ -560,10 +600,19 @@ | |||
ALIGN_4 | |||
/*
 * End of one chunk: advance the saved A pointer (AA) by M elements,
 * i.e. M*SIZE bytes, then loop back for the next chunk.
 * NOTE(review): only AA is advanced here; nothing in the visible hunks
 * offsets Y by the amount already processed -- confirm the elided kernel
 * body accounts for this on the second and later chunks.
 */
.L999: | |||
movl M,J | |||
leal (,J,SIZE),%eax | |||
addl %eax,AA | |||
jmp .L0t | |||
ALIGN_4 | |||
/* All chunks done: restore the saved registers, release the ARGS-byte scratch area, return. */
.L999x: | |||
popl %ebx | |||
popl %esi | |||
popl %edi | |||
popl %ebp | |||
addl $ARGS,%esp | |||
ret | |||
EPILOGUE |
@@ -47,7 +47,7 @@ | |||
#ifndef WINDOWS_ABI | |||
#define STACKSIZE 64 | |||
#define STACKSIZE 128 | |||
#define OLD_M %rdi | |||
#define OLD_N %rsi | |||
@@ -57,6 +57,10 @@ | |||
#define STACK_Y 16 + STACKSIZE(%rsp) | |||
#define STACK_INCY 24 + STACKSIZE(%rsp) | |||
#define STACK_BUFFER 32 + STACKSIZE(%rsp) | |||
#define MMM 56(%rsp) | |||
#define NN 64(%rsp) | |||
#define AA 72(%rsp) | |||
#define LDAX 80(%rsp) | |||
#else | |||
@@ -71,6 +75,10 @@ | |||
#define STACK_Y 72 + STACKSIZE(%rsp) | |||
#define STACK_INCY 80 + STACKSIZE(%rsp) | |||
#define STACK_BUFFER 88 + STACKSIZE(%rsp) | |||
/*
 * Scratch slots holding the backups of M, N, A and LDA used by the
 * chunked-M loop (.L0t / .L999); presumably the WINDOWS_ABI frame, since
 * this sits in the #else arm of the ABI conditional.
 * The first two lines previously read "#defien", which is not a
 * preprocessor directive, so MMM and NN were never defined on this path.
 * NOTE(review): the register-save code stores %xmm15 with movups at
 * 208(%rsp), which covers bytes 208..223 and therefore overlaps MMM at
 * 216(%rsp). The offsets are left unchanged because the STACKSIZE for
 * this branch is not visible here -- confirm the frame size and move
 * these slots clear of the XMM save area.
 */
#define MMM 216(%rsp) | |||
#define NN 224(%rsp) | |||
#define AA 232(%rsp) | |||
#define LDAX 240(%rsp) | |||
#endif | |||
@@ -127,29 +135,46 @@ | |||
movups %xmm14, 192(%rsp) | |||
movups %xmm15, 208(%rsp) | |||
movq OLD_M, M | |||
movq OLD_N, N | |||
movq OLD_A, A | |||
movq OLD_LDA, LDA | |||
movq OLD_M, MMM | |||
movq OLD_N, NN | |||
movq OLD_A, AA | |||
movq OLD_LDA, LDAX | |||
movq OLD_X, X | |||
#else | |||
movq OLD_M, M | |||
movq OLD_N, N | |||
movq OLD_A, A | |||
movq OLD_LDA, LDA | |||
movq OLD_M, MMM | |||
movq OLD_N, NN | |||
movq OLD_A, AA | |||
movq OLD_LDA, LDAX | |||
#endif | |||
movq STACK_INCX, INCX | |||
movq STACK_Y, Y | |||
movq STACK_INCY, INCY | |||
movq STACK_BUFFER, BUFFER | |||
#ifndef WINDOWS_ABI | |||
pshufd $0, %xmm0, ALPHA | |||
#else | |||
pshufd $0, %xmm3, ALPHA | |||
#endif | |||
/*
 * Top of the per-chunk loop; .L999 jumps back here after each pass.
 * M is set to 1<<22 and subtracted from the remaining count MMM.
 * If the result is still >= 0 a full chunk of 1<<22 is processed.
 */
.L0t: | |||
xorq M,M | |||
addq $1,M | |||
salq $22,M | |||
subq M,MMM | |||
jge .L00t | |||
ALIGN_4 | |||
/*
 * MMM went negative, so fewer than 1<<22 items were left before the subtraction.
 * MMM + M recovers that remainder; a remainder <= 0 means all work is done.
 */
movq MMM,%rax | |||
addq M,%rax | |||
jle .L999x | |||
movq %rax,M | |||
/*
 * Restore the per-pass inputs. INCX, Y, INCY and BUFFER are reloaded from
 * their stack slots on every pass; INCX, INCY and LDA are then scaled by
 * SIZE in place by the leaq instructions below.
 */
.L00t: | |||
movq LDAX,LDA | |||
movq NN,N | |||
movq AA,A | |||
movq STACK_INCX, INCX | |||
movq STACK_Y, Y | |||
movq STACK_INCY, INCY | |||
movq STACK_BUFFER, BUFFER | |||
leaq (,INCX, SIZE), INCX | |||
leaq (,INCY, SIZE), INCY | |||
leaq (,LDA, SIZE), LDA | |||
@@ -6341,6 +6366,12 @@ | |||
ALIGN_4 | |||
/*
 * End of one chunk: advance the saved A pointer (AA) by M elements,
 * i.e. M*SIZE bytes, then loop back for the next chunk.
 * NOTE(review): .L00t reloads Y from STACK_Y unchanged on every pass and
 * nothing visible offsets it by the amount already processed -- confirm
 * the elided kernel body accounts for this on later chunks.
 */
.L999: | |||
leaq (,M,SIZE),%rax | |||
addq %rax,AA | |||
jmp .L0t | |||
ALIGN_4 | |||
.L999x: | |||
movq 0(%rsp), %rbx | |||
movq 8(%rsp), %rbp | |||
movq 16(%rsp), %r12 | |||