@@ -60,8 +60,10 @@ | |||||
#ifdef WINDOWS_ABI | #ifdef WINDOWS_ABI | ||||
movq 40(%rsp), X | movq 40(%rsp), X | ||||
movq 48(%rsp), INCX | movq 48(%rsp), INCX | ||||
movq 64(%rsp), %r9 | |||||
movaps %xmm3, %xmm0 | movaps %xmm3, %xmm0 | ||||
#else | |||||
movq 24(%rsp), %r9 | |||||
#endif | #endif | ||||
SAVEREGISTERS | SAVEREGISTERS | ||||
@@ -76,6 +78,8 @@ | |||||
shufps $0, %xmm0, %xmm0 | shufps $0, %xmm0, %xmm0 | ||||
jne .L100 # Alpha != ZERO | jne .L100 # Alpha != ZERO | ||||
cmpq $1, %r9 | |||||
je .L100 | je .L100 | ||||
/* Alpha == ZERO */ | /* Alpha == ZERO */ | ||||
cmpq $SIZE, INCX | cmpq $SIZE, INCX | ||||
@@ -48,6 +48,7 @@ | |||||
#define X ARG2 | #define X ARG2 | ||||
#define INCX ARG3 | #define INCX ARG3 | ||||
#endif | #endif | ||||
#define FLAG %r9 | |||||
#define XX %r10 | #define XX %r10 | ||||
#define I %rax | #define I %rax | ||||
@@ -60,8 +61,10 @@ | |||||
#ifdef WINDOWS_ABI | #ifdef WINDOWS_ABI | ||||
movq 40(%rsp), X | movq 40(%rsp), X | ||||
movq 48(%rsp), INCX | movq 48(%rsp), INCX | ||||
movq 64(%rsp), FLAG | |||||
movaps %xmm3, %xmm0 | movaps %xmm3, %xmm0 | ||||
#else | |||||
movq 24(%rsp), FLAG | |||||
#endif | #endif | ||||
SAVEREGISTERS | SAVEREGISTERS | ||||
@@ -75,6 +78,8 @@ | |||||
comisd %xmm0, %xmm1 | comisd %xmm0, %xmm1 | ||||
jne .L100 # Alpha != ZERO | jne .L100 # Alpha != ZERO | ||||
jp .L100 # For Alpha = NaN | jp .L100 # For Alpha = NaN | ||||
cmpq $1, FLAG | |||||
je .L100 # disable the Alpha=zero path as it does not handle x=inf or nan | je .L100 # disable the Alpha=zero path as it does not handle x=inf or nan | ||||
/* Alpha == ZERO */ | /* Alpha == ZERO */ | ||||
cmpq $SIZE, INCX | cmpq $SIZE, INCX | ||||