| @@ -1440,6 +1440,12 @@ | |||||
| .L50: | .L50: | ||||
| movl M, %eax | movl M, %eax | ||||
| movl Y, YY | movl Y, YY | ||||
| // If incx==0 || incy==0, skip the unrolled loop. | |||||
| cmpl $0, INCX | |||||
| je .L56 | |||||
| cmpl $0, INCY | |||||
| je .L56 | |||||
| sarl $3, %eax | sarl $3, %eax | ||||
| jle .L55 | jle .L55 | ||||
| ALIGN_3 | ALIGN_3 | ||||
| @@ -698,6 +698,12 @@ | |||||
| .L40: | .L40: | ||||
| movl Y, YY | movl Y, YY | ||||
| movl M, %eax | movl M, %eax | ||||
| // If incx==0 || incy==0, skip the unrolled loop. | |||||
| cmpl $0, INCX | |||||
| je .L46 | |||||
| cmpl $0, INCY | |||||
| je .L46 | |||||
| sarl $3, %eax | sarl $3, %eax | ||||
| jle .L45 | jle .L45 | ||||
| ALIGN_3 | ALIGN_3 | ||||
| @@ -2857,6 +2857,11 @@ | |||||
| unpcklps ALPHA_I, ALPHA_R | unpcklps ALPHA_I, ALPHA_R | ||||
| unpcklps %xmm5, ALPHA_I | unpcklps %xmm5, ALPHA_I | ||||
| #endif | #endif | ||||
| // If incx==0 || incy==0, skip the unrolled path and branch to the loop at .L200. | |||||
| cmpl $0, INCX | |||||
| je .L200 | |||||
| cmpl $0, INCY | |||||
| je .L200 | |||||
| movl Y, YY | movl Y, YY | ||||
| @@ -3090,8 +3095,41 @@ | |||||
| addps %xmm1, %xmm4 | addps %xmm1, %xmm4 | ||||
| movsd %xmm4, (Y) | movsd %xmm4, (Y) | ||||
| jmp .L999 | |||||
| ALIGN_3 | |||||
| // Non-unrolled path, reached when INCX or INCY is zero. | |||||
| // Handles one 8-byte element (two packed singles) of X and Y per iteration; | |||||
| // %eax counts the remaining iterations, starting from M. | |||||
| .L200: | |||||
| movl M, %eax | |||||
| cmpl $0, %eax | |||||
| jle .L999 | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L201: | |||||
| movsd (X), %xmm0 | |||||
| // Step X by its stride so that a non-zero INCX still walks the vector | |||||
| // (a zero stride makes this a no-op). | |||||
| // NOTE(review): assumes INCX/INCY already hold byte strides here - confirm | |||||
| // against the setup code outside this hunk. | |||||
| addl INCX, X | |||||
| // Split the loaded pair: xmm0 = even lanes duplicated, xmm1 = odd lanes duplicated. | |||||
| #ifdef HAVE_SSE3 | |||||
| movshdup %xmm0, %xmm1 | |||||
| movsldup %xmm0, %xmm0 | |||||
| #else | |||||
| movaps %xmm0, %xmm1 | |||||
| shufps $0xa0, %xmm0, %xmm0 | |||||
| shufps $0xf5, %xmm1, %xmm1 | |||||
| #endif | |||||
| mulps ALPHA_R, %xmm0 | |||||
| mulps ALPHA_I, %xmm1 | |||||
| // Accumulate both products into the current Y element and store it back. | |||||
| movsd (Y), %xmm4 | |||||
| addps %xmm0, %xmm4 | |||||
| addps %xmm1, %xmm4 | |||||
| movsd %xmm4, (Y) | |||||
| // Step Y by its stride; must precede decl so jg tests the counter's flags. | |||||
| addl INCY, Y | |||||
| decl %eax | |||||
| jg .L201 | |||||
| ALIGN_3 | |||||
| .L999: | .L999: | ||||
| popl %ebp | popl %ebp | ||||
| popl %ebx | popl %ebx | ||||
| @@ -1318,6 +1318,12 @@ | |||||
| movl Y, YY | movl Y, YY | ||||
| movl M, %eax | movl M, %eax | ||||
| // If incx==0 || incy==0, skip the unrolled loop and branch to the loop at .L58. | |||||
| cmpl $0, INCX | |||||
| je .L58 | |||||
| cmpl $0, INCY | |||||
| je .L58 | |||||
| sarl $2, %eax | sarl $2, %eax | ||||
| jle .L55 | jle .L55 | ||||
| @@ -1498,6 +1504,7 @@ | |||||
| andl $1, %eax | andl $1, %eax | ||||
| jle .L999 | jle .L999 | ||||
| .L58: | |||||
| MOVDDUP( 0 * SIZE, X, %xmm0) | MOVDDUP( 0 * SIZE, X, %xmm0) | ||||
| MOVDDUP( 1 * SIZE, X, %xmm1) | MOVDDUP( 1 * SIZE, X, %xmm1) | ||||
| @@ -1510,6 +1517,10 @@ | |||||
| movlpd %xmm4, 0 * SIZE(YY) | movlpd %xmm4, 0 * SIZE(YY) | ||||
| movhpd %xmm4, 1 * SIZE(YY) | movhpd %xmm4, 1 * SIZE(YY) | ||||
| decl %eax | |||||
| jg .L58 | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L999: | .L999: | ||||