LoongArch: Fixed issue 4728 (tags/v0.3.28^2)
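As the hunks below show, this commit deletes the ALPHA == 0 fast path from the LoongArch SCAL kernels: the removed blocks stored zeros directly (scalar ST loops and xvst/vst of a zeroed vector register) and returned without reading X, and the surviving branch comments change from ALPHA!=0|1 to ALPHA!=1, so ALPHA == 0 now flows through the ordinary multiply path. A plausible motivation, inferred from the diff rather than stated in it, is IEEE 754 special-value handling: storing a literal 0.0 erases NaN/Inf elements, while actually multiplying by 0.0 propagates NaN. A minimal C sketch of the two behaviors follows; the helper names are illustrative and are not code from this repository.

    #include <math.h>
    #include <stdio.h>

    /* Shortcut variant: alpha == 0 is special-cased by storing zeros,
     * as the deleted store loops did. NaN/Inf elements are lost. */
    static void scal_shortcut(int n, double alpha, double *x)
    {
        if (alpha == 0.0) {
            for (int i = 0; i < n; i++) x[i] = 0.0;
            return;
        }
        for (int i = 0; i < n; i++) x[i] *= alpha;
    }

    /* Multiply-through variant: the path this commit keeps always
     * computes alpha * x[i], so 0.0 * NaN stays NaN per IEEE 754. */
    static void scal_multiply(int n, double alpha, double *x)
    {
        for (int i = 0; i < n; i++) x[i] *= alpha;
    }

    int main(void)
    {
        double a[2] = { NAN, 3.0 }, b[2] = { NAN, 3.0 };
        scal_shortcut(2, 0.0, a);  /* a becomes {0.0, 0.0} */
        scal_multiply(2, 0.0, b);  /* b becomes {NaN, 0.0} */
        printf("%g %g\n", a[0], b[0]);
        return 0;
    }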
@@ -56,80 +56,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     MTC a1, $r0
     slli.d INCX, INCX, BASE_SHIFT
     bge $r0, N, .L999
-    CMPEQ $fcc0, ALPHA, a1
-    bceqz $fcc0, .L50
-    srai.d I, N, 3
-    bne INCX, TEMP, .L20
-    bge $r0, I, .L15
-    .align 3
-.L12:
-    ST a1, X, 0 * SIZE
-    ST a1, X, 1 * SIZE
-    ST a1, X, 2 * SIZE
-    ST a1, X, 3 * SIZE
-    ST a1, X, 4 * SIZE
-    ST a1, X, 5 * SIZE
-    ST a1, X, 6 * SIZE
-    ST a1, X, 7 * SIZE
-    addi.w I, I, -1
-    addi.d X, X, 8 * SIZE
-    blt $r0, I, .L12
-    .align 3
-.L15:
-    andi I, N, 7
-    bge $r0, I, .L999
-    .align 3
-.L16:
-    ST a1, X, 0 * SIZE
-    addi.d I, I, -1
-    addi.d X, X, SIZE
-    blt $r0, I, .L16
-    move $r4, $r17
-    fmov.d $f0, $f22
-    jirl $r0, $r1, 0x0
-    .align 3
-.L20:
-    srai.d I, N, 3
-    bge $r0, I, .L25
-    .align 3
-.L22:
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    ST a1, X, 0 * SIZE
-    addi.d I, I, -1
-    add.d X, X, INCX
-    blt $r0, I, .L22
-    .align 3
-.L25:
-    andi I, N, 7
-    bge $r0, I, .L999
-    .align 3
-.L26:
-    addi.d I, I, -1
-    ST a1, X, 0 * SIZE
-    add.d X, X, INCX
-    blt $r0, I, .L26
-    move $r4, $r17
-    fmov.d $f0, $f22
-    jirl $r0, $r1, 0x0
-    .align 3
 .L50:
     srai.d I, N, 3
@@ -58,12 +58,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     FFINT a2, a2
     slli.d TEMP, TEMP, BASE_SHIFT
    slli.d INCX, INCX, BASE_SHIFT
-    CMPEQ $fcc0, ALPHA, a1
-    bcnez $fcc0, .L20 //ALPHA==0
     CMPEQ $fcc0, ALPHA, a2
     bcnez $fcc0, .L999 //ALPHA==1 return
     srai.d I, N, 3
-    beq INCX, TEMP, .L30 //ALPHA!=0|1 and INCX==1
+    beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1
     MTG TEMP, ALPHA
 #ifdef DOUBLE
     xvreplgr2vr.d VALPHA, TEMP
@@ -73,7 +72,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     move XX, X
     .align 3
-.L10: //ALPHA!=0|1 and INCX!=1
+.L10: //ALPHA!=1 and INCX!=1
     bge $r0, I, .L32
     .align 3
 .L11:
@@ -166,74 +165,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     blt $r0, I, .L11
     b .L32
     .align 3
-.L20:
-    srai.d I, N, 3
-    beq INCX, TEMP, .L24
-    bge $r0, I, .L22
-    .align 3
-.L21:
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    addi.d I, I, -1
-    blt $r0, I, .L21
-    .align 3
-.L22:
-    andi I, N, 7
-    bge $r0, I, .L999
-    .align 3
-.L23:
-    ST a1, X, 0 * SIZE
-    addi.d I, I, -1
-    add.d X, X, INCX
-    blt $r0, I, .L23
-    jirl $r0, $r1, 0
-    .align 3
-.L24:
-    bge $r0, I, .L26 /*N<8 INCX==1*/
-    .align 3
-.L25:
-    xvxor.v VX0, VX0, VX0
-    xvst VX0, X, 0 * SIZE
-#ifdef DOUBLE
-    xvst VX0, X, 4 * SIZE
-#endif
-    addi.d I, I, -1
-    addi.d X, X, 8 * SIZE
-    blt $r0, I, .L25
-    .align 3
-.L26:
-    andi I, N, 7
-    bge $r0, I, .L999
-    .align 3
-.L27:
-    ST a1, X, 0 * SIZE
-    addi.d I, I, -1
-    addi.d X, X, SIZE
-    blt $r0, I, .L27
-    jirl $r0, $r1, 0
-    .align 3
 .L30:
     bge $r0, I, .L32 /*N<8 INCX==1*/
     MTG TEMP, ALPHA
@@ -58,12 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     FFINT a2, a2
     slli.d TEMP, TEMP, BASE_SHIFT
     slli.d INCX, INCX, BASE_SHIFT
-    CMPEQ $fcc0, ALPHA, a1
-    bcnez $fcc0, .L20 //ALPHA==0
     CMPEQ $fcc0, ALPHA, a2
     bcnez $fcc0, .L999 //ALPHA==1 return
     srai.d I, N, 3
-    beq INCX, TEMP, .L30 //ALPHA!=0|1 and INCX==1
+    beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1
     MTG TEMP, ALPHA
 #ifdef DOUBLE
     vreplgr2vr.d VALPHA, TEMP
@@ -73,7 +71,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     move XX, X
     .align 3
-.L10: //ALPHA!=0|1 and INCX!=1
+.L10: //ALPHA!=1 and INCX!=1
     bge $r0, I, .L32
     .align 3
@@ -171,78 +169,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L32
     .align 3
-.L20:
-    srai.d I, N, 3
-    beq INCX, TEMP, .L24
-    bge $r0, I, .L22
-    .align 3
-.L21:
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    ST a1, X, 0
-    add.d X, X, INCX
-    addi.d I, I, -1
-    blt $r0, I, .L21
-    .align 3
-.L22:
-    andi I, N, 7
-    bge $r0, I, .L999
-    .align 3
-.L23:
-    ST a1, X, 0 * SIZE
-    addi.d I, I, -1
-    add.d X, X, INCX
-    blt $r0, I, .L23
-    jirl $r0, $r1, 0
-    .align 3
-.L24:
-    bge $r0, I, .L26 /*N<8 INCX==1*/
-    .align 3
-.L25:
-    vxor.v VX0, VX0, VX0
-    vst VX0, X, 0 * SIZE
-#ifdef DOUBLE
-    vst VX0, X, 2 * SIZE
-    vst VX0, X, 4 * SIZE
-    vst VX0, X, 6 * SIZE
-#else
-    vst VX0, X, 4 * SIZE
-#endif
-    addi.d I, I, -1
-    addi.d X, X, 8 * SIZE
-    blt $r0, I, .L25
-    .align 3
-.L26:
-    andi I, N, 7
-    bge $r0, I, .L999
-    .align 3
-.L27:
-    ST a1, X, 0 * SIZE
-    addi.d I, I, -1
-    addi.d X, X, SIZE
-    blt $r0, I, .L27
-    jirl $r0, $r1, 0
-    .align 3
 .L30:
     bge $r0, I, .L32 /*N<8 INCX==1*/
     MTG TEMP, ALPHA