LoongArch: Fixed issue 4728 (tags/v0.3.28^2)
@@ -56,80 +56,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Diff-hunk fragment of a scaling kernel: when ALPHA compares equal to a1 the
// code below stores a1 into N elements of X; otherwise control goes to .L50.
// NOTE(review): the preceding hunk header (-56,80 +56,6) says this region
// shrinks from 80 lines to 6, so most of these lines are presumably deletions
// whose "-" markers were lost in this rendering — confirm against the commit.
// a1 is loaded from $r0 through the MTC macro (presumably 0.0 — TODO confirm
// against the macro definition, which is outside this view).
MTC a1, $r0 | |||
// INCX <<= BASE_SHIFT; the shifted value is later added directly to pointer X.
slli.d INCX, INCX, BASE_SHIFT | |||
// Leave through .L999 when N <= 0.
bge $r0, N, .L999 | |||
// Compare ALPHA with a1 into $fcc0; if the condition is false, skip to .L50.
CMPEQ $fcc0, ALPHA, a1 | |||
bceqz $fcc0, .L50 | |||
// I = N >> 3: number of 8-element groups.
srai.d I, N, 3 | |||
// TEMP is set outside this view; the contiguous path below runs only when
// INCX == TEMP, otherwise the INCX-stepped path at .L20 is used.
bne INCX, TEMP, .L20 | |||
// No full group of 8: go straight to the remainder handling at .L15.
bge $r0, I, .L15 | |||
.align 3 | |||
// Contiguous main loop: 8 stores of a1 per iteration.
.L12: | |||
ST a1, X, 0 * SIZE | |||
ST a1, X, 1 * SIZE | |||
ST a1, X, 2 * SIZE | |||
ST a1, X, 3 * SIZE | |||
ST a1, X, 4 * SIZE | |||
ST a1, X, 5 * SIZE | |||
ST a1, X, 6 * SIZE | |||
ST a1, X, 7 * SIZE | |||
// NOTE(review): this decrement uses addi.w while every other loop counter
// update in this hunk uses addi.d.
addi.w I, I, -1 | |||
addi.d X, X, 8 * SIZE | |||
// Repeat while I > 0.
blt $r0, I, .L12 | |||
.align 3 | |||
// Contiguous remainder: I = N & 7 leftover elements.
.L15: | |||
andi I, N, 7 | |||
bge $r0, I, .L999 | |||
.align 3 | |||
// One store of a1 per iteration, advancing X by SIZE.
.L16: | |||
ST a1, X, 0 * SIZE | |||
addi.d I, I, -1 | |||
addi.d X, X, SIZE | |||
blt $r0, I, .L16 | |||
// Copy $r17 into $r4 and $f22 into $f0, then jump to the address held in $r1
// without writing a link register (presumably the function return — confirm).
move $r4, $r17 | |||
fmov.d $f0, $f22 | |||
jirl $r0, $r1, 0x0 | |||
.align 3 | |||
// INCX-stepped path: same stores, but X advances by INCX after each one.
.L20: | |||
// I = N >> 3 is recomputed here even though it was already set before the
// branch into .L20.
srai.d I, N, 3 | |||
bge $r0, I, .L25 | |||
.align 3 | |||
// INCX-stepped main loop: 8 store/advance pairs per iteration.
.L22: | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
ST a1, X, 0 * SIZE | |||
addi.d I, I, -1 | |||
add.d X, X, INCX | |||
blt $r0, I, .L22 | |||
.align 3 | |||
// INCX-stepped remainder: I = N & 7 leftover elements.
.L25: | |||
andi I, N, 7 | |||
bge $r0, I, .L999 | |||
.align 3 | |||
.L26: | |||
addi.d I, I, -1 | |||
ST a1, X, 0 * SIZE | |||
add.d X, X, INCX | |||
blt $r0, I, .L26 | |||
// Same exit sequence as the contiguous path: $r4 = $r17, $f0 = $f22, jump via $r1.
move $r4, $r17 | |||
fmov.d $f0, $f22 | |||
jirl $r0, $r1, 0x0 | |||
.align 3 | |||
// Path taken when ALPHA did not compare equal to a1; it continues past the end
// of this hunk.
.L50: | |||
srai.d I, N, 3 | |||
@@ -58,12 +58,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Setup fragment of the variant that uses xv* vector instructions: scales
// TEMP/INCX and dispatches on the value of ALPHA and on INCX == TEMP.
// NOTE(review): the preceding hunk header (-58,12 +58,11) shows one net line
// removed here, but the "+"/"-" markers are missing from this rendering, so it
// is not visible which of the lines below survive — confirm against the commit.
FFINT a2, a2 | |||
// TEMP and INCX are both shifted left by BASE_SHIFT before being compared below.
slli.d TEMP, TEMP, BASE_SHIFT | |||
slli.d INCX, INCX, BASE_SHIFT | |||
// Branch to .L20 when ALPHA compares equal to a1.
CMPEQ $fcc0, ALPHA, a1 | |||
bcnez $fcc0, .L20 //ALPHA==0 | |||
// Branch to .L999 when ALPHA compares equal to a2.
CMPEQ $fcc0, ALPHA, a2 | |||
bcnez $fcc0, .L999 //ALPHA==1 return | |||
// I = N >> 3: number of 8-element groups.
srai.d I, N, 3 | |||
// NOTE(review): the next two lines differ only in their trailing comment;
// presumably they are the old and new side of one changed line — confirm.
beq INCX, TEMP, .L30 //ALPHA!=0|1 and INCX==1 | |||
beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1 | |||
// Move ALPHA into TEMP through the MTG macro (presumably an FP-to-general
// register move — TODO confirm macro definition) so it can be replicated below.
MTG TEMP, ALPHA | |||
#ifdef DOUBLE | |||
// Replicate TEMP into the 64-bit elements of VALPHA.
xvreplgr2vr.d VALPHA, TEMP | |||
@@ -73,7 +72,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Keep a second copy of the X pointer in XX before entering the loop at .L10.
move XX, X | |||
.align 3 | |||
// NOTE(review): the label .L10 appears twice, differing only in the trailing
// comment; presumably these are the old and new side of one changed line in
// the diff (markers missing) — confirm.
.L10: //ALPHA!=0|1 and INCX!=1 | |||
.L10: //ALPHA!=1 and INCX!=1 | |||
// With no full group of 8 (I <= 0) go to .L32; otherwise fall into .L11,
// whose body lies outside this hunk.
bge $r0, I, .L32 | |||
.align 3 | |||
.L11: | |||
@@ -166,74 +165,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Tail of the .L11 loop (its body is outside this hunk), followed by the
// .L20-.L27 paths that store a1 / a zeroed vector into X, and the head of .L30.
// NOTE(review): the preceding hunk header (-166,74 +165,6) says this region
// shrinks from 74 lines to 6, so the .L20-.L27 code is presumably being
// deleted; the "-" markers are missing from this rendering — confirm.
blt $r0, I, .L11 | |||
b .L32 | |||
.align 3 | |||
// Entry reached from a branch annotated "ALPHA==0" in the dispatch code.
.L20: | |||
// I = N >> 3: number of 8-element groups.
srai.d I, N, 3 | |||
// INCX == TEMP selects the contiguous vector path at .L24.
beq INCX, TEMP, .L24 | |||
bge $r0, I, .L22 | |||
.align 3 | |||
// INCX-stepped main loop: 8 store/advance pairs of a1 per iteration.
.L21: | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
addi.d I, I, -1 | |||
blt $r0, I, .L21 | |||
.align 3 | |||
// INCX-stepped remainder: I = N & 7 leftover elements.
.L22: | |||
andi I, N, 7 | |||
bge $r0, I, .L999 | |||
.align 3 | |||
.L23: | |||
ST a1, X, 0 * SIZE | |||
addi.d I, I, -1 | |||
add.d X, X, INCX | |||
blt $r0, I, .L23 | |||
// Jump to the address in $r1 without linking (presumably the function return).
jirl $r0, $r1, 0 | |||
.align 3 | |||
// Contiguous path (INCX == TEMP).
.L24: | |||
bge $r0, I, .L26 /*N<8 INCX==1*/ | |||
.align 3 | |||
// Contiguous main loop: X advances by 8 * SIZE per iteration.
.L25: | |||
// XOR of VX0 with itself clears it. It is redone on every iteration although
// nothing else in this loop writes VX0.
xvxor.v VX0, VX0, VX0 | |||
xvst VX0, X, 0 * SIZE | |||
#ifdef DOUBLE | |||
// DOUBLE builds issue a second vector store at offset 4 * SIZE.
xvst VX0, X, 4 * SIZE | |||
#endif | |||
addi.d I, I, -1 | |||
addi.d X, X, 8 * SIZE | |||
blt $r0, I, .L25 | |||
.align 3 | |||
// Contiguous remainder: I = N & 7 leftover elements, one scalar store each.
.L26: | |||
andi I, N, 7 | |||
bge $r0, I, .L999 | |||
.align 3 | |||
.L27: | |||
ST a1, X, 0 * SIZE | |||
addi.d I, I, -1 | |||
addi.d X, X, SIZE | |||
blt $r0, I, .L27 | |||
// Jump to the address in $r1 without linking (presumably the function return).
jirl $r0, $r1, 0 | |||
.align 3 | |||
// Target of the INCX == TEMP branch in the dispatch code; the rest of this
// path lies outside this hunk.
.L30: | |||
bge $r0, I, .L32/*N<8 INCX==1*/ | |||
MTG TEMP, ALPHA | |||
@@ -58,12 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Setup fragment of the variant that uses v* vector instructions: scales
// TEMP/INCX and dispatches on the value of ALPHA and on INCX == TEMP.
// NOTE(review): the preceding hunk header (-58,12 +58,10) shows two net lines
// removed here, but the "+"/"-" markers are missing from this rendering, so it
// is not visible which of the lines below survive — confirm against the commit.
FFINT a2, a2 | |||
// TEMP and INCX are both shifted left by BASE_SHIFT before being compared below.
slli.d TEMP, TEMP, BASE_SHIFT | |||
slli.d INCX, INCX, BASE_SHIFT | |||
// Branch to .L20 when ALPHA compares equal to a1.
CMPEQ $fcc0, ALPHA, a1 | |||
bcnez $fcc0, .L20 //ALPHA==0 | |||
// Branch to .L999 when ALPHA compares equal to a2.
CMPEQ $fcc0, ALPHA, a2 | |||
bcnez $fcc0, .L999 //ALPHA==1 return | |||
// I = N >> 3: number of 8-element groups.
srai.d I, N, 3 | |||
// NOTE(review): the next two lines differ only in their trailing comment;
// presumably they are the old and new side of one changed line — confirm.
beq INCX, TEMP, .L30 //ALPHA!=0|1 and INCX==1 | |||
beq INCX, TEMP, .L30 //ALPHA!=1 and INCX==1 | |||
// Move ALPHA into TEMP through the MTG macro (presumably an FP-to-general
// register move — TODO confirm macro definition) so it can be replicated below.
MTG TEMP, ALPHA | |||
#ifdef DOUBLE | |||
// Replicate TEMP into the 64-bit elements of VALPHA.
vreplgr2vr.d VALPHA, TEMP | |||
@@ -73,7 +71,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Keep a second copy of the X pointer in XX before entering the loop at .L10.
move XX, X | |||
.align 3 | |||
// NOTE(review): the label .L10 appears twice, differing only in the trailing
// comment; presumably these are the old and new side of one changed line in
// the diff (markers missing) — confirm.
.L10: //ALPHA!=0|1 and INCX!=1 | |||
.L10: //ALPHA!=1 and INCX!=1 | |||
// With no full group of 8 (I <= 0) go to .L32; the code that follows lies
// outside this hunk.
bge $r0, I, .L32 | |||
.align 3 | |||
@@ -171,78 +169,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// End of the preceding loop path (outside this hunk), followed by the
// .L20-.L27 paths that store a1 / a zeroed vector into X, and the head of .L30.
// NOTE(review): the preceding hunk header (-171,78 +169,6) says this region
// shrinks from 78 lines to 6, so the .L20-.L27 code is presumably being
// deleted; the "-" markers are missing from this rendering — confirm.
b .L32 | |||
.align 3 | |||
// Entry reached from a branch annotated "ALPHA==0" in the dispatch code.
.L20: | |||
// I = N >> 3: number of 8-element groups.
srai.d I, N, 3 | |||
// INCX == TEMP selects the contiguous vector path at .L24.
beq INCX, TEMP, .L24 | |||
bge $r0, I, .L22 | |||
.align 3 | |||
// INCX-stepped main loop: 8 store/advance pairs of a1 per iteration.
.L21: | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
ST a1, X, 0 | |||
add.d X, X, INCX | |||
addi.d I, I, -1 | |||
blt $r0, I, .L21 | |||
.align 3 | |||
// INCX-stepped remainder: I = N & 7 leftover elements.
.L22: | |||
andi I, N, 7 | |||
bge $r0, I, .L999 | |||
.align 3 | |||
.L23: | |||
ST a1, X, 0 * SIZE | |||
addi.d I, I, -1 | |||
add.d X, X, INCX | |||
blt $r0, I, .L23 | |||
// Jump to the address in $r1 without linking (presumably the function return).
jirl $r0, $r1, 0 | |||
.align 3 | |||
// Contiguous path (INCX == TEMP).
.L24: | |||
bge $r0, I, .L26 /*N<8 INCX==1*/ | |||
.align 3 | |||
// Contiguous main loop: X advances by 8 * SIZE per iteration.
.L25: | |||
// XOR of VX0 with itself clears it. It is redone on every iteration although
// nothing else in this loop writes VX0.
vxor.v VX0, VX0, VX0 | |||
vst VX0, X, 0 * SIZE | |||
#ifdef DOUBLE | |||
// DOUBLE builds: three more vector stores at offsets 2, 4 and 6 * SIZE.
vst VX0, X, 2 * SIZE | |||
vst VX0, X, 4 * SIZE | |||
vst VX0, X, 6 * SIZE | |||
#else | |||
// Non-DOUBLE builds: one more vector store at offset 4 * SIZE.
vst VX0, X, 4 * SIZE | |||
#endif | |||
addi.d I, I, -1 | |||
addi.d X, X, 8 * SIZE | |||
blt $r0, I, .L25 | |||
.align 3 | |||
// Contiguous remainder: I = N & 7 leftover elements, one scalar store each.
.L26: | |||
andi I, N, 7 | |||
bge $r0, I, .L999 | |||
.align 3 | |||
.L27: | |||
ST a1, X, 0 * SIZE | |||
addi.d I, I, -1 | |||
addi.d X, X, SIZE | |||
blt $r0, I, .L27 | |||
// Jump to the address in $r1 without linking (presumably the function return).
jirl $r0, $r1, 0 | |||
.align 3 | |||
// Target of the INCX == TEMP branch in the dispatch code; the rest of this
// path lies outside this hunk.
.L30: | |||
bge $r0, I, .L32/*N<8 INCX==1*/ | |||
MTG TEMP, ALPHA | |||