Browse Source

Merge pull request #5078 from XiWeiGu/la64_fixed_cscal_zscal

LoongArch64: fixed cscal and zscal
tags/v0.3.30
Martin Kroeker GitHub 3 months ago
parent
commit
2e2691b34b
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
3 changed files with 66 additions and 164 deletions
  1. +5
    -4
      kernel/loongarch64/cscal_lasx.S
  2. +58
    -160
      kernel/loongarch64/cscal_lsx.S
  3. +3
    -0
      kernel/loongarch64/zscal.S

+ 5
- 4
kernel/loongarch64/cscal_lasx.S View File

@@ -94,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
CMPEQ $fcc1, ALPHAI, a1
bge $r0, I, .L19
/////// INCX == 1 && N >= 4 ////////
bnez DUMMY2, .L17 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L17

@@ -146,6 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d I, I, -1
blt $r0, I, .L17
b .L19

.align 3

/////// INCX == 1 && N < 8 ///////
@@ -156,7 +157,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
andi I, N, 7
#endif
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

@@ -171,7 +172,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
CMPEQ $fcc1, ALPHAI, a1
move XX, X
bge $r0, I, .L29
bnez DUMMY2, .L25 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.
bceqz $fcc0, .L25

bceqz $fcc1, .L25
@@ -341,7 +342,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
andi I, N, 7
#endif
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998



+ 58
- 160
kernel/loongarch64/cscal_lsx.S View File

@@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHAI $f1
#define X $r7
#define INCX $r8
#define DUMMY2 $r9

#define I $r12
#define TEMP $r13
@@ -65,6 +66,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

bge $r0, N, .L999
bge $r0, INCX, .L999
ld.d DUMMY2, $sp, 0
li.d TEMP, 1
movgr2fr.d a1, $r0
FFINT a1, a1
@@ -84,24 +86,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
srai.d I, N, 2
bne INCX, TEMP, .L22

/////// INCX == 1 ////////
.L11:
bge $r0, I, .L997
CMPEQ $fcc0, ALPHAR, a1
CMPEQ $fcc1, ALPHAI, a1
bceqz $fcc0, .L13
b .L14
.align 3
bge $r0, I, .L19

.L13:
bceqz $fcc1, .L114 //alpha_r != 0.0 && alpha_i != 0.0
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
/////// INCX == 1 && N >= 4 ////////
bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.

.L14:
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
.align 3
bceqz $fcc0, .L17

.L111: //alpha_r == 0.0 && alpha_i == 0.0
bceqz $fcc1, .L17

.L15: //alpha_r == 0.0 && alpha_i == 0.0
vst VXZ, X, 0 * SIZE
#ifdef DOUBLE
vst VXZ, X, 2 * SIZE
@@ -112,50 +110,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L111
b .L997
.align 3

.L113: //alpha_r != 0.0 && alpha_i == 0.0
vld VX0, X, 0 * SIZE
#ifdef DOUBLE
vld VX1, X, 2 * SIZE
vpickev.d x1, VX1, VX0
vpickod.d x2, VX1, VX0
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
vilvl.d VX2, x4 ,x3
vilvh.d VX3, x4, x3
vst VX2, X, 0 * SIZE
vst VX3, X, 2 * SIZE
vld VX0, X, 4 * SIZE
vld VX1, X, 6 * SIZE
vpickev.d x1, VX1, VX0
vpickod.d x2, VX1, VX0
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
vilvl.d VX2, x4 ,x3
vilvh.d VX3, x4, x3
vst VX2, X, 4 * SIZE
vst VX3, X, 6 * SIZE
#else
vld VX1, X, 4 * SIZE
vpickev.w x1, VX1, VX0
vpickod.w x2, VX1, VX0
vfmul.s x3, VXAR, x1
vfmul.s x4, VXAR, x2
vilvl.w VX2, x4 ,x3
vilvh.w VX3, x4, x3
vst VX2, X, 0 * SIZE
vst VX3, X, 4 * SIZE
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L113
b .L997
blt $r0, I, .L15
b .L19
.align 3

.L114: //alpha_r != 0.0 && alpha_i != 0.0
.L17:
vld VX0, X, 0 * SIZE
#ifdef DOUBLE
vld VX1, X, 2 * SIZE
@@ -196,29 +155,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L114
b .L997
blt $r0, I, .L17
b .L19
.align 3

/////// INCX == 1 && N < 4 ///////
.L19:
andi I, N, 3
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

bceqz $fcc1, .L998

b .L995 // alpha_r == 0.0 && alpha_i == 0.0

/////// INCX != 1 ////////
.L22:
bge $r0, I, .L997
move XX, X
CMPEQ $fcc0, ALPHAR, a1
CMPEQ $fcc1, ALPHAI, a1
bceqz $fcc0, .L23
b .L24
.align 3
move XX, X
bge $r0, I, .L29
bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.

.L23:
bceqz $fcc1, .L224 //alpha_r != 0.0 && alpha_i != 0.0
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
bceqz $fcc0, .L25

.L24:
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
.align 3
bceqz $fcc1, .L25

.L221: //alpha_r == 0.0 && alpha_i == 0.0
.L27: //alpha_r == 0.0 && alpha_i == 0.0
#ifdef DOUBLE
vstelm.d VXZ, X, 0, 0
vstelm.d VXZ, X, 1 * SIZE, 0
@@ -246,92 +211,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d X, X, INCX
addi.d I, I, -1
blt $r0, I, .L221
b .L997
blt $r0, I, .L27
b .L29
.align 3

.L223: //alpha_r != 0.0 && alpha_i == 0.0
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
add.d X, X, INCX
vinsgr2vr.d x1, t1, 0
vinsgr2vr.d x2, t2, 0
vinsgr2vr.d x1, t3, 1
vinsgr2vr.d x2, t4, 1
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
vstelm.d x3, XX, 0 * SIZE, 0
vstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.d x3, XX, 0 * SIZE, 1
vstelm.d x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX

ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
add.d X, X, INCX
ld.d t3, X, 0 * SIZE
ld.d t4, X, 1 * SIZE
vinsgr2vr.d x1, t1, 0
vinsgr2vr.d x2, t2, 0
vinsgr2vr.d x1, t3, 1
vinsgr2vr.d x2, t4, 1
add.d X, X, INCX
vfmul.d x3, VXAR, x1
vfmul.d x4, VXAR, x2
addi.d I, I, -1
vstelm.d x3, XX, 0 * SIZE, 0
vstelm.d x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.d x3, XX, 0 * SIZE, 1
vstelm.d x4, XX, 1 * SIZE, 1
#else
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
add.d X, X, INCX
vinsgr2vr.w x1, t1, 0
vinsgr2vr.w x2, t2, 0
vinsgr2vr.w x1, t3, 1
vinsgr2vr.w x2, t4, 1
ld.w t1, X, 0 * SIZE
ld.w t2, X, 1 * SIZE
add.d X, X, INCX
ld.w t3, X, 0 * SIZE
ld.w t4, X, 1 * SIZE
vinsgr2vr.w x1, t1, 2
vinsgr2vr.w x2, t2, 2
vinsgr2vr.w x1, t3, 3
vinsgr2vr.w x2, t4, 3
add.d X, X, INCX

vfmul.s x3, VXAR, x1
vfmul.s x4, VXAR, x2
addi.d I, I, -1
vstelm.w x3, XX, 0 * SIZE, 0
vstelm.w x4, XX, 1 * SIZE, 0
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 1
vstelm.w x4, XX, 1 * SIZE, 1
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 2
vstelm.w x4, XX, 1 * SIZE, 2
add.d XX, XX, INCX
vstelm.w x3, XX, 0 * SIZE, 3
vstelm.w x4, XX, 1 * SIZE, 3
#endif
add.d XX, XX, INCX
blt $r0, I, .L223
b .L997
.align 3

.L224: //alpha_r != 0.0 && alpha_i != 0.0
.L25:
#ifdef DOUBLE
ld.d t1, X, 0 * SIZE
ld.d t2, X, 1 * SIZE
@@ -414,15 +298,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstelm.w x4, XX, 1 * SIZE, 3
#endif
add.d XX, XX, INCX
blt $r0, I, .L224
b .L997
blt $r0, I, .L25
b .L29
.align 3

.L997:
andi I, N, 3
bge $r0, I, .L999
.align 3
/////// INCX != 1 && N < 4 ///////
.L29:
andi I, N, 3
beqz I, .L999
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.

bceqz $fcc0, .L998

bceqz $fcc1, .L998

b .L995 // alpha_r == 0.0 && alpha_i == 0.0

.L995: // alpha_r == 0.0 && alpha_i == 0.0
ST a1, X, 0 * SIZE
ST a1, X, 1 * SIZE
addi.d I, I, -1
add.d X, X, INCX
blt $r0, I, .L995
b .L999
.L998:
LD a1, X, 0 * SIZE
LD a2, X, 1 * SIZE
@@ -435,7 +333,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ST s2, X, 1 * SIZE
add.d X, X, INCX
blt $r0, I, .L998
.align 3
b .L999

.L999:
move $r4, $r12


+ 3
- 0
kernel/loongarch64/zscal.S View File

@@ -53,6 +53,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE

li.d TEMP, 2 * SIZE
ld.d XX, $sp, 0 // Load dummy2
slli.d XX, XX, ZBASE_SHIFT
MTC a1, $r0
slli.d INCX, INCX, ZBASE_SHIFT
bge $r0, N, .L999
@@ -60,6 +62,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
CMPEQ $fcc1, ALPHA_I, a1
bceqz $fcc0, .L50
bceqz $fcc1, .L50
beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0
srai.d I, N, 2
bne INCX, TEMP, .L20
bge $r0, I, .L15


Loading…
Cancel
Save