Browse Source

Loongarch64: fixed cnrm2_lasx

tags/v0.3.30
pengxu 5 months ago
parent
commit
d49319c2d2
1 changed file with 53 additions and 25 deletions
  1. +53
    -25
      kernel/loongarch64/cnrm2_lasx.S

+ 53
- 25
kernel/loongarch64/cnrm2_lasx.S View File

@@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VX4 $xr21
#define res1 $xr19
#define res2 $xr20
#define RCP $f2
#define VALPHA $xr3

PROLOGUE

@@ -55,10 +57,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDINT INCX, 0(INCX)
#endif

xvxor.v res1, res1, res1
xvxor.v res2, res2, res2
bge $r0, N, .L999
beq $r0, INCX, .L999

/*
 * Pre-scaling setup: call camax_k, then build a vector holding the
 * reciprocal of the value it returns in $f0, so later code can multiply
 * inputs by that reciprocal before squaring them.
 *
 * Reserve 32 bytes of stack and save the return address plus N, X and
 * INCX, because the call below overwrites $ra and may clobber the others.
 */
addi.d $sp, $sp, -32
st.d $ra, $sp, 0
st.d N, $sp, 8
st.d X, $sp, 16
st.d INCX, $sp, 24
/*
 * N, X and INCX are not modified between function entry and this call,
 * so the callee presumably receives the same (n, x, incx) arguments
 * -- TODO confirm against the camax_k prototype.
 * NOTE(review): the DYNAMIC_ARCH build hard-codes the _LA264 symbol
 * suffix; confirm that symbol is always present when this kernel is built.
 */
#ifdef DYNAMIC_ARCH
bl camax_k_LA264
#else
bl camax_k
#endif
/* Restore the saved registers and release the stack frame. */
ld.d $ra, $sp, 0
ld.d N, $sp, 8
ld.d X, $sp, 16
ld.d INCX, $sp, 24
addi.d $sp, $sp, 32

/*
 * $f0 holds the call's result (by its name, presumably the largest
 * absolute component of x -- TODO confirm). RCP ($f2) = 1.0f / $f0.
 */
frecip.s RCP, $f0
/*
 * Broadcast RCP to every 32-bit lane of VALPHA ($xr3):
 *   - $vr2 is the 128-bit view of the register that holds RCP ($f2);
 *     copy its word 0 into all four words of $vr3.
 *   - then replicate doubleword 0 of $xr3 across all four doublewords,
 *     filling the upper 128 bits as well.
 */
vreplvei.w $vr3, $vr2, 0
xvpermi.d VALPHA, $xr3,0x00
/* Zero both vector accumulators. */
xvxor.v res1, res1, res1
xvxor.v res2, res2, res2
/*
 * res1 is $xr19, so $f19 (its lowest lane) is now 0.0. If the call
 * returned exactly 0.0, RCP is the reciprocal of zero; skip all the
 * accumulation and go straight to the exit path at .L999.
 */
fcmp.ceq.s $fcc0, $f0, $f19
bcnez $fcc0, .L999

li.d TEMP, SIZE
slli.d INCX, INCX, ZBASE_SHIFT
srai.d I, N, 2
@@ -67,13 +92,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 3

.L10:
xvld VX0, X, 0 * SIZE
xvfcvtl.d.s VX1, VX0
xvfcvth.d.s VX2, VX0
xvfmadd.d res1, VX1, VX1, res1
xvfmadd.d res2, VX2, VX2, res2
addi.d I, I, -1
addi.d X, X, 8 * SIZE

xvld VX0, X, 0 * SIZE
xvld VX1, X, 8 * SIZE
xvfmul.s VX0, VX0, VALPHA
xvfmul.s VX1, VX1, VALPHA
xvfmadd.s res1, VX0, VX0, res1
xvfmadd.s res2, VX1, VX1, res2

addi.d X, X, 16 * SIZE
blt $r0, I, .L10
.align 3
b .L996
@@ -103,22 +131,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VX0, t3, 6
xvinsgr2vr.w VX0, t4, 7
add.d X, X, INCX
xvfcvtl.d.s VX1, VX0
xvfcvth.d.s VX2, VX0
xvfmadd.d res1, VX1, VX1, res1
xvfmadd.d res2, VX2, VX2, res2
xvfmul.s VX0, VX0, VALPHA
xvfmadd.s res2, VX0, VX0, res2
addi.d I, I, -1
blt $r0, I, .L21
b .L996

.L996:
xvfadd.d res1, res1, res2
xvpickve.d VX1, res1, 1
xvpickve.d VX2, res1, 2
xvpickve.d VX3, res1, 3
xvfadd.d res1, VX1, res1
xvfadd.d res1, VX2, res1
xvfadd.d res1, VX3, res1
xvfadd.s res1, res1, res2
xvpermi.d VX1, res1, 0x4e
xvfadd.s res1, res1, VX1
vreplvei.w $vr17, $vr19, 1
vreplvei.w $vr18, $vr19, 2
vreplvei.w $vr21, $vr19, 3
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
xvfadd.s res1, VX4, res1
.align 3

.L997:
@@ -130,18 +158,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fld.s a1, X, 0 * SIZE
fld.s a2, X, 1 * SIZE
addi.d I, I, -1
fcvt.d.s a1, a1
fcvt.d.s a2, a2
fmadd.d res, a1, a1, res
fmadd.d res, a2, a2, res
fmul.s a1, a1, RCP
fmul.s a2, a2, RCP
fmadd.s res, a1, a1, res
fmadd.s res, a2, a2, res
add.d X, X, INCX
blt $r0, I, .L998
.align 3

.L999:
fsqrt.d res, res
fsqrt.s res, res
fmul.s $f0, res, $f0
move $r4, $r17
fcvt.s.d $f0, res
jirl $r0, $r1, 0x0

EPILOGUE

Loading…
Cancel
Save