From 31d326f8951330ffcf191da987fdac5ffe48665a Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 20 Jan 2025 10:45:20 +0800 Subject: [PATCH] LoongArch64: Fixed dot_lsx.S Fixed incorrect register usage in instructions Signed-off-by: gxw --- kernel/loongarch64/dot_lsx.S | 84 +++++++++++++----------------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/kernel/loongarch64/dot_lsx.S b/kernel/loongarch64/dot_lsx.S index 8a74d82e7..ecdf8da44 100644 --- a/kernel/loongarch64/dot_lsx.S +++ b/kernel/loongarch64/dot_lsx.S @@ -53,8 +53,8 @@ PROLOGUE #endif /* init $f8 and $f9 to zero */ - SUB s1, s1, s1 - SUB s2, s2, s2 + vxor.v $vr8, $vr8, $vr8 + vxor.v $vr9, $vr9, $vr9 slli.d INCX, INCX, BASE_SHIFT li.d TEMP, SIZE slli.d INCY, INCY, BASE_SHIFT @@ -64,20 +64,6 @@ PROLOGUE /* !((inc_x == 1) && (inc_y == 1)) */ - /* init $vr8 and $vr9 to zero */ -#ifdef DOUBLE - vldrepl.d $vr0, X, 0 -#else - vldrepl.w $vr0, X, 0 -#endif -#ifdef DSDOT - vfcvtl.d.s $vr0, $vr0 - vfsub.d $vr8, $vr0, $vr0 - vfsub.d $vr9, $vr0, $vr0 -#else - VFSUB $vr8, $vr0, $vr0 - VFSUB $vr9, $vr0, $vr0 -#endif #ifdef DOUBLE srai.d I, N, 3 @@ -99,31 +85,31 @@ PROLOGUE addi.w I, I, -1 addi.d X, X, 64 addi.d Y, Y, 64 -#ifdef DSDOT +#ifndef DOUBLE vfcvtl.d.s $vr10, $vr0 vfcvtl.d.s $vr11, $vr4 vfcvth.d.s $vr12, $vr0 vfcvth.d.s $vr13, $vr4 - vfmadd.d $vr8, $vr10, $vr12, $vr8 - vfmadd.d $vr9, $vr11, $vr13, $vr9 + vfmadd.d $vr8, $vr10, $vr11, $vr8 + vfmadd.d $vr9, $vr12, $vr13, $vr9 vfcvtl.d.s $vr10, $vr1 vfcvtl.d.s $vr11, $vr5 vfcvth.d.s $vr12, $vr1 vfcvth.d.s $vr13, $vr5 - vfmadd.d $vr8, $vr10, $vr12, $vr8 - vfmadd.d $vr9, $vr11, $vr13, $vr9 + vfmadd.d $vr8, $vr10, $vr11, $vr8 + vfmadd.d $vr9, $vr12, $vr13, $vr9 vfcvtl.d.s $vr10, $vr2 vfcvtl.d.s $vr11, $vr6 vfcvth.d.s $vr12, $vr2 vfcvth.d.s $vr13, $vr6 - vfmadd.d $vr8, $vr10, $vr12, $vr8 - vfmadd.d $vr9, $vr11, $vr13, $vr9 + vfmadd.d $vr8, $vr10, $vr11, $vr8 + vfmadd.d $vr9, $vr12, $vr13, $vr9 vfcvtl.d.s $vr10, $vr3 vfcvtl.d.s $vr11, $vr7 vfcvth.d.s 
$vr12, $vr3 vfcvth.d.s $vr13, $vr7 - vfmadd.d $vr8, $vr10, $vr12, $vr8 - vfmadd.d $vr9, $vr11, $vr13, $vr9 + vfmadd.d $vr8, $vr10, $vr11, $vr8 + vfmadd.d $vr9, $vr12, $vr13, $vr9 #else VFMADD $vr8, $vr0, $vr4, $vr8 VFMADD $vr9, $vr1, $vr5, $vr9 @@ -149,13 +135,13 @@ PROLOGUE addi.w I, I, -1 addi.d X, X, 16 addi.d Y, Y, 16 -#ifdef DSDOT +#ifndef DOUBLE vfcvtl.d.s $vr10, $vr0 vfcvtl.d.s $vr11, $vr4 vfcvth.d.s $vr12, $vr0 vfcvth.d.s $vr13, $vr4 - vfmadd.d $vr8, $vr10, $vr12, $vr8 - vfmadd.d $vr9, $vr11, $vr13, $vr9 + vfmadd.d $vr8, $vr10, $vr11, $vr8 + vfmadd.d $vr9, $vr12, $vr13, $vr9 #else VFMADD $vr8, $vr0, $vr4, $vr8 #endif @@ -163,23 +149,10 @@ PROLOGUE .align 3 .L14: /* store dot in s1 $f8 */ -#ifdef DSDOT vfadd.d $vr8, $vr8, $vr9 - fsub.s s2, s2, s2 /* set s2 to 0.0 */ + vxor.v $vr9, $vr9, $vr9 /* set s2 to 0.0 bitwise: fsub on an Inf or NaN lane would give NaN */ vpackod.d $vr0, $vr8, $vr8 vfadd.d $vr8, $vr8, $vr0 -#else - VFADD $vr8, $vr8, $vr9 - SUB s2, s2, s2 /* set s2 to 0.0 */ - vpackod.d $vr0, $vr8, $vr8 -#ifdef DOUBLE - VFADD $vr8, $vr8, $vr0 -#else - VFADD $vr8, $vr8, $vr0 - vpackod.w $vr0, $vr8, $vr8 - VFADD $vr8, $vr8, $vr0 -#endif /* defined DOUBLE */ -#endif /* defined DSDOT */ .align 3 .L15: #ifdef DOUBLE @@ -193,7 +166,7 @@ PROLOGUE /* DOUBLE: 1 ; FLOAT: 1~3 */ LD a1, X, 0 LD b1, Y, 0 -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s1, b1, a1, s1 @@ -236,7 +209,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s1, b1, a1, s1 @@ -248,7 +221,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s2, b1, a1, s2 @@ -260,7 +233,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s1, b1, a1, s1 @@ -272,7 +245,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 
fmadd.d s2, b1, a1, s2 @@ -284,7 +257,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s1, b1, a1, s1 @@ -296,7 +269,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s2, b1, a1, s2 @@ -308,7 +281,7 @@ PROLOGUE add.d X, X, INCX LD b1, Y, 0 * SIZE add.d Y, Y, INCY -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s1, b1, a1, s1 @@ -321,7 +294,7 @@ PROLOGUE LD b1, Y, 0 * SIZE add.d Y, Y, INCY addi.d I, I, -1 -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s2, b1, a1, s2 @@ -342,7 +315,7 @@ PROLOGUE LD b1, Y, 0 * SIZE add.d Y, Y, INCY addi.d I, I, -1 -#ifdef DSDOT +#ifndef DOUBLE fcvt.d.s a1, a1 fcvt.d.s b1, b1 fmadd.d s1, b1, a1, s1 @@ -353,12 +326,13 @@ PROLOGUE .align 3 .L999: -#ifdef DSDOT fadd.d $f0, s1, s2 + move $r4, $r17 +#if defined(DOUBLE) +#elif defined(DSDOT) #else - ADD $f0, s1, s2 + fcvt.s.d $f0, $f0 #endif - move $r4, $r17 jirl $r0, $r1, 0x0 EPILOGUE