LoongArch64: Fix dsymv and ssymv LASX versions

"fmov.d $f2, $f4" leaves every bit above bit 63 of the destination unpredictable, yet the code that follows clearly uses the value of those high bits. What we actually want here is to replicate the lower 64 bits, so we should use xvreplve0.d instead.

LA464 (Loongson 3[A-Z]-5000) happens to replicate them for us because of some microarchitecture-internal details, so the issue went undetected. On LA664 (Loongson 3[A-Z]-6000) and future microarchitectures we must do this correctly, or we end up with a lot of test failures.

Closes: https://bbs.aosc.io/t/topic/302
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
8 months ago · af10c132b8
--- a/kernel/loongarch64/dsymv_L_lasx.S
+++ b/kernel/loongarch64/dsymv_L_lasx.S
@@ -288,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    //Acc U2
    GACC   xvf, d, U4, U2
-    fmov.d $f2, $f4
+    xvreplve0.d U2, U4
 .L03:  /* &4 */
    sub.d     T0,     M,    J
    addi.d    T0,     T0,   -1
--- a/kernel/loongarch64/dsymv_U_lasx.S
+++ b/kernel/loongarch64/dsymv_U_lasx.S
@@ -272,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    //Acc U2
    GACC   xvf, d, U4, U2
-    fmov.d $f2, $f4
+    xvreplve0.d U2, U4

 .L03:  /* &4 */
    andi      T0,     J,   4
--- a/kernel/loongarch64/ssymv_L_lasx.S
+++ b/kernel/loongarch64/ssymv_L_lasx.S
@@ -279,7 +279,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    //Acc U2
    GACC   xvf, s, U4, U2
-    fmov.d $f2, $f4
+    xvreplve0.d U2, U4

 .L03:  /* &4 */
    sub.d     T0,     M,    J
--- a/kernel/loongarch64/ssymv_U_lasx.S
+++ b/kernel/loongarch64/ssymv_U_lasx.S
@@ -263,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    //Acc U2
    GACC   xvf, s, U4, U2
-    fmov.d $f2, $f4
+    xvreplve0.d U2, U4

 .L03:  /* &4 */
    andi      T0,     J,   4