Browse Source

Fix precision problem in DSDOT

tags/v0.3.2^2
Martin Kroeker GitHub 7 years ago
parent
commit
d2142760e0
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 159 additions and 10 deletions
  1. +159
    -10
      kernel/mips64/dot.S

+ 159
- 10
kernel/mips64/dot.S View File

@@ -103,35 +103,83 @@
.align 3 .align 3


.L12: .L12:
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 4 * SIZE(X) LD a1, 4 * SIZE(X)
LD b1, 4 * SIZE(Y) LD b1, 4 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
LD a2, 5 * SIZE(X) LD a2, 5 * SIZE(X)
LD b2, 5 * SIZE(Y) LD b2, 5 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
LD a3, 6 * SIZE(X) LD a3, 6 * SIZE(X)
LD b3, 6 * SIZE(Y) LD b3, 6 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
LD a4, 7 * SIZE(X) LD a4, 7 * SIZE(X)
LD b4, 7 * SIZE(Y) LD b4, 7 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
LD a1, 8 * SIZE(X) LD a1, 8 * SIZE(X)
LD b1, 8 * SIZE(Y) LD b1, 8 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
LD a2, 9 * SIZE(X) LD a2, 9 * SIZE(X)
LD b2, 9 * SIZE(Y) LD b2, 9 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
LD a3, 10 * SIZE(X) LD a3, 10 * SIZE(X)
LD b3, 10 * SIZE(Y) LD b3, 10 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
LD a4, 11 * SIZE(X) LD a4, 11 * SIZE(X)
LD b4, 11 * SIZE(Y) LD b4, 11 * SIZE(Y)


@@ -143,29 +191,77 @@
.align 3 .align 3


/*
 * .L13: eight multiply-accumulate steps, alternating between the two
 * accumulators s1 and s2.
 *
 * The first four steps consume the values already held in a1..a4 / b1..b4 and
 * reload each pair from offsets 4..7 * SIZE of X and Y; the last four steps
 * consume those reloaded values. X and Y are each advanced by 8 * SIZE in
 * between.
 *
 * With DSDOT defined, each operand is first widened from single to double
 * precision (cvt.d.s) and the product is accumulated with madd.d; otherwise
 * the MADD macro is used on the operands as loaded.
 */
.L13: .L13:
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
/* Reload pair 1 from offset 4 * SIZE for the second group of four steps. */
LD a1, 4 * SIZE(X) LD a1, 4 * SIZE(X)
LD b1, 4 * SIZE(Y) LD b1, 4 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
/* Reload pair 2 from offset 5 * SIZE. */
LD a2, 5 * SIZE(X) LD a2, 5 * SIZE(X)
LD b2, 5 * SIZE(Y) LD b2, 5 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
/* Reload pair 3 from offset 6 * SIZE. */
LD a3, 6 * SIZE(X) LD a3, 6 * SIZE(X)
LD b3, 6 * SIZE(Y) LD b3, 6 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
/* Reload pair 4 from offset 7 * SIZE. */
LD a4, 7 * SIZE(X) LD a4, 7 * SIZE(X)
LD b4, 7 * SIZE(Y) LD b4, 7 * SIZE(Y)


/* Second group: consume the four pairs just reloaded; no further loads. */
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
/* Advance X past the 8 * SIZE bytes handled by this block. */
daddiu X, X, 8 * SIZE daddiu X, X, 8 * SIZE
#ifdef DSDOT
cvt.d.s a2, a2
cvt.d.s b2, b2
madd.d s2, s2, a2, b2
#else
MADD s2, s2, a2, b2 MADD s2, s2, a2, b2
#endif
/* Advance Y by the same amount. */
daddiu Y, Y, 8 * SIZE daddiu Y, Y, 8 * SIZE


#ifdef DSDOT
cvt.d.s a3, a3
cvt.d.s b3, b3
madd.d s1, s1, a3, b3
#else
MADD s1, s1, a3, b3 MADD s1, s1, a3, b3
#endif
#ifdef DSDOT
cvt.d.s a4, a4
cvt.d.s b4, b4
madd.d s2, s2, a4, b4
#else
MADD s2, s2, a4, b4 MADD s2, s2, a4, b4
#endif
.align 3 .align 3


.L15: .L15:
@@ -179,8 +275,13 @@
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1

#endif
daddiu I, I, -1 daddiu I, I, -1


daddiu X, X, SIZE daddiu X, X, SIZE
@@ -225,50 +326,85 @@
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
dadd Y, Y, INCY dadd Y, Y, INCY


#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1

#endif
LD a1, 0 * SIZE(X) LD a1, 0 * SIZE(X)
dadd X, X, INCX dadd X, X, INCX
LD b1, 0 * SIZE(Y) LD b1, 0 * SIZE(Y)
@@ -277,7 +413,13 @@
daddiu I, I, -1 daddiu I, I, -1


/*
 * Branch back to .L23 while I is still positive, followed by the
 * multiply-accumulate into s2 for the pair loaded just before the branch.
 *
 * NOTE(review): the non-DSDOT build has exactly one instruction (MADD) after
 * the branch, whereas the DSDOT build has three. If this file is assembled
 * with ".set noreorder" (not visible in this excerpt; the explicit NOP after
 * "j $31" at .L999 hints at hand-filled delay slots), only "cvt.d.s a1, a1"
 * would occupy the branch delay slot, and "cvt.d.s b1, b1" / "madd.d" would
 * run only on fall-through, dropping one product per taken branch.
 * TODO confirm against the PROLOGUE macro and, if so, move the branch so that
 * madd.d is the sole delay-slot instruction.
 */
bgtz I, .L23 bgtz I, .L23
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s2, s2, a1, b1
#else
MADD s2, s2, a1, b1 MADD s2, s2, a1, b1
#endif
.align 3 .align 3


.L25: .L25:
@@ -296,13 +438,20 @@
daddiu I, I, -1 daddiu I, I, -1


/*
 * Branch back to .L26 while I is still positive, followed by the
 * multiply-accumulate into s1.
 *
 * NOTE(review): in the DSDOT build three instructions follow the branch where
 * the non-DSDOT build has one. If the file is assembled with ".set noreorder"
 * (not visible in this excerpt), only "cvt.d.s a1, a1" would sit in the branch
 * delay slot and the remaining convert and madd.d would execute only when the
 * loop exits - TODO confirm and restructure if so.
 */
bgtz I, .L26 bgtz I, .L26
#ifdef DSDOT
cvt.d.s a1, a1
cvt.d.s b1, b1
madd.d s1, s1, a1, b1
#else
MADD s1, s1, a1, b1 MADD s1, s1, a1, b1
#endif
.align 3 .align 3


/*
 * Common exit: fold the second partial accumulator s2 into s1, then return
 * through $31; the NOP is the instruction placed directly after the jump.
 *
 * NOTE(review): as listed, an unconditional ADD precedes the #ifdef, and the
 * DSDOT branch converts s1 with cvt.d.s before add.d, even though every DSDOT
 * accumulation shown earlier in this listing already uses madd.d. These two
 * lines look like the removed side of the diff flattened into the listing
 * rather than part of the resulting code - TODO confirm against the
 * repository before relying on this text.
 */
.L999: .L999:
ADD s1, s1, s2
#ifdef DSDOT #ifdef DSDOT
cvt.d.s s1, s1
add.d s1, s1, s2
#else
ADD s1, s1, s2
#endif #endif
j $31 j $31
NOP NOP


Loading…
Cancel
Save