Browse Source

Fixed #30: strmm computational error on Loongson3A.

tags/v0.1alpha2^2
traz 14 years ago
parent
commit
88d94d0ec8
2 changed files with 70 additions and 50 deletions
  1. +65
    -45
      kernel/mips64/sgemm_kernel_loongson3a.S
  2. +5
    -5
      param.h

+ 65
- 45
kernel/mips64/sgemm_kernel_loongson3a.S View File

@@ -1,6 +1,7 @@
#define REALNAME ASMNAME #define REALNAME ASMNAME
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"

#define FETCH ld #define FETCH ld
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) #define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) #define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
@@ -215,35 +216,36 @@
daddu A,A,K # move A B to data part daddu A,A,K # move A B to data part
daddu B,BO,TEMP daddu B,BO,TEMP
#endif #endif
MTC $0,t11
MOV t21,t11
MTC $0,t11 # GEMM part NR=4,MR=4
LD a0,0(A) LD a0,0(A)

MOV t21,t11
MOV t31,t11 MOV t31,t11
MOV t41,t11
LD a1,1*SIZE(A) LD a1,1*SIZE(A)


MOV t41,t11
MOV t12,t11 MOV t12,t11
MOV t22,t11
LD b0,0(B) LD b0,0(B)
MOV t22,t11
MOV t32,t11 MOV t32,t11
MOV t42,t11
LD b1,1*SIZE(B) LD b1,1*SIZE(B)


MOV t42,t11
LD a2,2*SIZE(A)
MOV t13,t11 MOV t13,t11
MOV t23,t11 MOV t23,t11
LD a2,2*SIZE(A)
LD b2,2*SIZE(B)
MOV t33,t11 MOV t33,t11
MOV t43,t11 MOV t43,t11
LD b2,2*SIZE(B)
LD a3,3*SIZE(A)


MOV t14,t11 MOV t14,t11
MOV t24,t11 MOV t24,t11
LD a3,3*SIZE(A)


LD b3,3*SIZE(B)


#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP,KCO,KK # temp is the length of the data part dsubu TEMP,KCO,KK # temp is the length of the data part
@@ -733,22 +735,22 @@
daddu B,BO,TEMP daddu B,BO,TEMP
#endif #endif


MTC $0,t11
LD a0,0*SIZE(A) LD a0,0*SIZE(A)
MOV t21,t11
MTC $0,t11
LD a1,1*SIZE(A) LD a1,1*SIZE(A)
MOV t12,t11
MOV t21,t11
LD b0,0*SIZE(B) LD b0,0*SIZE(B)
MOV t22,t11
MOV t12,t11
LD b1,1*SIZE(B) LD b1,1*SIZE(B)


MOV t13,t11
MOV t22,t11
LD b2,2*SIZE(B) LD b2,2*SIZE(B)

MOV t13,t11
MOV t23,t11 MOV t23,t11
LD b3,3*SIZE(B) LD b3,3*SIZE(B)



#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP,KCO,KK dsubu TEMP,KCO,KK
#elif defined(LEFT) #elif defined(LEFT)
@@ -1043,20 +1045,26 @@
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move B,BO # Reset B move B,BO # Reset B
#else #else
dsll K,KK, 0 + BASE_SHIFT
dsll K,KK, BASE_SHIFT
dsll TEMP,KK,2 + BASE_SHIFT dsll TEMP,KK,2 + BASE_SHIFT


daddu A,A,K daddu A,A,K
daddu B,BO,TEMP daddu B,BO,TEMP
#endif #endif

LD a0, 0 * SIZE(A) # a0

MTC $0,t11 MTC $0,t11
LD b0,0*SIZE(B)
MOV t12,t11 MOV t12,t11
LD a0, 0 * SIZE(A) # a0
LD b1,1*SIZE(B)


MOV t13,t11 MOV t13,t11
LD b0,0*SIZE(B)
MOV t14,t11 # clear result registers
LD b1,1*SIZE(B)
LD b2,2*SIZE(B)
MOV t14,t11
LD b3,3*SIZE(B)


#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK dsubu TEMP, KCO, KK
@@ -1236,7 +1244,7 @@
daddiu TEMP, TEMP, -4 daddiu TEMP, TEMP, -4
#endif #endif


dsll K,TEMP, 0 + BASE_SHIFT
dsll K,TEMP, BASE_SHIFT
dsll TEMP,TEMP, 2 + BASE_SHIFT dsll TEMP,TEMP, 2 + BASE_SHIFT


daddu A,A,K daddu A,A,K
@@ -1291,21 +1299,21 @@
daddu A,A,K daddu A,A,K
daddu B,BO,TEMP daddu B,BO,TEMP
#endif #endif
MTC $0,t11
LD a0,0*SIZE(A) LD a0,0*SIZE(A)
MOV t21,t11
MTC $0,t11 # gemm part
LD a1,1*SIZE(A) LD a1,1*SIZE(A)


MOV t31,t11
MOV t21,t11
LD b0,0*SIZE(B) LD b0,0*SIZE(B)
MOV t41,t11
MOV t31,t11
LD b1,1*SIZE(B) LD b1,1*SIZE(B)


MOV t12,t11
MOV t41,t11
LD a2,2*SIZE(A) LD a2,2*SIZE(A)
MOV t22,t11
LD a3,3*SIZE(A) LD a3,3*SIZE(A)

MOV t12,t11
MOV t22,t11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP,KCO,KK dsubu TEMP,KCO,KK
@@ -1621,11 +1629,14 @@
daddu A, A, K daddu A, A, K
daddu B, BO, TEMP daddu B, BO, TEMP
#endif #endif
MTC $0,t11
LD a0,0*SIZE(A) LD a0,0*SIZE(A)
MOV t21,t11
LD a1,1*SIZE(A) LD a1,1*SIZE(A)


MTC $0,t11
LD b0,0*SIZE(B)
MOV t21,t11
LD b1,1*SIZE(B)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK dsubu TEMP, KCO, KK
#elif defined(LEFT) #elif defined(LEFT)
@@ -1830,11 +1841,14 @@
daddu A, A, K daddu A, A, K
daddu B, BO, TEMP daddu B, BO, TEMP
#endif #endif
MTC $0,t11
LD a0, 0*SIZE(A) # a0
LD a0,0*SIZE(A)
MTC $0,t11
MOV t21,t11 MOV t21,t11
LD b0,0*SIZE(B)
LD b0,0*SIZE(B)

MOV t12,t11
LD b1,1*SIZE(B)


#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK dsubu TEMP, KCO, KK
@@ -1844,9 +1858,9 @@
daddiu TEMP, KK, 2 daddiu TEMP, KK, 2
#endif #endif
dsra K,TEMP,2 dsra K,TEMP,2
MOV t12,t11
beqz K,.L65
MOV t22,t11 MOV t22,t11
beqz K,.L65
nop


#else #else
dsra K,KCO,2 dsra K,KCO,2
@@ -2023,13 +2037,18 @@
daddu A, A, K daddu A, A, K
daddu B, BO, TEMP daddu B, BO, TEMP
#endif #endif
MTC $0,t11
LD b0, 0*SIZE(B) LD b0, 0*SIZE(B)
MOV t21,t11
MTC $0,t11
LD a0,0*SIZE(A) LD a0,0*SIZE(A)
MOV t31,t11
MOV t21,t11
LD a1,1*SIZE(A) LD a1,1*SIZE(A)
MOV t31,t11
LD a2,2*SIZE(A)
MOV t41,t11
LD a3,3*SIZE(A)



#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK dsubu TEMP, KCO, KK
@@ -2039,7 +2058,6 @@
daddiu TEMP, KK, 1 daddiu TEMP, KK, 1
#endif #endif
dsra K,TEMP,2 dsra K,TEMP,2
MOV t41,t11
beqz K,.L75 beqz K,.L75
nop nop
#else #else
@@ -2276,10 +2294,11 @@
daddu B, BO, TEMP daddu B, BO, TEMP
#endif #endif
LD b0, 0*SIZE(B) LD b0, 0*SIZE(B)

MTC $0,t11 MTC $0,t11
LD a0,0*SIZE(A)
MOV t21,t11 MOV t21,t11
LD a0,0*SIZE(A)
LD a1,1*SIZE(A)


#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK dsubu TEMP, KCO, KK
@@ -2443,6 +2462,7 @@
LD a0, 0*SIZE(A) LD a0, 0*SIZE(A)
LD b0, 0*SIZE(B) LD b0, 0*SIZE(B)
MTC $0,t11 MTC $0,t11

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, KCO, KK dsubu TEMP, KCO, KK
#elif defined(LEFT) #elif defined(LEFT)


+ 5
- 5
param.h View File

@@ -1480,8 +1480,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL


#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4


#define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 4
@@ -1491,17 +1491,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 1 #define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 4


#define SGEMM_DEFAULT_P 108
#define SGEMM_DEFAULT_P 32
#define DGEMM_DEFAULT_P 32 #define DGEMM_DEFAULT_P 32
#define CGEMM_DEFAULT_P 108 #define CGEMM_DEFAULT_P 108
#define ZGEMM_DEFAULT_P 112 #define ZGEMM_DEFAULT_P 112


#define SGEMM_DEFAULT_Q 288
#define SGEMM_DEFAULT_Q 116
#define DGEMM_DEFAULT_Q 116 #define DGEMM_DEFAULT_Q 116
#define CGEMM_DEFAULT_Q 144 #define CGEMM_DEFAULT_Q 144
#define ZGEMM_DEFAULT_Q 72 #define ZGEMM_DEFAULT_Q 72


#define SGEMM_DEFAULT_R 2000
#define SGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R 1000 #define DGEMM_DEFAULT_R 1000
#define CGEMM_DEFAULT_R 2000 #define CGEMM_DEFAULT_R 2000
#define ZGEMM_DEFAULT_R 2000 #define ZGEMM_DEFAULT_R 2000


Loading…
Cancel
Save