Browse Source

Complete all the complex single-precision Level 3 functions on Loongson3a; the performance is 2.3 GFlops.

tags/v0.1.0^2
traz 14 years ago
parent
commit
c8360e3ae5
7 changed files with 1499 additions and 9 deletions
  1. +11
    -0
      kernel/mips64/KERNEL
  2. +14
    -3
      kernel/mips64/KERNEL.LOONGSON3A
  3. +1468
    -0
      kernel/mips64/cgemm_kernel_loongson3a_2x2.S
  4. +0
    -0
      kernel/mips64/dgemm_kernel_loongson3a_4x4.S
  5. +0
    -0
      kernel/mips64/sgemm_kernel_loongson3a_4x4.S
  6. +1
    -1
      kernel/mips64/zgemm_kernel_loongson3a_2x2.S
  7. +5
    -5
      param.h

+ 11
- 0
kernel/mips64/KERNEL View File

@@ -123,10 +123,21 @@ ifndef DTRSMKERNEL_RT
DTRSMKERNEL_RT = trsm_kernel_RT.S
endif

ifndef CTRSMKERNEL_LN
CTRSMKERNEL_LN = ztrsm_kernel_LT.S
endif

ifndef CTRSMKERNEL_LT
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
endif

ifndef CTRSMKERNEL_RN
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
endif

ifndef CTRSMKERNEL_RT
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
endif

ifndef ZTRSMKERNEL_LN
ZTRSMKERNEL_LN = ztrsm_kernel_LT.S


+ 14
- 3
kernel/mips64/KERNEL.LOONGSON3A View File

@@ -1,19 +1,25 @@
SAXPYKERNEL=axpy_loongson3a.S
DAXPYKERNEL=daxpy_loongson3a_simd.S

SGEMMKERNEL = sgemm_kernel_loongson3a.S
SGEMMKERNEL = sgemm_kernel_loongson3a_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

DGEMMKERNEL = gemm_kernel_loongson3a.S
DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

ZGEMMKERNEL = zgemm_kernel_loongson3a.S
CGEMMKERNEL = cgemm_kernel_loongson3a_2x2.S
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
@@ -29,6 +35,11 @@ DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c


+ 1468
- 0
kernel/mips64/cgemm_kernel_loongson3a_2x2.S
File diff suppressed because it is too large
View File


kernel/mips64/gemm_kernel_loongson3a.S → kernel/mips64/dgemm_kernel_loongson3a_4x4.S View File


kernel/mips64/sgemm_kernel_loongson3a.S → kernel/mips64/sgemm_kernel_loongson3a_4x4.S View File


kernel/mips64/zgemm_kernel_loongson3a.S → kernel/mips64/zgemm_kernel_loongson3a_2x2.S View File


+ 5
- 5
param.h View File

@@ -1486,25 +1486,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4

#define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2

#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2

#define SGEMM_DEFAULT_P 64
#define DGEMM_DEFAULT_P 32
#define CGEMM_DEFAULT_P 108
#define CGEMM_DEFAULT_P 64
#define ZGEMM_DEFAULT_P 32

#define SGEMM_DEFAULT_Q 116
#define DGEMM_DEFAULT_Q 116
#define CGEMM_DEFAULT_Q 144
#define CGEMM_DEFAULT_Q 100
#define ZGEMM_DEFAULT_Q 80

#define SGEMM_DEFAULT_R 1000
#define DGEMM_DEFAULT_R 1000
#define CGEMM_DEFAULT_R 2000
#define CGEMM_DEFAULT_R 1000
#define ZGEMM_DEFAULT_R 1000

#define SYMV_P 16


Loading…
Cancel
Save