Browse Source

Functional Assembly Kernels for Cortex-A57

Add functional (non-optimized) assembly kernels for Cortex-A57
with the following unroll layouts (UNROLL_M x UNROLL_N):
SGEMM - 16x4, 8x8
CGEMM - 8x4
DGEMM - 8x4, 4x8
tags/v0.2.16^2
Ashwin Sekhar T K 9 years ago
parent
commit
7aa1ad4923
11 changed files with 21161 additions and 17 deletions
  1. +40
    -17
      kernel/arm64/KERNEL.CORTEXA57
  2. +2044
    -0
      kernel/arm64/cgemm_kernel_8x4.S
  3. +2425
    -0
      kernel/arm64/ctrmm_kernel_8x4.S
  4. +1689
    -0
      kernel/arm64/dgemm_kernel_4x8.S
  5. +1570
    -0
      kernel/arm64/dgemm_kernel_8x4.S
  6. +2026
    -0
      kernel/arm64/dtrmm_kernel_4x8.S
  7. +1849
    -0
      kernel/arm64/dtrmm_kernel_8x4.S
  8. +1987
    -0
      kernel/arm64/sgemm_kernel_16x4.S
  9. +2305
    -0
      kernel/arm64/sgemm_kernel_8x8.S
  10. +2431
    -0
      kernel/arm64/strmm_kernel_16x4.S
  11. +2795
    -0
      kernel/arm64/strmm_kernel_8x8.S

+ 40
- 17
kernel/arm64/KERNEL.CORTEXA57 View File

@@ -60,32 +60,55 @@ DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S

STRMMKERNEL = strmm_kernel_4x4.S
DTRMMKERNEL = dtrmm_kernel_4x4.S
CTRMMKERNEL = ctrmm_kernel_4x4.S
ZTRMMKERNEL = ztrmm_kernel_4x4.S

SGEMMKERNEL = sgemm_kernel_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
endif
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

DGEMMKERNEL = dgemm_kernel_4x4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
endif
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

CGEMMKERNEL = cgemm_kernel_4x4.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy.o
CGEMMITCOPYOBJ = cgemm_itcopy.o
endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

ZGEMMKERNEL = zgemm_kernel_4x4.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy.o
ZGEMMITCOPYOBJ = zgemm_itcopy.o
endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o


+ 2044
- 0
kernel/arm64/cgemm_kernel_8x4.S
File diff suppressed because it is too large
View File


+ 2425
- 0
kernel/arm64/ctrmm_kernel_8x4.S
File diff suppressed because it is too large
View File


+ 1689
- 0
kernel/arm64/dgemm_kernel_4x8.S
File diff suppressed because it is too large
View File


+ 1570
- 0
kernel/arm64/dgemm_kernel_8x4.S
File diff suppressed because it is too large
View File


+ 2026
- 0
kernel/arm64/dtrmm_kernel_4x8.S
File diff suppressed because it is too large
View File


+ 1849
- 0
kernel/arm64/dtrmm_kernel_8x4.S
File diff suppressed because it is too large
View File


+ 1987
- 0
kernel/arm64/sgemm_kernel_16x4.S
File diff suppressed because it is too large
View File


+ 2305
- 0
kernel/arm64/sgemm_kernel_8x8.S
File diff suppressed because it is too large
View File


+ 2431
- 0
kernel/arm64/strmm_kernel_16x4.S
File diff suppressed because it is too large
View File


+ 2795
- 0
kernel/arm64/strmm_kernel_8x8.S
File diff suppressed because it is too large
View File


Loading…
Cancel
Save