Use ThunderX2 Neon Kernels for ARMV8 Target (tags/v0.3.4)
@@ -730,7 +730,7 @@ void blas_set_parameter(void){ | |||||
#if defined(ARCH_ARM64) | #if defined(ARCH_ARM64) | ||||
#if defined(VULCAN) || defined(THUNDERX2T99) | |||||
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8) | |||||
unsigned long dgemm_prefetch_size_a; | unsigned long dgemm_prefetch_size_a; | ||||
unsigned long dgemm_prefetch_size_b; | unsigned long dgemm_prefetch_size_b; | ||||
unsigned long dgemm_prefetch_size_c; | unsigned long dgemm_prefetch_size_c; | ||||
@@ -738,7 +738,7 @@ unsigned long dgemm_prefetch_size_c; | |||||
void blas_set_parameter(void) | void blas_set_parameter(void) | ||||
{ | { | ||||
#if defined(VULCAN) || defined(THUNDERX2T99) | |||||
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8) | |||||
dgemm_p = 160; | dgemm_p = 160; | ||||
dgemm_q = 128; | dgemm_q = 128; | ||||
dgemm_r = 4096; | dgemm_r = 4096; | ||||
@@ -42,7 +42,7 @@ | |||||
#include "functable.h" | #include "functable.h" | ||||
#endif | #endif | ||||
#if defined(THUNDERX2T99) || defined(VULCAN) | |||||
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) | |||||
// Multithreaded swap gives performance benefits in ThunderX2T99 | // Multithreaded swap gives performance benefits in ThunderX2T99 | ||||
#else | #else | ||||
// Disable multi-threading as it does not show any performance | // Disable multi-threading as it does not show any performance | ||||
@@ -1,8 +1,3 @@ | |||||
SAMAXKERNEL = amax.S | |||||
DAMAXKERNEL = amax.S | |||||
CAMAXKERNEL = zamax.S | |||||
ZAMAXKERNEL = zamax.S | |||||
SAMINKERNEL = ../arm/amin.c | SAMINKERNEL = ../arm/amin.c | ||||
DAMINKERNEL = ../arm/amin.c | DAMINKERNEL = ../arm/amin.c | ||||
CAMINKERNEL = ../arm/zamin.c | CAMINKERNEL = ../arm/zamin.c | ||||
@@ -14,11 +9,6 @@ DMAXKERNEL = ../arm/max.c | |||||
SMINKERNEL = ../arm/min.c | SMINKERNEL = ../arm/min.c | ||||
DMINKERNEL = ../arm/min.c | DMINKERNEL = ../arm/min.c | ||||
ISAMAXKERNEL = iamax.S | |||||
IDAMAXKERNEL = iamax.S | |||||
ICAMAXKERNEL = izamax.S | |||||
IZAMAXKERNEL = izamax.S | |||||
ISAMINKERNEL = ../arm/iamin.c | ISAMINKERNEL = ../arm/iamin.c | ||||
IDAMINKERNEL = ../arm/iamin.c | IDAMINKERNEL = ../arm/iamin.c | ||||
ICAMINKERNEL = ../arm/izamin.c | ICAMINKERNEL = ../arm/izamin.c | ||||
@@ -30,33 +20,35 @@ IDMAXKERNEL = ../arm/imax.c | |||||
ISMINKERNEL = ../arm/imin.c | ISMINKERNEL = ../arm/imin.c | ||||
IDMINKERNEL = ../arm/imin.c | IDMINKERNEL = ../arm/imin.c | ||||
SASUMKERNEL = asum.S | |||||
DASUMKERNEL = asum.S | |||||
CASUMKERNEL = casum.S | |||||
ZASUMKERNEL = zasum.S | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
SAXPYKERNEL = axpy.S | |||||
DAXPYKERNEL = axpy.S | |||||
CAXPYKERNEL = zaxpy.S | |||||
ZAXPYKERNEL = zaxpy.S | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
SCOPYKERNEL = copy.S | |||||
DCOPYKERNEL = copy.S | |||||
CCOPYKERNEL = copy.S | |||||
ZCOPYKERNEL = copy.S | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
SDOTKERNEL = dot.S | |||||
DDOTKERNEL = dot.S | |||||
CDOTKERNEL = zdot.S | |||||
ZDOTKERNEL = zdot.S | |||||
DSDOTKERNEL = dot.S | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ifneq ($(OS_DARWIN)$(CROSS),11) | |||||
SNRM2KERNEL = nrm2.S | |||||
DNRM2KERNEL = nrm2.S | |||||
CNRM2KERNEL = znrm2.S | |||||
ZNRM2KERNEL = znrm2.S | |||||
endif | |||||
SAMAXKERNEL = amax.S | |||||
DAMAXKERNEL = amax.S | |||||
CAMAXKERNEL = zamax.S | |||||
ZAMAXKERNEL = zamax.S | |||||
SAXPYKERNEL = axpy.S | |||||
DAXPYKERNEL = daxpy_thunderx2t99.S | |||||
CAXPYKERNEL = zaxpy.S | |||||
ZAXPYKERNEL = zaxpy.S | |||||
SROTKERNEL = rot.S | SROTKERNEL = rot.S | ||||
DROTKERNEL = rot.S | DROTKERNEL = rot.S | ||||
@@ -68,11 +60,6 @@ DSCALKERNEL = scal.S | |||||
CSCALKERNEL = zscal.S | CSCALKERNEL = zscal.S | ||||
ZSCALKERNEL = zscal.S | ZSCALKERNEL = zscal.S | ||||
SSWAPKERNEL = swap.S | |||||
DSWAPKERNEL = swap.S | |||||
CSWAPKERNEL = swap.S | |||||
ZSWAPKERNEL = swap.S | |||||
SGEMVNKERNEL = gemv_n.S | SGEMVNKERNEL = gemv_n.S | ||||
DGEMVNKERNEL = gemv_n.S | DGEMVNKERNEL = gemv_n.S | ||||
CGEMVNKERNEL = zgemv_n.S | CGEMVNKERNEL = zgemv_n.S | ||||
@@ -83,18 +70,137 @@ DGEMVTKERNEL = gemv_t.S | |||||
CGEMVTKERNEL = zgemv_t.S | CGEMVTKERNEL = zgemv_t.S | ||||
ZGEMVTKERNEL = zgemv_t.S | ZGEMVTKERNEL = zgemv_t.S | ||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
SASUMKERNEL = sasum_thunderx2t99.c | |||||
DASUMKERNEL = dasum_thunderx2t99.c | |||||
CASUMKERNEL = casum_thunderx2t99.c | |||||
ZASUMKERNEL = zasum_thunderx2t99.c | |||||
SCOPYKERNEL = copy_thunderx2t99.c | |||||
DCOPYKERNEL = copy_thunderx2t99.c | |||||
CCOPYKERNEL = copy_thunderx2t99.c | |||||
ZCOPYKERNEL = copy_thunderx2t99.c | |||||
SSWAPKERNEL = swap_thunderx2t99.S | |||||
DSWAPKERNEL = swap_thunderx2t99.S | |||||
CSWAPKERNEL = swap_thunderx2t99.S | |||||
ZSWAPKERNEL = swap_thunderx2t99.S | |||||
ISAMAXKERNEL = iamax_thunderx2t99.c | |||||
IDAMAXKERNEL = iamax_thunderx2t99.c | |||||
ICAMAXKERNEL = izamax_thunderx2t99.c | |||||
IZAMAXKERNEL = izamax_thunderx2t99.c | |||||
ifneq ($(OS_DARWIN)$(CROSS),11) | |||||
SNRM2KERNEL = scnrm2_thunderx2t99.c | |||||
CNRM2KERNEL = scnrm2_thunderx2t99.c | |||||
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c | |||||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c | |||||
DNRM2KERNEL = dznrm2_thunderx2t99.c | |||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c | |||||
endif | |||||
DDOTKERNEL = dot_thunderx2t99.c | |||||
SDOTKERNEL = dot_thunderx2t99.c | |||||
CDOTKERNEL = zdot_thunderx2t99.c | |||||
ZDOTKERNEL = zdot_thunderx2t99.c | |||||
DSDOTKERNEL = dot.S | |||||
ifneq ($(OS_DARWIN)$(CROSS),11) | |||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
endif | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||||
ifeq ($(DGEMM_UNROLL_M), 8) | |||||
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S | |||||
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S | |||||
else | |||||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||||
endif | |||||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
endif | |||||
ifeq ($(DGEMM_UNROLL_N), 4) | |||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
else | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||||
endif | |||||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
endif | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||||
endif | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | |||||
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | |||||
endif | |||||
ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4) | |||||
SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S | |||||
endif | |||||
ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4) | |||||
CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S | |||||
endif | |||||
ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4) | |||||
ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S | |||||
endif | |||||
else | |||||
STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | DTRMMKERNEL = ../generic/trmmkernel_2x2.c | ||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | ||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | ||||
ifneq ($(OS_DARWIN)$(CROSS),11) | |||||
SGEMMKERNEL = sgemm_kernel_4x4.S | |||||
else | |||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | SGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
endif | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | SGEMMONCOPYOBJ = sgemm_oncopy.o | ||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | SGEMMOTCOPYOBJ = sgemm_otcopy.o | ||||
@@ -116,26 +222,4 @@ ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | ZGEMMONCOPYOBJ = zgemm_oncopy.o | ||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | ZGEMMOTCOPYOBJ = zgemm_otcopy.o | ||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
endif |
@@ -1,4 +1,49 @@ | |||||
include $(KERNELDIR)/KERNEL.ARMV8 | |||||
SAMINKERNEL = ../arm/amin.c | |||||
DAMINKERNEL = ../arm/amin.c | |||||
CAMINKERNEL = ../arm/zamin.c | |||||
ZAMINKERNEL = ../arm/zamin.c | |||||
SMAXKERNEL = ../arm/max.c | |||||
DMAXKERNEL = ../arm/max.c | |||||
SMINKERNEL = ../arm/min.c | |||||
DMINKERNEL = ../arm/min.c | |||||
ISAMINKERNEL = ../arm/iamin.c | |||||
IDAMINKERNEL = ../arm/iamin.c | |||||
ICAMINKERNEL = ../arm/izamin.c | |||||
IZAMINKERNEL = ../arm/izamin.c | |||||
ISMAXKERNEL = ../arm/imax.c | |||||
IDMAXKERNEL = ../arm/imax.c | |||||
ISMINKERNEL = ../arm/imin.c | |||||
IDMINKERNEL = ../arm/imin.c | |||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
SAMAXKERNEL = amax.S | SAMAXKERNEL = amax.S | ||||
DAMAXKERNEL = amax.S | DAMAXKERNEL = amax.S | ||||
@@ -1,6 +1,133 @@ | |||||
include $(KERNELDIR)/KERNEL.ARMV8 | |||||
SAMAXKERNEL = amax.S | |||||
DAMAXKERNEL = amax.S | |||||
CAMAXKERNEL = zamax.S | |||||
ZAMAXKERNEL = zamax.S | |||||
SAMINKERNEL = ../arm/amin.c | |||||
DAMINKERNEL = ../arm/amin.c | |||||
CAMINKERNEL = ../arm/zamin.c | |||||
ZAMINKERNEL = ../arm/zamin.c | |||||
SMAXKERNEL = ../arm/max.c | |||||
DMAXKERNEL = ../arm/max.c | |||||
SMINKERNEL = ../arm/min.c | |||||
DMINKERNEL = ../arm/min.c | |||||
ISAMAXKERNEL = iamax.S | |||||
IDAMAXKERNEL = iamax.S | |||||
ICAMAXKERNEL = izamax.S | |||||
IZAMAXKERNEL = izamax.S | |||||
ISAMINKERNEL = ../arm/iamin.c | |||||
IDAMINKERNEL = ../arm/iamin.c | |||||
ICAMINKERNEL = ../arm/izamin.c | |||||
IZAMINKERNEL = ../arm/izamin.c | |||||
ISMAXKERNEL = ../arm/imax.c | |||||
IDMAXKERNEL = ../arm/imax.c | |||||
ISMINKERNEL = ../arm/imin.c | |||||
IDMINKERNEL = ../arm/imin.c | |||||
SASUMKERNEL = asum.S | |||||
DASUMKERNEL = asum.S | |||||
CASUMKERNEL = casum.S | |||||
ZASUMKERNEL = zasum.S | |||||
SAXPYKERNEL = axpy.S | |||||
DAXPYKERNEL = daxpy_thunderx.c | |||||
CAXPYKERNEL = zaxpy.S | |||||
ZAXPYKERNEL = zaxpy.S | |||||
SCOPYKERNEL = copy.S | |||||
DCOPYKERNEL = copy.S | |||||
CCOPYKERNEL = copy.S | |||||
ZCOPYKERNEL = copy.S | |||||
SDOTKERNEL = dot_thunderx.c | |||||
DDOTKERNEL = ddot_thunderx.c | |||||
CDOTKERNEL = zdot.S | |||||
ZDOTKERNEL = zdot.S | |||||
DSDOTKERNEL = dot.S | |||||
SNRM2KERNEL = nrm2.S | |||||
DNRM2KERNEL = nrm2.S | |||||
CNRM2KERNEL = znrm2.S | |||||
ZNRM2KERNEL = znrm2.S | |||||
SROTKERNEL = rot.S | |||||
DROTKERNEL = rot.S | |||||
CROTKERNEL = zrot.S | |||||
ZROTKERNEL = zrot.S | |||||
SSCALKERNEL = scal.S | |||||
DSCALKERNEL = scal.S | |||||
CSCALKERNEL = zscal.S | |||||
ZSCALKERNEL = zscal.S | |||||
SSWAPKERNEL = swap.S | |||||
DSWAPKERNEL = swap.S | |||||
CSWAPKERNEL = swap.S | |||||
ZSWAPKERNEL = swap.S | |||||
SGEMVNKERNEL = gemv_n.S | |||||
DGEMVNKERNEL = gemv_n.S | |||||
CGEMVNKERNEL = zgemv_n.S | |||||
ZGEMVNKERNEL = zgemv_n.S | |||||
SGEMVTKERNEL = gemv_t.S | |||||
DGEMVTKERNEL = gemv_t.S | |||||
CGEMVTKERNEL = zgemv_t.S | |||||
ZGEMVTKERNEL = zgemv_t.S | |||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
SGEMMKERNEL = sgemm_kernel_4x4.S | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
SDOTKERNEL=dot_thunderx.c | |||||
DDOTKERNEL=ddot_thunderx.c | |||||
DAXPYKERNEL=daxpy_thunderx.c | |||||
@@ -1,4 +1,137 @@ | |||||
include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
SAMINKERNEL = ../arm/amin.c | |||||
DAMINKERNEL = ../arm/amin.c | |||||
CAMINKERNEL = ../arm/zamin.c | |||||
ZAMINKERNEL = ../arm/zamin.c | |||||
SMAXKERNEL = ../arm/max.c | |||||
DMAXKERNEL = ../arm/max.c | |||||
SMINKERNEL = ../arm/min.c | |||||
DMINKERNEL = ../arm/min.c | |||||
ISAMINKERNEL = ../arm/iamin.c | |||||
IDAMINKERNEL = ../arm/iamin.c | |||||
ICAMINKERNEL = ../arm/izamin.c | |||||
IZAMINKERNEL = ../arm/izamin.c | |||||
ISMAXKERNEL = ../arm/imax.c | |||||
IDMAXKERNEL = ../arm/imax.c | |||||
ISMINKERNEL = ../arm/imin.c | |||||
IDMINKERNEL = ../arm/imin.c | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
SAMAXKERNEL = amax.S | |||||
DAMAXKERNEL = amax.S | |||||
CAMAXKERNEL = zamax.S | |||||
ZAMAXKERNEL = zamax.S | |||||
SAXPYKERNEL = axpy.S | |||||
DAXPYKERNEL = daxpy_thunderx2t99.S | |||||
CAXPYKERNEL = zaxpy.S | |||||
ZAXPYKERNEL = zaxpy.S | |||||
SROTKERNEL = rot.S | |||||
DROTKERNEL = rot.S | |||||
CROTKERNEL = zrot.S | |||||
ZROTKERNEL = zrot.S | |||||
SSCALKERNEL = scal.S | |||||
DSCALKERNEL = scal.S | |||||
CSCALKERNEL = zscal.S | |||||
ZSCALKERNEL = zscal.S | |||||
SGEMVNKERNEL = gemv_n.S | |||||
DGEMVNKERNEL = gemv_n.S | |||||
CGEMVNKERNEL = zgemv_n.S | |||||
ZGEMVNKERNEL = zgemv_n.S | |||||
SGEMVTKERNEL = gemv_t.S | |||||
DGEMVTKERNEL = gemv_t.S | |||||
CGEMVTKERNEL = zgemv_t.S | |||||
ZGEMVTKERNEL = zgemv_t.S | |||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
endif | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||||
ifeq ($(DGEMM_UNROLL_M), 8) | |||||
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S | |||||
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S | |||||
else | |||||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||||
endif | |||||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
endif | |||||
ifeq ($(DGEMM_UNROLL_N), 4) | |||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
else | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||||
endif | |||||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
endif | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||||
endif | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
SASUMKERNEL = sasum_thunderx2t99.c | SASUMKERNEL = sasum_thunderx2t99.c | ||||
DASUMKERNEL = dasum_thunderx2t99.c | DASUMKERNEL = dasum_thunderx2t99.c | ||||
@@ -27,12 +160,12 @@ CNRM2KERNEL = scnrm2_thunderx2t99.c | |||||
DNRM2KERNEL = dznrm2_thunderx2t99.c | DNRM2KERNEL = dznrm2_thunderx2t99.c | ||||
ZNRM2KERNEL = dznrm2_thunderx2t99.c | ZNRM2KERNEL = dznrm2_thunderx2t99.c | ||||
DAXPYKERNEL = daxpy_thunderx2t99.S | |||||
DDOTKERNEL = dot_thunderx2t99.c | DDOTKERNEL = dot_thunderx2t99.c | ||||
SDOTKERNEL = dot_thunderx2t99.c | SDOTKERNEL = dot_thunderx2t99.c | ||||
CDOTKERNEL = zdot_thunderx2t99.c | CDOTKERNEL = zdot_thunderx2t99.c | ||||
ZDOTKERNEL = zdot_thunderx2t99.c | ZDOTKERNEL = zdot_thunderx2t99.c | ||||
DSDOTKERNEL = dot.S | |||||
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | ||||
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | ||||
@@ -1 +1,135 @@ | |||||
include $(KERNELDIR)/KERNEL.ARMV8 | |||||
SAMAXKERNEL = amax.S | |||||
DAMAXKERNEL = amax.S | |||||
CAMAXKERNEL = zamax.S | |||||
ZAMAXKERNEL = zamax.S | |||||
SAMINKERNEL = ../arm/amin.c | |||||
DAMINKERNEL = ../arm/amin.c | |||||
CAMINKERNEL = ../arm/zamin.c | |||||
ZAMINKERNEL = ../arm/zamin.c | |||||
SMAXKERNEL = ../arm/max.c | |||||
DMAXKERNEL = ../arm/max.c | |||||
SMINKERNEL = ../arm/min.c | |||||
DMINKERNEL = ../arm/min.c | |||||
ISAMAXKERNEL = iamax.S | |||||
IDAMAXKERNEL = iamax.S | |||||
ICAMAXKERNEL = izamax.S | |||||
IZAMAXKERNEL = izamax.S | |||||
ISAMINKERNEL = ../arm/iamin.c | |||||
IDAMINKERNEL = ../arm/iamin.c | |||||
ICAMINKERNEL = ../arm/izamin.c | |||||
IZAMINKERNEL = ../arm/izamin.c | |||||
ISMAXKERNEL = ../arm/imax.c | |||||
IDMAXKERNEL = ../arm/imax.c | |||||
ISMINKERNEL = ../arm/imin.c | |||||
IDMINKERNEL = ../arm/imin.c | |||||
SASUMKERNEL = asum.S | |||||
DASUMKERNEL = asum.S | |||||
CASUMKERNEL = casum.S | |||||
ZASUMKERNEL = zasum.S | |||||
SAXPYKERNEL = axpy.S | |||||
DAXPYKERNEL = axpy.S | |||||
CAXPYKERNEL = zaxpy.S | |||||
ZAXPYKERNEL = zaxpy.S | |||||
SCOPYKERNEL = copy.S | |||||
DCOPYKERNEL = copy.S | |||||
CCOPYKERNEL = copy.S | |||||
ZCOPYKERNEL = copy.S | |||||
SDOTKERNEL = dot.S | |||||
DDOTKERNEL = dot.S | |||||
CDOTKERNEL = zdot.S | |||||
ZDOTKERNEL = zdot.S | |||||
DSDOTKERNEL = dot.S | |||||
SNRM2KERNEL = nrm2.S | |||||
DNRM2KERNEL = nrm2.S | |||||
CNRM2KERNEL = znrm2.S | |||||
ZNRM2KERNEL = znrm2.S | |||||
SROTKERNEL = rot.S | |||||
DROTKERNEL = rot.S | |||||
CROTKERNEL = zrot.S | |||||
ZROTKERNEL = zrot.S | |||||
SSCALKERNEL = scal.S | |||||
DSCALKERNEL = scal.S | |||||
CSCALKERNEL = zscal.S | |||||
ZSCALKERNEL = zscal.S | |||||
SSWAPKERNEL = swap.S | |||||
DSWAPKERNEL = swap.S | |||||
CSWAPKERNEL = swap.S | |||||
ZSWAPKERNEL = swap.S | |||||
SGEMVNKERNEL = gemv_n.S | |||||
DGEMVNKERNEL = gemv_n.S | |||||
CGEMVNKERNEL = zgemv_n.S | |||||
ZGEMVNKERNEL = zgemv_n.S | |||||
SGEMVTKERNEL = gemv_t.S | |||||
DGEMVTKERNEL = gemv_t.S | |||||
CGEMVTKERNEL = zgemv_t.S | |||||
ZGEMVTKERNEL = zgemv_t.S | |||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
SGEMMKERNEL = sgemm_kernel_4x4.S | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
@@ -2583,6 +2583,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#endif | #endif | ||||
#if defined(ARMV8) | #if defined(ARMV8) | ||||
#if defined(OS_DARWIN) && defined(CROSS) | |||||
#define SNUMOPT 2 | #define SNUMOPT 2 | ||||
#define DNUMOPT 2 | #define DNUMOPT 2 | ||||
@@ -2590,13 +2592,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define GEMM_DEFAULT_OFFSET_B 0 | #define GEMM_DEFAULT_OFFSET_B 0 | ||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | #define GEMM_DEFAULT_ALIGN 0x03fffUL | ||||
#if defined(OS_DARWIN) && defined(CROSS) | |||||
#define SGEMM_DEFAULT_UNROLL_M 2 | #define SGEMM_DEFAULT_UNROLL_M 2 | ||||
#define SGEMM_DEFAULT_UNROLL N 2 | |||||
#else | |||||
#define SGEMM_DEFAULT_UNROLL_M 4 | |||||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||||
#endif | |||||
#define SGEMM_DEFAULT_UNROLL_N 2 | |||||
#define DGEMM_DEFAULT_UNROLL_M 2 | #define DGEMM_DEFAULT_UNROLL_M 2 | ||||
#define DGEMM_DEFAULT_UNROLL_N 2 | #define DGEMM_DEFAULT_UNROLL_N 2 | ||||
@@ -2622,10 +2619,48 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CGEMM_DEFAULT_R 4096 | #define CGEMM_DEFAULT_R 4096 | ||||
#define ZGEMM_DEFAULT_R 4096 | #define ZGEMM_DEFAULT_R 4096 | ||||
#define SYMV_P 16 | |||||
#else | |||||
#define SNUMOPT 2 | |||||
#define DNUMOPT 2 | |||||
#define GEMM_DEFAULT_OFFSET_A 0 | |||||
#define GEMM_DEFAULT_OFFSET_B 0 | |||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
#define SGEMM_DEFAULT_UNROLL_M 16 | |||||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||||
#define DGEMM_DEFAULT_UNROLL_M 8 | |||||
#define DGEMM_DEFAULT_UNROLL_N 4 | |||||
#define CGEMM_DEFAULT_UNROLL_M 8 | |||||
#define CGEMM_DEFAULT_UNROLL_N 4 | |||||
#define ZGEMM_DEFAULT_UNROLL_M 4 | |||||
#define ZGEMM_DEFAULT_UNROLL_N 4 | |||||
#define SGEMM_DEFAULT_P sgemm_p | |||||
#define DGEMM_DEFAULT_P dgemm_p | |||||
#define CGEMM_DEFAULT_P cgemm_p | |||||
#define ZGEMM_DEFAULT_P zgemm_p | |||||
#define SGEMM_DEFAULT_Q sgemm_q | |||||
#define DGEMM_DEFAULT_Q dgemm_q | |||||
#define CGEMM_DEFAULT_Q cgemm_q | |||||
#define ZGEMM_DEFAULT_Q zgemm_q | |||||
#define SGEMM_DEFAULT_R sgemm_r | |||||
#define DGEMM_DEFAULT_R dgemm_r | |||||
#define CGEMM_DEFAULT_R cgemm_r | |||||
#define ZGEMM_DEFAULT_R zgemm_r | |||||
#define SYMV_P 16 | #define SYMV_P 16 | ||||
#endif | #endif | ||||
#endif | |||||
#if defined(THUNDERX) | #if defined(THUNDERX) | ||||
#define SNUMOPT 2 | #define SNUMOPT 2 | ||||
#define DNUMOPT 2 | #define DNUMOPT 2 | ||||