@@ -56,6 +56,16 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
endif | |||
# Compiler tuning flags for CORE == THUNDERX3T110. The C flag set (CCOMMON_OPT)
# and the Fortran flag set (FCOMMON_OPT) always receive the same -march/-mtune pair.
ifeq ($(CORE), THUNDERX3T110) | |||
# GCCVERSIONGTEQ10 == 1 selects the native armv8.3-a / thunderx3t110 tuning.
# NOTE(review): presumably set elsewhere when the compiler is GCC 10 or newer - confirm.
ifeq ($(GCCVERSIONGTEQ10), 1) | |||
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 | |||
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 | |||
else | |||
# Fallback: reuse the armv8.1-a / thunderx2t99 flags.
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
endif | |||
endif | |||
ifeq ($(GCCVERSIONGTEQ9), 1) | |||
ifeq ($(CORE), TSV110) | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 | |||
@@ -578,6 +578,7 @@ DYNAMIC_CORE += THUNDERX | |||
DYNAMIC_CORE += THUNDERX2T99 | |||
DYNAMIC_CORE += TSV110 | |||
DYNAMIC_CORE += EMAG8180 | |||
DYNAMIC_CORE += THUNDERX3T110 | |||
endif | |||
ifeq ($(ARCH), zarch) | |||
@@ -96,6 +96,7 @@ FALKOR | |||
THUNDERX | |||
THUNDERX2T99 | |||
TSV110 | |||
THUNDERX3T110 | |||
9.System Z: | |||
ZARCH_GENERIC | |||
@@ -45,7 +45,7 @@ endif () | |||
if (DYNAMIC_ARCH) | |||
if (ARM64) | |||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1) | |||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) | |||
endif () | |||
if (POWER) | |||
@@ -338,6 +338,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||
set(ZGEMM_UNROLL_M 4) | |||
set(ZGEMM_UNROLL_N 4) | |||
set(SYMV_P 16) | |||
elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
  # Target configuration written for THUNDERX3T110 when this branch of the
  # TCORE chain is taken: appends the core's cache/DTB macros to
  # TARGET_CONF_TEMP and sets the GEMM unroll factors and SYMV_P.
  #
  # L1_DATA_SIZE is 32768 so that this branch agrees with the values emitted
  # for the same core by get_cpuconfig() and by the FORCE_THUNDERX3T110
  # ARCHCONFIG string; it previously disagreed (65536).
  file(APPEND ${TARGET_CONF_TEMP}
    "#define THUNDERX3T110\n"
    "#define L1_CODE_SIZE\t65536\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t8\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t8\n"
    "#define L2_SIZE\t524288\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t8\n"
    "#define L3_SIZE\t94371840\n"
    "#define L3_LINESIZE\t64\n"
    "#define L3_ASSOCIATIVE\t32\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define ARMV8\n")
  # Unroll factors; these match the THUNDERX3T110 *_DEFAULT_UNROLL_* values
  # defined for the same core in the C parameter header.
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 4)
  set(ZGEMM_UNROLL_N 4)
  set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "TSV110") | |||
file(APPEND ${TARGET_CONF_TEMP} | |||
"#define ARMV8\n" | |||
@@ -40,6 +40,7 @@ | |||
// Cavium | |||
#define CPU_THUNDERX 7 | |||
#define CPU_THUNDERX2T99 8 | |||
#define CPU_THUNDERX3T110 12 | |||
//Hisilicon | |||
#define CPU_TSV110 9 | |||
// Ampere | |||
@@ -57,7 +58,8 @@ static char *cpuname[] = { | |||
"THUNDERX2T99", | |||
"TSV110", | |||
"EMAG8180", | |||
"NEOVERSEN1" | |||
"NEOVERSEN1", | |||
"THUNDERX3T110" | |||
}; | |||
static char *cpuname_lower[] = { | |||
@@ -72,7 +74,8 @@ static char *cpuname_lower[] = { | |||
"thunderx2t99", | |||
"tsv110", | |||
"emag8180", | |||
"neoversen1" | |||
"neoversen1", | |||
"thunderx3t110" | |||
}; | |||
int get_feature(char *search) | |||
@@ -158,6 +161,8 @@ int detect(void) | |||
return CPU_THUNDERX; | |||
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af")) | |||
return CPU_THUNDERX2T99; | |||
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8")) | |||
return CPU_THUNDERX3T110; | |||
// HiSilicon | |||
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) | |||
return CPU_TSV110; | |||
@@ -372,7 +377,25 @@ void get_cpuconfig(void) | |||
printf("#define L2_LINESIZE 64\n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
break; | |||
case CPU_THUNDERX3T110: | |||
printf("#define THUNDERX3T110 \n"); | |||
printf("#define L1_CODE_SIZE 65536 \n"); | |||
printf("#define L1_CODE_LINESIZE 64 \n"); | |||
printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||
printf("#define L1_DATA_SIZE 32768 \n"); | |||
printf("#define L1_DATA_LINESIZE 64 \n"); | |||
printf("#define L1_DATA_ASSOCIATIVE 8 \n"); | |||
printf("#define L2_SIZE 524288 \n"); | |||
printf("#define L2_LINESIZE 64 \n"); | |||
printf("#define L2_ASSOCIATIVE 8 \n"); | |||
printf("#define L3_SIZE 94371840 \n"); | |||
printf("#define L3_LINESIZE 64 \n"); | |||
printf("#define L3_ASSOCIATIVE 32 \n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
printf("#define DTB_SIZE 4096 \n"); | |||
break; | |||
} | |||
get_cpucount(); | |||
} | |||
@@ -53,10 +53,11 @@ extern gotoblas_t gotoblas_THUNDERX2T99; | |||
extern gotoblas_t gotoblas_TSV110; | |||
extern gotoblas_t gotoblas_EMAG8180; | |||
extern gotoblas_t gotoblas_NEOVERSEN1; | |||
extern gotoblas_t gotoblas_THUNDERX3T110; | |||
extern void openblas_warning(int verbose, const char * msg); | |||
#define NUM_CORETYPES 11 | |||
#define NUM_CORETYPES 12 | |||
/* | |||
* In case asm/hwcap.h is outdated on the build system, make sure | |||
@@ -82,6 +83,7 @@ static char *corename[] = { | |||
"tsv110", | |||
"emag8180", | |||
"neoversen1", | |||
"thunderx3t110", | |||
"unknown" | |||
}; | |||
@@ -97,6 +99,7 @@ char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_TSV110) return corename[ 8]; | |||
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; | |||
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10]; | |||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11]; | |||
return corename[NUM_CORETYPES]; | |||
} | |||
@@ -127,6 +130,7 @@ static gotoblas_t *force_coretype(char *coretype) { | |||
case 8: return (&gotoblas_TSV110); | |||
case 9: return (&gotoblas_EMAG8180); | |||
case 10: return (&gotoblas_NEOVERSEN1); | |||
case 11: return (&gotoblas_THUNDERX3T110); | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
@@ -190,6 +194,8 @@ static gotoblas_t *get_coretype(void) { | |||
return &gotoblas_THUNDERX; | |||
case 0x0af: // ThunderX2 | |||
return &gotoblas_THUNDERX2T99; | |||
case 0x0b8: // ThunderX3 | |||
return &gotoblas_THUNDERX3T110; | |||
} | |||
break; | |||
case 0x48: // HiSilicon | |||
@@ -1174,6 +1174,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define CORENAME "EMAG8180" | |||
#endif | |||
/*
 * Forced-target configuration for THUNDERX3T110.
 * When FORCE_THUNDERX3T110 is defined this block defines ARMV8 and FORCE and
 * supplies the architecture / sub-architecture names, the source
 * sub-directory name, the library and core names, and ARCHCONFIG: a single
 * string of -D flags carrying the core macro, the L1/L2/L3 cache and DTB
 * values, and the HAVE_* / ARMV8 feature macros.
 * The #else branch is empty.
 */
#ifdef FORCE_THUNDERX3T110 | |||
#define ARMV8 | |||
#define FORCE | |||
#define ARCHITECTURE "ARM64" | |||
#define SUBARCHITECTURE "THUNDERX3T110" | |||
#define SUBDIRNAME "arm64" | |||
#define ARCHCONFIG "-DTHUNDERX3T110 " \ | |||
"-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||
"-DL3_SIZE=94371840 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "thunderx3t110" | |||
#define CORENAME "THUNDERX3T110" | |||
#else | |||
#endif | |||
#ifdef FORCE_ZARCH_GENERIC | |||
#define FORCE | |||
#define ARCHITECTURE "ZARCH" | |||
@@ -42,7 +42,7 @@ | |||
#include "functable.h" | |||
#endif | |||
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) | |||
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110) | |||
// Multithreaded swap gives performance benefits in ThunderX2T99 | |||
#else | |||
// Disable multi-threading as it does not show any performance | |||
@@ -42,7 +42,7 @@ | |||
#include "functable.h" | |||
#endif | |||
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) | |||
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110) | |||
// Multithreaded swap gives performance benefits in ThunderX2T99 | |||
#else | |||
// Disable multi-threading as it does not show any performance | |||
@@ -0,0 +1,184 @@ | |||
# Kernel source selection: each *KERNEL / *COPY variable names the source file
# used for one routine.
# NOTE(review): presumably the kernel list for the THUNDERX3T110 target - confirm
# against the file name. Many entries below point at *_thunderx2t99 sources.

# amin / max / min and their index-returning (I*) variants: C sources under ../arm.
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
# TRSM kernels (LN/LT/RN/RT) for S, D, C and Z: generic C sources under ../generic.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
# amax, axpy, rot, scal and gemv (n/t): .S sources named without a directory prefix.
SAMAXKERNEL = amax.S | |||
DAMAXKERNEL = amax.S | |||
CAMAXKERNEL = zamax.S | |||
ZAMAXKERNEL = zamax.S | |||
SAXPYKERNEL = axpy.S | |||
# DAXPY is the only axpy entry that uses a thunderx2t99-specific source.
DAXPYKERNEL = daxpy_thunderx2t99.S | |||
CAXPYKERNEL = zaxpy.S | |||
ZAXPYKERNEL = zaxpy.S | |||
SROTKERNEL = rot.S | |||
DROTKERNEL = rot.S | |||
CROTKERNEL = zrot.S | |||
ZROTKERNEL = zrot.S | |||
SSCALKERNEL = scal.S | |||
DSCALKERNEL = scal.S | |||
CSCALKERNEL = zscal.S | |||
ZSCALKERNEL = zscal.S | |||
SGEMVNKERNEL = gemv_n.S | |||
DGEMVNKERNEL = gemv_n.S | |||
CGEMVNKERNEL = zgemv_n.S | |||
ZGEMVNKERNEL = zgemv_n.S | |||
SGEMVTKERNEL = gemv_t.S | |||
DGEMVTKERNEL = gemv_t.S | |||
CGEMVTKERNEL = zgemv_t.S | |||
ZGEMVTKERNEL = zgemv_t.S | |||
# SGEMM: the TRMM kernel and copy-routine file names are built from the
# SGEMM_UNROLL_M / SGEMM_UNROLL_N values.
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
# The IN/IT copy routines are only set when the M and N unroll factors differ.
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# DGEMM: same scheme as SGEMM, except that .S copy routines are used when
# DGEMM_UNROLL_M is 8 (IN/IT) or DGEMM_UNROLL_N is 4 (ON/OT); any other value
# falls back to the generic C copy routines.
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||
ifeq ($(DGEMM_UNROLL_M), 8) | |||
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S | |||
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S | |||
else | |||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
endif | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(DGEMM_UNROLL_N), 4) | |||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
else | |||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
endif | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# CGEMM: TRMM kernel from the CGEMM unroll factors; copy routines come from
# the generic zgemm_* C sources.
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# ZGEMM: same scheme as CGEMM, driven by the ZGEMM unroll factors.
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# asum, copy, swap, iamax, nrm2 and dot: thunderx2t99-specific sources.
SASUMKERNEL = sasum_thunderx2t99.c | |||
DASUMKERNEL = dasum_thunderx2t99.c | |||
CASUMKERNEL = casum_thunderx2t99.c | |||
ZASUMKERNEL = zasum_thunderx2t99.c | |||
SCOPYKERNEL = copy_thunderx2t99.c | |||
DCOPYKERNEL = copy_thunderx2t99.c | |||
CCOPYKERNEL = copy_thunderx2t99.c | |||
ZCOPYKERNEL = copy_thunderx2t99.c | |||
SSWAPKERNEL = swap_thunderx2t99.S | |||
DSWAPKERNEL = swap_thunderx2t99.S | |||
CSWAPKERNEL = swap_thunderx2t99.S | |||
ZSWAPKERNEL = swap_thunderx2t99.S | |||
ISAMAXKERNEL = iamax_thunderx2t99.c | |||
IDAMAXKERNEL = iamax_thunderx2t99.c | |||
ICAMAXKERNEL = izamax_thunderx2t99.c | |||
IZAMAXKERNEL = izamax_thunderx2t99.c | |||
SNRM2KERNEL = scnrm2_thunderx2t99.c | |||
CNRM2KERNEL = scnrm2_thunderx2t99.c | |||
# The *_fast D/Z nrm2 variants are left disabled; the non-fast sources are used.
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c | |||
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c | |||
DNRM2KERNEL = dznrm2_thunderx2t99.c | |||
ZNRM2KERNEL = dznrm2_thunderx2t99.c | |||
DDOTKERNEL = dot_thunderx2t99.c | |||
SDOTKERNEL = dot_thunderx2t99.c | |||
CDOTKERNEL = zdot_thunderx2t99.c | |||
ZDOTKERNEL = zdot_thunderx2t99.c | |||
DSDOTKERNEL = dot.S | |||
# GEMM kernels: each is set only when the unroll factors equal the shape in
# the thunderx2t99 kernel's file name (D 8x4, S 16x4, C 8x4, Z 4x4); no
# assignment is made here for any other shape.
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | |||
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | |||
endif | |||
ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4) | |||
SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S | |||
endif | |||
ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4) | |||
CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S | |||
endif | |||
ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4) | |||
ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S | |||
endif |
@@ -2779,6 +2779,35 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#elif defined(THUNDERX3T110)

/* Default GEMM parameters for THUNDERX3T110, grouped per routine prefix:
   UNROLL_M / UNROLL_N followed by the P, Q and R defaults. */

/* SGEMM */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 352
#define SGEMM_DEFAULT_R 4096

/* DGEMM */
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_P 320
#define DGEMM_DEFAULT_Q 128
#define DGEMM_DEFAULT_R 4096

/* CGEMM */
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_R 4096

/* ZGEMM */
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_Q 112
#define ZGEMM_DEFAULT_R 4096
#elif defined(NEOVERSEN1) | |||
#define SGEMM_DEFAULT_UNROLL_M 16 | |||