Add CORTEXA55 cpuid 0xd05 support (tags/v0.3.16^2)
@@ -57,6 +57,28 @@ endif | |||||
endif | endif | ||||
endif | endif | ||||
# Compiler flags for CORE == CORTEXA55.
# -mtune=cortex-a55 is only available in GCC >= 8.1, so older compilers get
# the cortex-a53 tunings instead:
#   GCC >= 8 : -march=armv8.2-a -mtune=cortex-a55
#   GCC == 7 : -march=armv8.2-a -mtune=cortex-a53
#   otherwise: -march=armv8-a   -mtune=cortex-a53
# The same flags are appended to FCOMMON_OPT unless F_COMPILER is NAG.
ifeq ($(CORE), CORTEXA55) | |||||
ifeq ($(GCCVERSIONGTEQ7), 1) | |||||
ifeq ($(GCCVERSIONGTEQ8), 1) | |||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 | |||||
ifneq ($(F_COMPILER), NAG) | |||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 | |||||
endif | |||||
else | |||||
# GCC 7: keep the armv8.2-a architecture level but tune for cortex-a53.
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53 | |||||
ifneq ($(F_COMPILER), NAG) | |||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53 | |||||
endif | |||||
endif | |||||
else | |||||
# GCCVERSIONGTEQ7 is not 1: fall back to plain armv8-a with cortex-a53 tuning.
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||||
ifneq ($(F_COMPILER), NAG) | |||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||||
endif | |||||
endif | |||||
endif | |||||
ifeq ($(CORE), THUNDERX) | ifeq ($(CORE), THUNDERX) | ||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx | CCOMMON_OPT += -march=armv8-a -mtune=thunderx | ||||
ifneq ($(F_COMPILER), NAG) | ifneq ($(F_COMPILER), NAG) | ||||
@@ -333,6 +333,7 @@ GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) | |||||
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4) | GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4) | ||||
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) | GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) | ||||
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7) | GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7) | ||||
GCCVERSIONGTEQ8 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 8) | |||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) | GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) | ||||
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11) | GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11) | ||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10) | GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10) | ||||
@@ -625,6 +626,7 @@ DYNAMIC_CORE += CORTEXA57 | |||||
DYNAMIC_CORE += CORTEXA72 | DYNAMIC_CORE += CORTEXA72 | ||||
DYNAMIC_CORE += CORTEXA73 | DYNAMIC_CORE += CORTEXA73 | ||||
DYNAMIC_CORE += NEOVERSEN1 | DYNAMIC_CORE += NEOVERSEN1 | ||||
DYNAMIC_CORE += CORTEXA55 | |||||
DYNAMIC_CORE += FALKOR | DYNAMIC_CORE += FALKOR | ||||
DYNAMIC_CORE += THUNDERX | DYNAMIC_CORE += THUNDERX | ||||
DYNAMIC_CORE += THUNDERX2T99 | DYNAMIC_CORE += THUNDERX2T99 | ||||
@@ -92,6 +92,7 @@ CORTEXA57 | |||||
CORTEXA72 | CORTEXA72 | ||||
CORTEXA73 | CORTEXA73 | ||||
NEOVERSEN1 | NEOVERSEN1 | ||||
CORTEXA55 | |||||
EMAG8180 | EMAG8180 | ||||
FALKOR | FALKOR | ||||
THUNDERX | THUNDERX | ||||
@@ -44,7 +44,7 @@ endif () | |||||
if (DYNAMIC_ARCH) | if (DYNAMIC_ARCH) | ||||
if (ARM64) | if (ARM64) | ||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) | |||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) | |||||
if (DYNAMIC_LIST) | if (DYNAMIC_LIST) | ||||
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) | set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) | ||||
endif () | endif () | ||||
@@ -177,7 +177,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||||
set(ZGEMM_UNROLL_M 4) | set(ZGEMM_UNROLL_M 4) | ||||
set(ZGEMM_UNROLL_N 4) | set(ZGEMM_UNROLL_N 4) | ||||
set(SYMV_P 16) | set(SYMV_P 16) | ||||
elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53") | |||||
elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53" OR "${TCORE}" STREQUAL "CORTEXA55") | |||||
file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
"#define L1_CODE_SIZE\t32768\n" | "#define L1_CODE_SIZE\t32768\n" | ||||
"#define L1_CODE_LINESIZE\t64\n" | "#define L1_CODE_LINESIZE\t64\n" | ||||
@@ -39,7 +39,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | |||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | ||||
set(TARGET "BARCELONA") | set(TARGET "BARCELONA") | ||||
endif () | endif () | ||||
if (${TARGET} STREQUAL "ARMV8" OR ${TARGET} STREQUAL "CORTEXA57" OR ${TARGET} STREQUAL "CORTEXA53") | |||||
if (${TARGET} STREQUAL "ARMV8" OR ${TARGET} STREQUAL "CORTEXA57" OR ${TARGET} STREQUAL "CORTEXA53" OR ${TARGET} STREQUAL "CORTEXA55") | |||||
set(TARGET "ARMV7") | set(TARGET "ARMV7") | ||||
endif () | endif () | ||||
endif () | endif () | ||||
@@ -36,6 +36,7 @@ size_t length=sizeof(value); | |||||
#define CPU_ARMV8 1 | #define CPU_ARMV8 1 | ||||
// Arm | // Arm | ||||
#define CPU_CORTEXA53 2 | #define CPU_CORTEXA53 2 | ||||
#define CPU_CORTEXA55 14 | |||||
#define CPU_CORTEXA57 3 | #define CPU_CORTEXA57 3 | ||||
#define CPU_CORTEXA72 4 | #define CPU_CORTEXA72 4 | ||||
#define CPU_CORTEXA73 5 | #define CPU_CORTEXA73 5 | ||||
@@ -67,7 +68,8 @@ static char *cpuname[] = { | |||||
"EMAG8180", | "EMAG8180", | ||||
"NEOVERSEN1", | "NEOVERSEN1", | ||||
"THUNDERX3T110", | "THUNDERX3T110", | ||||
"VORTEX" | |||||
"VORTEX", | |||||
"CORTEXA55" | |||||
}; | }; | ||||
static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
@@ -84,7 +86,8 @@ static char *cpuname_lower[] = { | |||||
"emag8180", | "emag8180", | ||||
"neoversen1", | "neoversen1", | ||||
"thunderx3t110", | "thunderx3t110", | ||||
"vortex" | |||||
"vortex", | |||||
"cortexa55" | |||||
}; | }; | ||||
int get_feature(char *search) | int get_feature(char *search) | ||||
@@ -161,6 +164,8 @@ int detect(void) | |||||
return CPU_CORTEXA73; | return CPU_CORTEXA73; | ||||
else if (strstr(cpu_part, "0xd0c")) | else if (strstr(cpu_part, "0xd0c")) | ||||
return CPU_NEOVERSEN1; | return CPU_NEOVERSEN1; | ||||
else if (strstr(cpu_part, "0xd05")) | |||||
return CPU_CORTEXA55; | |||||
} | } | ||||
// Qualcomm | // Qualcomm | ||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | ||||
@@ -281,6 +286,7 @@ void get_cpuconfig(void) | |||||
{ | { | ||||
case CPU_CORTEXA53: | case CPU_CORTEXA53: | ||||
case CPU_CORTEXA55: | |||||
printf("#define %s\n", cpuname[d]); | printf("#define %s\n", cpuname[d]); | ||||
// Fall-through | // Fall-through | ||||
case CPU_ARMV8: | case CPU_ARMV8: | ||||
@@ -99,6 +99,11 @@ extern gotoblas_t gotoblas_NEOVERSEN1; | |||||
#else | #else | ||||
#define gotoblas_NEOVERSEN1 gotoblas_ARMV8 | #define gotoblas_NEOVERSEN1 gotoblas_ARMV8 | ||||
#endif | #endif | ||||
/*
 * Cortex-A55 dispatch table for builds restricted to a list of cores.
 *
 * The core is spelled CORTEXA55 (no underscore) everywhere else in this
 * change, so the per-core guard is expected to be DYN_CORTEXA55.
 * NOTE(review): presumably the build derives the macro as DYN_<core name>;
 * confirm against the build system.
 * The original DYN_CORTEX_A55 spelling is still accepted so that anything
 * already defining it keeps working.
 * When neither macro is defined, gotoblas_CORTEXA55 aliases the generic
 * ARMV8 table.
 */
#if defined(DYN_CORTEXA55) || defined(DYN_CORTEX_A55)
extern gotoblas_t gotoblas_CORTEXA55;
#else
#define gotoblas_CORTEXA55 gotoblas_ARMV8
#endif
#else | #else | ||||
extern gotoblas_t gotoblas_CORTEXA53; | extern gotoblas_t gotoblas_CORTEXA53; | ||||
extern gotoblas_t gotoblas_CORTEXA57; | extern gotoblas_t gotoblas_CORTEXA57; | ||||
@@ -111,11 +116,12 @@ extern gotoblas_t gotoblas_TSV110; | |||||
extern gotoblas_t gotoblas_EMAG8180; | extern gotoblas_t gotoblas_EMAG8180; | ||||
extern gotoblas_t gotoblas_NEOVERSEN1; | extern gotoblas_t gotoblas_NEOVERSEN1; | ||||
extern gotoblas_t gotoblas_THUNDERX3T110; | extern gotoblas_t gotoblas_THUNDERX3T110; | ||||
extern gotoblas_t gotoblas_CORTEXA55; | |||||
#endif | #endif | ||||
extern void openblas_warning(int verbose, const char * msg); | extern void openblas_warning(int verbose, const char * msg); | ||||
#define NUM_CORETYPES 12 | |||||
#define NUM_CORETYPES 13 | |||||
/* | /* | ||||
* In case asm/hwcap.h is outdated on the build system, make sure | * In case asm/hwcap.h is outdated on the build system, make sure | ||||
@@ -142,6 +148,7 @@ static char *corename[] = { | |||||
"emag8180", | "emag8180", | ||||
"neoversen1", | "neoversen1", | ||||
"thunderx3t110", | "thunderx3t110", | ||||
"cortexa55", | |||||
"unknown" | "unknown" | ||||
}; | }; | ||||
@@ -158,6 +165,7 @@ char *gotoblas_corename(void) { | |||||
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; | if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; | ||||
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10]; | if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10]; | ||||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11]; | if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11]; | ||||
if (gotoblas == &gotoblas_CORTEXA55) return corename[12]; | |||||
return corename[NUM_CORETYPES]; | return corename[NUM_CORETYPES]; | ||||
} | } | ||||
@@ -189,6 +197,7 @@ static gotoblas_t *force_coretype(char *coretype) { | |||||
case 9: return (&gotoblas_EMAG8180); | case 9: return (&gotoblas_EMAG8180); | ||||
case 10: return (&gotoblas_NEOVERSEN1); | case 10: return (&gotoblas_NEOVERSEN1); | ||||
case 11: return (&gotoblas_THUNDERX3T110); | case 11: return (&gotoblas_THUNDERX3T110); | ||||
case 12: return (&gotoblas_CORTEXA55); | |||||
} | } | ||||
snprintf(message, 128, "Core not found: %s\n", coretype); | snprintf(message, 128, "Core not found: %s\n", coretype); | ||||
openblas_warning(1, message); | openblas_warning(1, message); | ||||
@@ -247,6 +256,8 @@ static gotoblas_t *get_coretype(void) { | |||||
return &gotoblas_CORTEXA73; | return &gotoblas_CORTEXA73; | ||||
case 0xd0c: // Neoverse N1 | case 0xd0c: // Neoverse N1 | ||||
return &gotoblas_NEOVERSEN1; | return &gotoblas_NEOVERSEN1; | ||||
case 0xd05: // Cortex A55 | |||||
return &gotoblas_CORTEXA55; | |||||
} | } | ||||
break; | break; | ||||
case 0x42: // Broadcom | case 0x42: // Broadcom | ||||
@@ -1159,6 +1159,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#else | #else | ||||
#endif | #endif | ||||
/*
 * Target description used when FORCE_CORTEXA55 is defined.
 * Defines FORCE and the ARCHITECTURE / SUBARCHITECTURE / SUBDIRNAME /
 * LIBNAME / CORENAME strings for this core. ARCHCONFIG is a single string
 * of -D options: the CORTEXA55 and ARMV8 markers, the L1 code/data and L2
 * size, line size and associativity values, the DTB settings, and the
 * HAVE_VFP / HAVE_VFPV3 / HAVE_VFPV4 / HAVE_NEON feature markers.
 * The #else branch is intentionally empty.
 */
#ifdef FORCE_CORTEXA55 | |||||
#define FORCE | |||||
#define ARCHITECTURE "ARM64" | |||||
#define SUBARCHITECTURE "CORTEXA55" | |||||
#define SUBDIRNAME "arm64" | |||||
#define ARCHCONFIG "-DCORTEXA55 " \ | |||||
"-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
"-DL2_SIZE=65536 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
#define LIBNAME "cortexa55" | |||||
#define CORENAME "CORTEXA55" | |||||
#else | |||||
#endif | |||||
#ifdef FORCE_FALKOR | #ifdef FORCE_FALKOR | ||||
#define FORCE | #define FORCE | ||||
@@ -0,0 +1,196 @@ | |||||
# Kernels taken from C sources instead of local assembly:
#  - amin / max / min and their index-returning (I*) variants come from ../arm
#  - all four TRSM variants (LN, LT, RN, RT) for S/D/C/Z come from ../generic
SAMINKERNEL = ../arm/amin.c | |||||
DAMINKERNEL = ../arm/amin.c | |||||
CAMINKERNEL = ../arm/zamin.c | |||||
ZAMINKERNEL = ../arm/zamin.c | |||||
SMAXKERNEL = ../arm/max.c | |||||
DMAXKERNEL = ../arm/max.c | |||||
SMINKERNEL = ../arm/min.c | |||||
DMINKERNEL = ../arm/min.c | |||||
ISAMINKERNEL = ../arm/iamin.c | |||||
IDAMINKERNEL = ../arm/iamin.c | |||||
ICAMINKERNEL = ../arm/izamin.c | |||||
IZAMINKERNEL = ../arm/izamin.c | |||||
ISMAXKERNEL = ../arm/imax.c | |||||
IDMAXKERNEL = ../arm/imax.c | |||||
ISMINKERNEL = ../arm/imin.c | |||||
IDMINKERNEL = ../arm/imin.c | |||||
# The same generic TRSM source is used for every precision.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
# Kernels implemented in assembly (.S files named without a directory prefix).
# Real single/double precision share one source per operation; the complex
# variants use the z-prefixed source, except CASUM (casum.S) and the
# copy/swap kernels, which use the same copy.S / swap.S for all four types.
SAMAXKERNEL = amax.S | |||||
DAMAXKERNEL = amax.S | |||||
CAMAXKERNEL = zamax.S | |||||
ZAMAXKERNEL = zamax.S | |||||
SAXPYKERNEL = axpy.S | |||||
DAXPYKERNEL = axpy.S | |||||
CAXPYKERNEL = zaxpy.S | |||||
ZAXPYKERNEL = zaxpy.S | |||||
SROTKERNEL = rot.S | |||||
DROTKERNEL = rot.S | |||||
CROTKERNEL = zrot.S | |||||
ZROTKERNEL = zrot.S | |||||
SSCALKERNEL = scal.S | |||||
DSCALKERNEL = scal.S | |||||
CSCALKERNEL = zscal.S | |||||
ZSCALKERNEL = zscal.S | |||||
SGEMVNKERNEL = gemv_n.S | |||||
DGEMVNKERNEL = gemv_n.S | |||||
CGEMVNKERNEL = zgemv_n.S | |||||
ZGEMVNKERNEL = zgemv_n.S | |||||
SGEMVTKERNEL = gemv_t.S | |||||
DGEMVTKERNEL = gemv_t.S | |||||
CGEMVTKERNEL = zgemv_t.S | |||||
ZGEMVTKERNEL = zgemv_t.S | |||||
SASUMKERNEL = asum.S | |||||
DASUMKERNEL = asum.S | |||||
CASUMKERNEL = casum.S | |||||
ZASUMKERNEL = zasum.S | |||||
SCOPYKERNEL = copy.S | |||||
DCOPYKERNEL = copy.S | |||||
CCOPYKERNEL = copy.S | |||||
ZCOPYKERNEL = copy.S | |||||
SSWAPKERNEL = swap.S | |||||
DSWAPKERNEL = swap.S | |||||
CSWAPKERNEL = swap.S | |||||
ZSWAPKERNEL = swap.S | |||||
ISAMAXKERNEL = iamax.S | |||||
IDAMAXKERNEL = iamax.S | |||||
ICAMAXKERNEL = izamax.S | |||||
IZAMAXKERNEL = izamax.S | |||||
SNRM2KERNEL = nrm2.S | |||||
DNRM2KERNEL = nrm2.S | |||||
CNRM2KERNEL = znrm2.S | |||||
ZNRM2KERNEL = znrm2.S | |||||
# Dot-product kernels and GEMM beta kernels.
# SDOT: ../generic/dot.c unless the C compiler is PGI, in which case dot.S.
# NOTE(review): the CDOT/ZDOT conditional below selects in the opposite
# direction (assembly by default, C for PGI) - confirm this is intended.
ifneq ($(C_COMPILER), PGI) | |||||
SDOTKERNEL = ../generic/dot.c | |||||
else | |||||
SDOTKERNEL = dot.S | |||||
endif | |||||
DDOTKERNEL = dot.S | |||||
# CDOT/ZDOT: zdot.S unless the C compiler is PGI, in which case ../arm/zdot.c.
ifneq ($(C_COMPILER), PGI) | |||||
CDOTKERNEL = zdot.S | |||||
ZDOTKERNEL = zdot.S | |||||
else | |||||
CDOTKERNEL = ../arm/zdot.c | |||||
ZDOTKERNEL = ../arm/zdot.c | |||||
endif | |||||
DSDOTKERNEL = dot.S | |||||
DGEMM_BETA = dgemm_beta.S | |||||
SGEMM_BETA = sgemm_beta.S | |||||
# SGEMM / STRMM kernel and packing (copy) routine selection.
# File names are built from SGEMM_UNROLL_M and SGEMM_UNROLL_N. For the 8x8
# configuration the kernel files carrying the _cortexa53 suffix are used.
ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 8x8) | |||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_cortexa53.S | |||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_cortexa53.S | |||||
else | |||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
endif | |||||
# The inner (I*) copy routines are only defined when the M and N unroll
# factors differ. Assembly is used for tcopy when M is 16 and for ncopy when
# M is 4; every other size uses the generic C routine.
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||||
ifeq ($(SGEMM_UNROLL_M), 16) | |||||
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||||
else | |||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
endif | |||||
ifeq ($(SGEMM_UNROLL_M), 4) | |||||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||||
else | |||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||||
endif | |||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
# The outer (O*) copy routines are always the assembly versions sized by
# SGEMM_UNROLL_N.
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
# DGEMM / DTRMM kernel and packing (copy) routine selection.
# Kernel file names are built from DGEMM_UNROLL_M and DGEMM_UNROLL_N.
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
# The inner (I*) copy routines are only defined when the unroll factors
# differ. Assembly is used when M is 8; otherwise the generic C routines.
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||||
ifeq ($(DGEMM_UNROLL_M), 8) | |||||
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S | |||||
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S | |||||
else | |||||
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||||
endif | |||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
# The outer (O*) copy routines use assembly when N is 4; otherwise the
# generic C routines.
ifeq ($(DGEMM_UNROLL_N), 4) | |||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
else | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||||
endif | |||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
# CGEMM / CTRMM kernel and packing (copy) routine selection.
# The kernels are assembly files named from CGEMM_UNROLL_M and CGEMM_UNROLL_N.
# All copy routines use the generic zgemm_* C sources. The inner (I*) ones
# are only defined when the two unroll factors differ.
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
# ZGEMM / ZTRMM kernel and packing (copy) routine selection.
# The kernels are assembly files named from ZGEMM_UNROLL_M and ZGEMM_UNROLL_N.
# All copy routines use the generic zgemm_* C sources. The inner (I*) ones
# are only defined when the two unroll factors differ.
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) |
@@ -2959,7 +2959,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
#define CGEMM_DEFAULT_R 4096 | #define CGEMM_DEFAULT_R 4096 | ||||
#define ZGEMM_DEFAULT_R 2048 | #define ZGEMM_DEFAULT_R 2048 | ||||
#elif defined(CORTEXA53) | |||||
#elif defined(CORTEXA53) || defined(CORTEXA55) | |||||
#define SGEMM_DEFAULT_UNROLL_M 8 | #define SGEMM_DEFAULT_UNROLL_M 8 | ||||
#define SGEMM_DEFAULT_UNROLL_N 8 | #define SGEMM_DEFAULT_UNROLL_N 8 | ||||