@@ -689,6 +689,7 @@ ifneq ($(NO_SVE), 1) | |||
DYNAMIC_CORE += NEOVERSEV1 | |||
DYNAMIC_CORE += NEOVERSEN2 | |||
DYNAMIC_CORE += ARMV8SVE | |||
DYNAMIC_CORE += A64FX | |||
endif | |||
DYNAMIC_CORE += THUNDERX | |||
DYNAMIC_CORE += THUNDERX2T99 | |||
@@ -46,7 +46,7 @@ if (DYNAMIC_ARCH) | |||
if (ARM64) | |||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) | |||
# Extend the ARM64 dynamic-arch target list with NEOVERSEV1, NEOVERSEN2,
# ARMV8SVE and A64FX, but only when the C compiler version is above 9.99.
# NOTE(review): presumably older compilers cannot build the SVE kernels
# these targets use -- confirm.
# The version is quoted so an empty CMAKE_C_COMPILER_VERSION evaluates to
# false instead of producing a malformed if() expression, and each target
# is appended exactly once.
if ("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER 9.99)
  set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
endif ()
if (DYNAMIC_LIST) | |||
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) | |||
@@ -1218,6 +1218,37 @@ endif () | |||
set(ZGEMM_UNROLL_M 4) | |||
set(ZGEMM_UNROLL_N 4) | |||
set(SYMV_P 16) | |||
elseif ("${TCORE}" STREQUAL "A64FX")
  # Branch taken when TCORE is A64FX: appends the target's preprocessor
  # definitions to the file named by TARGET_CONF_TEMP, then sets the GEMM
  # unroll factors and SYMV_P for this core. The text is appended in the
  # same order as a single call would write it.

  # L1 instruction- and data-cache parameters.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t65536\n"
    "#define L1_CODE_LINESIZE\t256\n"
    "#define L1_CODE_ASSOCIATIVE\t8\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t256\n"
    "#define L1_DATA_ASSOCIATIVE\t8\n")

  # L2 and L3 parameters (all L3 values are 0). Some of these entries end
  # in a double newline, so blank lines appear in the generated text.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L2_SIZE\t8388608\n\n"
    "#define L2_LINESIZE\t256\n"
    "#define L2_ASSOCIATIVE\t8\n"
    "#define L3_SIZE\t0\n\n"
    "#define L3_LINESIZE\t0\n\n"
    "#define L3_ASSOCIATIVE\t0\n\n")

  # DTB parameters.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n")

  # Instruction-set feature macros and the base architecture macro.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define HAVE_VFPV4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n"
    "#define HAVE_NEON\n"
    "#define HAVE_SVE\n"
    "#define ARMV8\n")

  # Blocking parameter for SYMV.
  set(SYMV_P 16)

  # GEMM unroll factors: single and double precision real ...
  set(SGEMM_UNROLL_M 4)
  set(SGEMM_UNROLL_N 8)
  set(DGEMM_UNROLL_M 2)
  set(DGEMM_UNROLL_N 8)
  # ... and single and double precision complex.
  set(CGEMM_UNROLL_M 2)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 2)
  set(ZGEMM_UNROLL_N 4)
elseif ("${TCORE}" STREQUAL "P5600") | |||
file(APPEND ${TARGET_CONF_TEMP} | |||
"#define L2_SIZE 1048576\n" | |||
@@ -310,6 +310,18 @@ if (${TARGET} STREQUAL NEOVERSEV1) | |||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve") | |||
endif() | |||
endif() | |||
# Append the A64FX code-generation flags to KERNEL_DEFINITIONS.
#  * PGI compiler with SVE enabled: SVE intrinsics plus A64FX tuning.
#  * Otherwise the compiler is asked for its version via -dumpversion and
#    must report at least 10.4; anything older aborts configuration.
if (${TARGET} STREQUAL A64FX)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx")
  else ()
    # Strip the trailing newline so the version compares cleanly and does
    # not split the diagnostic below across two lines.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Quoted so an empty result (compiler failed to run) falls through to
    # the FATAL_ERROR instead of producing a malformed if() expression.
    if ("${GCC_VERSION}" VERSION_GREATER 10.4 OR "${GCC_VERSION}" VERSION_EQUAL 10.4)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve -mtune=a64fx")
    else ()
      message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support A64FX.")
    endif()
  endif()
endif()
endif() | |||
@@ -120,6 +120,11 @@ extern gotoblas_t gotoblas_CORTEXA55; | |||
#else | |||
#define gotoblas_CORTEXA55 gotoblas_ARMV8 | |||
#endif | |||
/* gotoblas_A64FX: without DYN_A64FX the name is an alias for the generic
 * gotoblas_ARMV8 table; with it, the name refers to a dedicated table
 * defined in another translation unit. */
#if !defined(DYN_A64FX)
#define gotoblas_A64FX gotoblas_ARMV8
#else
extern gotoblas_t gotoblas_A64FX;
#endif
#else | |||
extern gotoblas_t gotoblas_CORTEXA53; | |||
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53 | |||
@@ -136,10 +141,12 @@ extern gotoblas_t gotoblas_NEOVERSEN1; | |||
extern gotoblas_t gotoblas_NEOVERSEV1; | |||
extern gotoblas_t gotoblas_NEOVERSEN2; | |||
extern gotoblas_t gotoblas_ARMV8SVE; | |||
extern gotoblas_t gotoblas_A64FX; | |||
#else | |||
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8 | |||
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8 | |||
#define gotoblas_ARMV8SVE gotoblas_ARMV8 | |||
#define gotoblas_A64FX gotoblas_ARMV8 | |||
#endif | |||
extern gotoblas_t gotoblas_THUNDERX3T110; | |||
#endif | |||
@@ -149,7 +156,7 @@ extern void openblas_warning(int verbose, const char * msg); | |||
#define FALLBACK_VERBOSE 1 | |||
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" | |||
/* Number of named entries in corename[]; also the index of the trailing
 * "unknown" entry that gotoblas_corename() returns when no table matches.
 * Defined exactly once: a second, conflicting definition is an
 * incompatible macro redefinition. */
#define NUM_CORETYPES 18
/* | |||
* In case asm/hwcap.h is outdated on the build system, make sure | |||
@@ -184,6 +191,7 @@ static char *corename[] = { | |||
"thunderx3t110", | |||
"cortexa55", | |||
"armv8sve", | |||
"a64fx", | |||
"unknown" | |||
}; | |||
@@ -205,6 +213,7 @@ char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14]; | |||
if (gotoblas == &gotoblas_CORTEXA55) return corename[15]; | |||
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16]; | |||
if (gotoblas == &gotoblas_A64FX) return corename[17]; | |||
return corename[NUM_CORETYPES]; | |||
} | |||
@@ -241,6 +250,7 @@ static gotoblas_t *force_coretype(char *coretype) { | |||
case 14: return (&gotoblas_THUNDERX3T110); | |||
case 15: return (&gotoblas_CORTEXA55); | |||
case 16: return (&gotoblas_ARMV8SVE); | |||
case 17: return (&gotoblas_A64FX); | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
@@ -346,6 +356,15 @@ static gotoblas_t *get_coretype(void) { | |||
return &gotoblas_THUNDERX3T110; | |||
} | |||
break; | |||
case 0x46: // Fujitsu | |||
switch (part) | |||
{ | |||
#ifndef NO_SVE | |||
case 0x001: // A64FX | |||
return &gotoblas_A64FX; | |||
#endif | |||
} | |||
break; | |||
case 0x48: // HiSilicon | |||
switch (part) | |||
{ | |||