* Update DYNAMIC_ARCH list of ARM64 targets for gmake
* Update arm64 cpu list for runtime detection
* Update DYNAMIC_ARCH list of ARM64 targets for cmake and add POWERPC targets

tags/v0.3.8^2
@@ -39,7 +39,10 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
endif | |||
# TSV110 tuning flags. They are only added when GCCVERSIONGTEQ9 is 1,
# i.e. the GCC major version probe reported >= 9.
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110)
# Same -march/-mtune pair for the C and Fortran compilers.
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
endif
@@ -326,6 +326,7 @@ ifeq ($(C_COMPILER), GCC) | |||
# GCC version probes. Each one cuts a dot-separated field out of
# `$(CC) -dumpversion` (field 1 = major, field 2 = minor) and lets `expr`
# compare it with a threshold; expr prints 1 when the comparison holds and
# 0 otherwise, so the variables can be tested with `ifeq (..., 1)`.
# The comparison operators are backslash-escaped so the shell does not
# treat `>` as a redirection.
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1) | |||
# GCC Major version > 4 | |||
@@ -547,9 +548,14 @@ endif | |||
# DYNAMIC_ARCH target list for arm64 (gmake build).
# NOTE(review): the CMake DYNAMIC_CORE list and the runtime detection table
# enumerate the same cores; presumably all three must be kept in step.
ifeq ($(ARCH), arm64)
DYNAMIC_CORE = ARMV8
DYNAMIC_CORE += CORTEXA53
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110
endif
ifeq ($(ARCH), power) | |||
@@ -45,7 +45,11 @@ endif () | |||
if (DYNAMIC_ARCH) | |||
if (ARM64)
  # DYNAMIC_ARCH target list for ARM64. A single assignment: the shorter
  # pre-TSV110 list that used to precede it was immediately overwritten
  # and has been dropped.
  set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110)
endif ()
if (POWER)
  # DYNAMIC_ARCH target list for POWER.
  set(DYNAMIC_CORE POWER6 POWER8 POWER9)
endif ()
if (X86) | |||
@@ -309,6 +309,83 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||
set(ZGEMM_UNROLL_M 4) | |||
set(ZGEMM_UNROLL_N 4) | |||
set(SYMV_P 16) | |||
elseif ("${TCORE}" STREQUAL "TSV110")
  # TSV110: append the cache/TLB description to the generated target
  # config, then set the GEMM unroll factors and SYMV_P for this core.
  # The path is quoted so a location containing spaces or semicolons is
  # not split into several arguments.
  file(APPEND "${TARGET_CONF_TEMP}"
    "#define ARMV8\n"
    "#define L1_CODE_SIZE\t65536\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t4\n"
    "#define L1_DATA_SIZE\t65536\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t4\n"
    "#define L2_SIZE\t524288\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t8\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 4)
  set(ZGEMM_UNROLL_N 4)
  set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "POWER6")
  # POWER6: append the cache/TLB description to the generated target
  # config, then set the GEMM unroll factors and SYMV_P for this core.
  # The path is quoted so a location containing spaces or semicolons is
  # not split into several arguments.
  file(APPEND "${TARGET_CONF_TEMP}"
    "#define L1_DATA_SIZE 32768\n"
    "#define L1_DATA_LINESIZE 128\n"
    "#define L2_SIZE 524288\n"
    "#define L2_LINESIZE 128 \n"
    "#define DTB_DEFAULT_ENTRIES 128\n"
    "#define DTB_SIZE 4096\n"
    "#define L2_ASSOCIATIVE 8\n")
  set(SGEMM_UNROLL_M 4)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 4)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 2)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 2)
  set(ZGEMM_UNROLL_N 4)
  set(SYMV_P 8)
elseif ("${TCORE}" STREQUAL "POWER8")
  # POWER8: append the cache/TLB description to the generated target
  # config, then set the GEMM unroll factors and SYMV_P for this core.
  # The path is quoted so a location containing spaces or semicolons is
  # not split into several arguments.
  file(APPEND "${TARGET_CONF_TEMP}"
    "#define L1_DATA_SIZE 32768\n"
    "#define L1_DATA_LINESIZE 128\n"
    "#define L2_SIZE 524288\n"
    "#define L2_LINESIZE 128 \n"
    "#define DTB_DEFAULT_ENTRIES 128\n"
    "#define DTB_SIZE 4096\n"
    "#define L2_ASSOCIATIVE 8\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 8)
  set(DGEMM_UNROLL_M 16)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 8)
  set(ZGEMM_UNROLL_N 2)
  set(SYMV_P 8)
elseif ("${TCORE}" STREQUAL "POWER9")
  # POWER9: append the cache/TLB description to the generated target
  # config, then set the GEMM unroll factors and SYMV_P for this core.
  # The values written here are the same as in the POWER8 branch.
  # The path is quoted so a location containing spaces or semicolons is
  # not split into several arguments.
  file(APPEND "${TARGET_CONF_TEMP}"
    "#define L1_DATA_SIZE 32768\n"
    "#define L1_DATA_LINESIZE 128\n"
    "#define L2_SIZE 524288\n"
    "#define L2_LINESIZE 128 \n"
    "#define DTB_DEFAULT_ENTRIES 128\n"
    "#define DTB_SIZE 4096\n"
    "#define L2_ASSOCIATIVE 8\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 8)
  set(DGEMM_UNROLL_M 16)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 8)
  set(ZGEMM_UNROLL_N 2)
  set(SYMV_P 8)
endif() | |||
# Or should this actually be NUM_CORES? | |||
@@ -43,13 +43,18 @@ | |||
#endif | |||
/* One gotoblas_t instance per core that can be selected at runtime. */
extern gotoblas_t gotoblas_ARMV8;
extern gotoblas_t gotoblas_CORTEXA53;
extern gotoblas_t gotoblas_CORTEXA57;
extern gotoblas_t gotoblas_CORTEXA72;
extern gotoblas_t gotoblas_CORTEXA73;
extern gotoblas_t gotoblas_FALKOR;
extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110;

extern void openblas_warning(int verbose, const char * msg);

/*
 * Number of named cores (ARMV8 .. TSV110). It is also used as the index of
 * the trailing "unknown" entry of the core-name table, so it must equal the
 * number of cores declared above. Defined exactly once: a leftover
 * definition with the old value 4 would be a conflicting macro
 * redefinition.
 */
#define NUM_CORETYPES 9
/* | |||
* In case asm/hwcap.h is outdated on the build system, make sure | |||
@@ -65,17 +70,27 @@ extern void openblas_warning(int verbose, const char * msg); | |||
/*
 * Printable core names. Index i names the core that is reported and
 * forced as number i (0 = armv8 ... 8 = tsv110); the final "unknown"
 * entry is the fallback returned when no core matches.
 */
static char *corename[] = {
  "armv8",
  "cortexa53",
  "cortexa57",
  "cortexa72",
  "cortexa73",
  "falkor",
  "thunderx",
  "thunderx2t99",
  "tsv110",
  "unknown"
};
char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_ARMV8) return corename[ 0]; | |||
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1]; | |||
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2]; | |||
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3]; | |||
if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1]; | |||
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2]; | |||
if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3]; | |||
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4]; | |||
if (gotoblas == &gotoblas_FALKOR) return corename[ 5]; | |||
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6]; | |||
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7]; | |||
if (gotoblas == &gotoblas_TSV110) return corename[ 8]; | |||
return corename[NUM_CORETYPES]; | |||
} | |||
@@ -96,9 +111,14 @@ static gotoblas_t *force_coretype(char *coretype) { | |||
/*
 * Map the matched index to its core table. The case numbers follow the
 * order of the corename[] table. Each label appears once: leftover
 * pre-change labels 1-3 would be duplicate case values and would map to
 * the wrong tables.
 */
switch (found)
{
  case 0: return (&gotoblas_ARMV8);
  case 1: return (&gotoblas_CORTEXA53);
  case 2: return (&gotoblas_CORTEXA57);
  case 3: return (&gotoblas_CORTEXA72);
  case 4: return (&gotoblas_CORTEXA73);
  case 5: return (&gotoblas_FALKOR);
  case 6: return (&gotoblas_THUNDERX);
  case 7: return (&gotoblas_THUNDERX2T99);
  case 8: return (&gotoblas_TSV110);
}
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
@@ -136,10 +156,14 @@ static gotoblas_t *get_coretype(void) { | |||
case 0x41: // ARM | |||
switch (part) | |||
{ | |||
case 0xd07: // Cortex A57 | |||
case 0xd08: // Cortex A72 | |||
case 0xd03: // Cortex A53 | |||
return &gotoblas_CORTEXA53; | |||
case 0xd07: // Cortex A57 | |||
return &gotoblas_CORTEXA57; | |||
case 0xd08: // Cortex A72 | |||
return &gotoblas_CORTEXA72; | |||
case 0xd09: // Cortex A73 | |||
return &gotoblas_CORTEXA73; | |||
} | |||
break; | |||
case 0x42: // Broadcom | |||
@@ -158,6 +182,20 @@ static gotoblas_t *get_coretype(void) { | |||
return &gotoblas_THUNDERX2T99; | |||
} | |||
break; | |||
case 0x48: // HiSilicon | |||
switch (part) | |||
{ | |||
case 0xd01: // tsv110 | |||
return &gotoblas_TSV110; | |||
} | |||
break; | |||
case 0x51: // Qualcomm | |||
switch (part) | |||
{ | |||
case 0xc00: // Falkor | |||
return &gotoblas_FALKOR; | |||
} | |||
break; | |||
} | |||
return NULL; | |||
} | |||