Add support for Ampere AmpereOne processors (pull/5336/head)
@@ -191,6 +191,16 @@ endif | |||||
endif | endif | ||||
endif | endif | ||||
# Compiler flags for Ampere AmpereOne (ampere1, ampere1a) processors. | |||||
# This block does not detect anything itself: it only reacts to CORE already
# being AMPERE1 and appends an -march string to the C and Fortran option lists.
# The flags are gated on GCCVERSIONGTEQ12 or ISCLANG being 1; there is no else
# branch, so any other compiler gets no AMPERE1-specific -march from here.
# NOTE(review): if both variables were ever 1, $(filter ...) would expand to
# "1 1" and the ifeq below would not match - confirm they are never both set.
ifeq ($(CORE), AMPERE1) | |||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG))) | |||||
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng | |||||
# The Fortran options receive the same -march unless F_COMPILER is NAG.
ifneq ($(F_COMPILER), NAG) | |||||
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng | |||||
endif | |||||
endif | |||||
endif | |||||
# Use a53 tunings because a55 is only available in GCC>=8.1 | # Use a53 tunings because a55 is only available in GCC>=8.1 | ||||
ifeq ($(CORE), CORTEXA55) | ifeq ($(CORE), CORTEXA55) | ||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) | ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) | ||||
@@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9) | |||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10) | GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10) | ||||
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11) | GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11) | ||||
GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12) | GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12) | ||||
# Major-version thresholds 13 and 14. Each value is the output of expr
# comparing the first dot-separated field of `$(CC) -dumpversion` with N.
GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13) | |||||
GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14) | |||||
# Note that the behavior of -dumpversion is compile-time-configurable for | # Note that the behavior of -dumpversion is compile-time-configurable for | ||||
# gcc-7.x and newer. Use -dumpfullversion there | # gcc-7.x and newer. Use -dumpfullversion there | ||||
ifeq ($(GCCVERSIONGTEQ7),1) | ifeq ($(GCCVERSIONGTEQ7),1) | ||||
@@ -79,6 +79,7 @@ size_t length64=sizeof(value64); | |||||
#define CPU_TSV110 9 | #define CPU_TSV110 9 | ||||
// Ampere | // Ampere | ||||
#define CPU_EMAG8180 10 | #define CPU_EMAG8180 10 | ||||
#define CPU_AMPERE1 25 | |||||
// Apple | // Apple | ||||
#define CPU_VORTEX 13 | #define CPU_VORTEX 13 | ||||
// Fujitsu | // Fujitsu | ||||
@@ -111,7 +112,8 @@ static char *cpuname[] = { | |||||
"CORTEXA710", | "CORTEXA710", | ||||
"FT2000", | "FT2000", | ||||
"CORTEXA76", | "CORTEXA76", | ||||
"NEOVERSEV2" | |||||
"NEOVERSEV2", | |||||
"AMPERE1" | |||||
}; | }; | ||||
static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
@@ -139,7 +141,9 @@ static char *cpuname_lower[] = { | |||||
"cortexa710", | "cortexa710", | ||||
"ft2000", | "ft2000", | ||||
"cortexa76", | "cortexa76", | ||||
"neoversev2" | |||||
"neoversev2", | |||||
"ampere1", | |||||
"ampere1a" | |||||
}; | }; | ||||
static int cpulowperf=0; | static int cpulowperf=0; | ||||
@@ -334,6 +338,10 @@ int detect(void) | |||||
// Ampere | // Ampere | ||||
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) | else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) | ||||
return CPU_EMAG8180; | return CPU_EMAG8180; | ||||
else if (strstr(cpu_implementer, "0xc0")) { | |||||
if (strstr(cpu_part, "0xac3") || strstr(cpu_part, "0xac4")) | |||||
return CPU_AMPERE1; | |||||
} | |||||
// Fujitsu | // Fujitsu | ||||
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | ||||
return CPU_A64FX; | return CPU_A64FX; | ||||
@@ -684,6 +692,21 @@ void get_cpuconfig(void) | |||||
printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
break; | break; | ||||
case CPU_AMPERE1: | |||||
printf("#define %s\n", cpuname[d]); | |||||
printf("#define L1_CODE_SIZE 16384\n"); | |||||
printf("#define L1_CODE_LINESIZE 64\n"); | |||||
printf("#define L1_CODE_ASSOCIATIVE 4\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 64\n"); | |||||
printf("#define L1_DATA_ASSOCIATIVE 4\n"); | |||||
printf("#define L2_SIZE 2097152\n"); | |||||
printf("#define L2_LINESIZE 64\n"); | |||||
printf("#define L2_ASSOCIATIVE 8\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
break; | |||||
case CPU_THUNDERX3T110: | case CPU_THUNDERX3T110: | ||||
printf("#define THUNDERX3T110 \n"); | printf("#define THUNDERX3T110 \n"); | ||||
printf("#define L1_CODE_SIZE 65536 \n"); | printf("#define L1_CODE_SIZE 65536 \n"); | ||||
@@ -158,6 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
/* #define FORCE_CSKY */ | /* #define FORCE_CSKY */ | ||||
/* #define FORCE_CK860FV */ | /* #define FORCE_CK860FV */ | ||||
/* #define FORCE_GENERIC */ | /* #define FORCE_GENERIC */ | ||||
/* #define FORCE_AMPERE1 */ | |||||
#ifdef FORCE_P2 | #ifdef FORCE_P2 | ||||
#define FORCE | #define FORCE | ||||
@@ -1590,6 +1591,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CORENAME "EMAG8180" | #define CORENAME "EMAG8180" | ||||
#endif | #endif | ||||
/*
 * FORCE_AMPERE1: fixed (non-detected) build configuration for Ampere
 * AmpereOne cores.
 *
 * ARCHCONFIG carries the cache geometry as -D flags.  These values are kept
 * identical to what the CPU_AMPERE1 case of get_cpuconfig() prints for an
 * auto-detected build, so that a forced target and a detected target describe
 * the same core.  In particular L2_ASSOCIATIVE is 8 here (it used to be 16,
 * which disagreed with the detected configuration).
 *
 * NOTE(review): unlike the neighbouring FORCE_THUNDERX3T110 block, this block
 * does not `#define ARMV8` for getarch itself; it only passes -DARMV8 through
 * ARCHCONFIG.  Confirm that this is intended.
 */
#ifdef FORCE_AMPERE1
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "AMPERE1"
#define SUBDIRNAME      "arm64"
#define ARCHCONFIG "-DAMPERE1 " \
       "-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
       "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
       "-march=armv8.6-a+crypto+crc+fp16+sha3+rng"
#define LIBNAME   "ampere1"
#define CORENAME  "AMPERE1"
#endif
#ifdef FORCE_THUNDERX3T110 | #ifdef FORCE_THUNDERX3T110 | ||||
#define ARMV8 | #define ARMV8 | ||||
#define FORCE | #define FORCE | ||||
@@ -1820,7 +1837,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CORENAME "CK860FV" | #define CORENAME "CK860FV" | ||||
#endif | #endif | ||||
#ifndef FORCE | #ifndef FORCE | ||||
#ifdef USER_TARGET | #ifdef USER_TARGET | ||||
@@ -0,0 +1 @@ | |||||
# This one-line kernel list delegates entirely to the Neoverse N1 kernel list;
# nothing is overridden after the include.
include $(KERNELDIR)/KERNEL.NEOVERSEN1 |
@@ -3635,6 +3635,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d | |||||
#define CGEMM_DEFAULT_R 4096 | #define CGEMM_DEFAULT_R 4096 | ||||
#define ZGEMM_DEFAULT_R 4096 | #define ZGEMM_DEFAULT_R 4096 | ||||
#elif defined(AMPERE1) | |||||
/* GEMM tuning parameters selected when AMPERE1 is defined. */
/* SWITCH_RATIO is 8 for DOUBLE/XDOUBLE builds and 16 otherwise. */
#if defined(XDOUBLE) || defined(DOUBLE) | |||||
#define SWITCH_RATIO 8 | |||||
#else | |||||
#define SWITCH_RATIO 16 | |||||
#endif | |||||
/* Default unroll factors (M and N) for the S, D, C and Z GEMM variants. */
#define SGEMM_DEFAULT_UNROLL_M 16 | |||||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||||
#define DGEMM_DEFAULT_UNROLL_M 8 | |||||
#define DGEMM_DEFAULT_UNROLL_N 4 | |||||
#define CGEMM_DEFAULT_UNROLL_M 8 | |||||
#define CGEMM_DEFAULT_UNROLL_N 4 | |||||
#define ZGEMM_DEFAULT_UNROLL_M 4 | |||||
#define ZGEMM_DEFAULT_UNROLL_N 4 | |||||
/* Default P values for the same four variants. */
#define SGEMM_DEFAULT_P 240 | |||||
#define DGEMM_DEFAULT_P 240 | |||||
#define CGEMM_DEFAULT_P 128 | |||||
#define ZGEMM_DEFAULT_P 128 | |||||
/* Default Q values. */
#define SGEMM_DEFAULT_Q 640 | |||||
#define DGEMM_DEFAULT_Q 320 | |||||
#define CGEMM_DEFAULT_Q 224 | |||||
#define ZGEMM_DEFAULT_Q 112 | |||||
/* Default R values; all four variants use 4096. */
#define SGEMM_DEFAULT_R 4096 | |||||
#define DGEMM_DEFAULT_R 4096 | |||||
#define CGEMM_DEFAULT_R 4096 | |||||
#define ZGEMM_DEFAULT_R 4096 | |||||
#elif defined(A64FX) // 512-bit SVE | #elif defined(A64FX) // 512-bit SVE | ||||
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". | ||||