Add support for Ampere AmpereOne (AMPERE1) cores on arm64.

- Makefile.arm64: with CORE=AMPERE1 and GCC >= 12 or clang, compile with
  -march=armv8.6-a+crypto+crc+fp16+sha3+rng (also applied to Fortran unless
  the Fortran compiler is NAG).
- Makefile.system: define GCCVERSIONGTEQ13 and GCCVERSIONGTEQ14.
- cpuid_arm64.c: add CPU_AMPERE1 (25). Implementer 0xc0 with part 0xac3 or
  0xac4 is reported as AMPERE1. cpuname[] and cpuname_lower[] each gain
  exactly one entry so the two tables stay index-aligned.
- getarch.c: add the FORCE_AMPERE1 target. Its cache description agrees with
  the one printed by get_cpuconfig() (L2 associativity 8).
- kernel/arm64/KERNEL.AMPERE1: reuse the NEOVERSEN1 kernel selection.
- param.h: GEMM unroll and blocking parameters for AMPERE1.

NOTE(review): regenerate this patch with git before merging. The post-image
blob ids on the "index" lines for cpuid_arm64.c and getarch.c were not
recomputed, and the whitespace of context lines should be checked against
the tree.

diff --git a/Makefile.arm64 b/Makefile.arm64
index 5d75eef30..b27852691 100644
--- a/Makefile.arm64
+++ b/Makefile.arm64
@@ -191,6 +191,16 @@ endif
 endif
 endif
 
+# Detect Ampere AmpereOne(ampere1,ampere1a) processors.
+ifeq ($(CORE), AMPERE1)
+ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
+CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
+ifneq ($(F_COMPILER), NAG)
+FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
+endif
+endif
+endif
+
 # Use a53 tunings because a55 is only available in GCC>=8.1
 ifeq ($(CORE), CORTEXA55)
 ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
diff --git a/Makefile.system b/Makefile.system
index 38646c3c6..9d7fab937 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
 GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
 GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
 GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
+GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13)
+GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14)
 # Note that the behavior of -dumpversion is compile-time-configurable for
 # gcc-7.x and newer. Use -dumpfullversion there
 ifeq ($(GCCVERSIONGTEQ7),1)
diff --git a/cpuid_arm64.c b/cpuid_arm64.c
index 2bf93cc87..fd6a9bd47 100644
--- a/cpuid_arm64.c
+++ b/cpuid_arm64.c
@@ -79,6 +79,7 @@ size_t length64=sizeof(value64);
 #define CPU_TSV110 9
 // Ampere
 #define CPU_EMAG8180 10
+#define CPU_AMPERE1 25
 // Apple
 #define CPU_VORTEX 13
 // Fujitsu
@@ -111,7 +112,8 @@ static char *cpuname[] = {
   "CORTEXA710",
   "FT2000",
   "CORTEXA76",
-  "NEOVERSEV2"
+  "NEOVERSEV2",
+  "AMPERE1"
 };
 
 static char *cpuname_lower[] = {
@@ -139,7 +141,8 @@ static char *cpuname_lower[] = {
   "cortexa710",
   "ft2000",
   "cortexa76",
-  "neoversev2"
+  "neoversev2",
+  "ampere1"
 };
 
 static int cpulowperf=0;
@@ -334,6 +337,10 @@ int detect(void)
     // Ampere
     else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
       return CPU_EMAG8180;
+    else if (strstr(cpu_implementer, "0xc0")) {
+      if (strstr(cpu_part, "0xac3") || strstr(cpu_part, "0xac4"))
+        return CPU_AMPERE1;
+    }
     // Fujitsu
     else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
       return CPU_A64FX;
@@ -684,6 +691,21 @@ void get_cpuconfig(void)
       printf("#define DTB_SIZE 4096\n");
       break;
 
+    case CPU_AMPERE1:
+      printf("#define %s\n", cpuname[d]);
+      printf("#define L1_CODE_SIZE 16384\n");
+      printf("#define L1_CODE_LINESIZE 64\n");
+      printf("#define L1_CODE_ASSOCIATIVE 4\n");
+      printf("#define L1_DATA_SIZE 65536\n");
+      printf("#define L1_DATA_LINESIZE 64\n");
+      printf("#define L1_DATA_ASSOCIATIVE 4\n");
+      printf("#define L2_SIZE 2097152\n");
+      printf("#define L2_LINESIZE 64\n");
+      printf("#define L2_ASSOCIATIVE 8\n");
+      printf("#define DTB_DEFAULT_ENTRIES 64\n");
+      printf("#define DTB_SIZE 4096\n");
+      break;
+
     case CPU_THUNDERX3T110:
       printf("#define THUNDERX3T110 \n");
       printf("#define L1_CODE_SIZE 65536 \n");
diff --git a/getarch.c b/getarch.c
index b51c3ed64..cb0b3cd7c 100644
--- a/getarch.c
+++ b/getarch.c
@@ -158,6 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /* #define FORCE_CSKY */
 /* #define FORCE_CK860FV */
 /* #define FORCE_GENERIC */
+/* #define FORCE_AMPERE1 */
 
 #ifdef FORCE_P2
 #define FORCE
@@ -1590,6 +1591,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME "EMAG8180"
 #endif
 
+#ifdef FORCE_AMPERE1
+#define FORCE
+#define ARCHITECTURE "ARM64"
+#define SUBARCHITECTURE "AMPERE1"
+#define SUBDIRNAME "arm64"
+#define ARCHCONFIG "-DAMPERE1 " \
+       "-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
+       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
+       "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
+       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
+       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
+       "-march=armv8.6-a+crypto+crc+fp16+sha3+rng"
+#define LIBNAME "ampere1"
+#define CORENAME "AMPERE1"
+#endif
+
 #ifdef FORCE_THUNDERX3T110
 #define ARMV8
 #define FORCE
@@ -1820,7 +1837,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME "CK860FV"
 #endif
 
-
 #ifndef FORCE
 
 #ifdef USER_TARGET
diff --git a/kernel/arm64/KERNEL.AMPERE1 b/kernel/arm64/KERNEL.AMPERE1
new file mode 100644
index 000000000..46a34469c
--- /dev/null
+++ b/kernel/arm64/KERNEL.AMPERE1
@@ -0,0 +1 @@
+include $(KERNELDIR)/KERNEL.NEOVERSEN1
diff --git a/param.h b/param.h
index 48b64fd2a..d5537d2ad 100644
--- a/param.h
+++ b/param.h
@@ -3635,6 +3635,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
 #define CGEMM_DEFAULT_R 4096
 #define ZGEMM_DEFAULT_R 4096
 
+#elif defined(AMPERE1)
+
+#if defined(XDOUBLE) || defined(DOUBLE)
+#define SWITCH_RATIO 8
+#else
+#define SWITCH_RATIO 16
+#endif
+
+#define SGEMM_DEFAULT_UNROLL_M 16
+#define SGEMM_DEFAULT_UNROLL_N 4
+
+#define DGEMM_DEFAULT_UNROLL_M 8
+#define DGEMM_DEFAULT_UNROLL_N 4
+
+#define CGEMM_DEFAULT_UNROLL_M 8
+#define CGEMM_DEFAULT_UNROLL_N 4
+
+#define ZGEMM_DEFAULT_UNROLL_M 4
+#define ZGEMM_DEFAULT_UNROLL_N 4
+
+#define SGEMM_DEFAULT_P 240
+#define DGEMM_DEFAULT_P 240
+#define CGEMM_DEFAULT_P 128
+#define ZGEMM_DEFAULT_P 128
+
+#define SGEMM_DEFAULT_Q 640
+#define DGEMM_DEFAULT_Q 320
+#define CGEMM_DEFAULT_Q 224
+#define ZGEMM_DEFAULT_Q 112
+
+#define SGEMM_DEFAULT_R 4096
+#define DGEMM_DEFAULT_R 4096
+#define CGEMM_DEFAULT_R 4096
+#define ZGEMM_DEFAULT_R 4096
+
 #elif defined(A64FX) // 512-bit SVE
 
 /* When all BLAS3 routines are implemeted with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".