Browse Source

Merge pull request #5309 from davidz-ampere/dev-ampereone

Add support for Ampere AmpereOne processors
pull/5336/head
Martin Kroeker GitHub 3 months ago
parent
commit
ee26caffb3
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
6 changed files with 90 additions and 3 deletions
  1. +10
    -0
      Makefile.arm64
  2. +2
    -0
      Makefile.system
  3. +25
    -2
      cpuid_arm64.c
  4. +17
    -1
      getarch.c
  5. +1
    -0
      kernel/arm64/KERNEL.AMPERE1
  6. +35
    -0
      param.h

+ 10
- 0
Makefile.arm64 View File

@@ -191,6 +191,16 @@ endif
endif endif
endif endif


# Compiler flags for Ampere AmpereOne (ampere1, ampere1a) processors,
# applied when CORE is AMPERE1.
ifeq ($(CORE), AMPERE1)
# Only add -march=armv8.6-a when GCCVERSIONGTEQ12 or ISCLANG evaluates to 1;
# otherwise this block adds no -march flag at all.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
# The Fortran flags get the same -march unless F_COMPILER is NAG
# (presumably NAG does not accept this option -- TODO confirm).
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
endif
endif
endif

# Use a53 tunings because a55 is only available in GCC>=8.1 # Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55) ifeq ($(CORE), CORTEXA55)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))


+ 2
- 0
Makefile.system View File

@@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10) GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11) GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12) GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13)
GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14)
# Note that the behavior of -dumpversion is compile-time-configurable for # Note that the behavior of -dumpversion is compile-time-configurable for
# gcc-7.x and newer. Use -dumpfullversion there # gcc-7.x and newer. Use -dumpfullversion there
ifeq ($(GCCVERSIONGTEQ7),1) ifeq ($(GCCVERSIONGTEQ7),1)


+ 25
- 2
cpuid_arm64.c View File

@@ -79,6 +79,7 @@ size_t length64=sizeof(value64);
#define CPU_TSV110 9 #define CPU_TSV110 9
// Ampere // Ampere
#define CPU_EMAG8180 10 #define CPU_EMAG8180 10
#define CPU_AMPERE1 25
// Apple // Apple
#define CPU_VORTEX 13 #define CPU_VORTEX 13
// Fujitsu // Fujitsu
@@ -111,7 +112,8 @@ static char *cpuname[] = {
"CORTEXA710", "CORTEXA710",
"FT2000", "FT2000",
"CORTEXA76", "CORTEXA76",
"NEOVERSEV2"
"NEOVERSEV2",
"AMPERE1"
}; };


static char *cpuname_lower[] = { static char *cpuname_lower[] = {
@@ -139,7 +141,9 @@ static char *cpuname_lower[] = {
"cortexa710", "cortexa710",
"ft2000", "ft2000",
"cortexa76", "cortexa76",
"neoversev2"
"neoversev2",
"ampere1",
"ampere1a"
}; };


static int cpulowperf=0; static int cpulowperf=0;
@@ -334,6 +338,10 @@ int detect(void)
// Ampere // Ampere
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
return CPU_EMAG8180; return CPU_EMAG8180;
else if (strstr(cpu_implementer, "0xc0")) {
if (strstr(cpu_part, "0xac3") || strstr(cpu_part, "0xac4"))
return CPU_AMPERE1;
}
// Fujitsu // Fujitsu
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
return CPU_A64FX; return CPU_A64FX;
@@ -684,6 +692,21 @@ void get_cpuconfig(void)
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break; break;


case CPU_AMPERE1:
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 16384\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 4\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 4\n");
printf("#define L2_SIZE 2097152\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 8\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;

case CPU_THUNDERX3T110: case CPU_THUNDERX3T110:
printf("#define THUNDERX3T110 \n"); printf("#define THUNDERX3T110 \n");
printf("#define L1_CODE_SIZE 65536 \n"); printf("#define L1_CODE_SIZE 65536 \n");


+ 17
- 1
getarch.c View File

@@ -158,6 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_CSKY */ /* #define FORCE_CSKY */
/* #define FORCE_CK860FV */ /* #define FORCE_CK860FV */
/* #define FORCE_GENERIC */ /* #define FORCE_GENERIC */
/* #define FORCE_AMPERE1 */


#ifdef FORCE_P2 #ifdef FORCE_P2
#define FORCE #define FORCE
@@ -1590,6 +1591,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "EMAG8180" #define CORENAME "EMAG8180"
#endif #endif


/*
 * Forced target: Ampere AmpereOne (AMPERE1).
 *
 * When FORCE_AMPERE1 is defined, CPU autodetection is bypassed (FORCE is
 * defined) and the architecture description below is used verbatim.
 * ARCHCONFIG carries the cache/TLB geometry and feature macros as -D flags,
 * followed by the -march string for this core.
 *
 * The cache parameters must stay in sync with the values printed for
 * CPU_AMPERE1 by get_cpuconfig() in cpuid_arm64.c; in particular the L2
 * associativity is 8 there, so it is 8 here as well (it was 16, which made
 * a forced build disagree with an autodetected build of the same core).
 */
#ifdef FORCE_AMPERE1
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "AMPERE1"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DAMPERE1 " \
"-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
"-march=armv8.6-a+crypto+crc+fp16+sha3+rng"
#define LIBNAME "ampere1"
#define CORENAME "AMPERE1"
#endif

#ifdef FORCE_THUNDERX3T110 #ifdef FORCE_THUNDERX3T110
#define ARMV8 #define ARMV8
#define FORCE #define FORCE
@@ -1820,7 +1837,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "CK860FV" #define CORENAME "CK860FV"
#endif #endif



#ifndef FORCE #ifndef FORCE


#ifdef USER_TARGET #ifdef USER_TARGET


+ 1
- 0
kernel/arm64/KERNEL.AMPERE1 View File

@@ -0,0 +1 @@
include $(KERNELDIR)/KERNEL.NEOVERSEN1

+ 35
- 0
param.h View File

@@ -3635,6 +3635,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096


#elif defined(AMPERE1)
/* GEMM tuning defaults selected when AMPERE1 is defined. */

/* SWITCH_RATIO is 8 when XDOUBLE or DOUBLE is defined, 16 otherwise. */
#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#else
#define SWITCH_RATIO 16
#endif

/*
 * Per-type UNROLL_M / UNROLL_N defaults for the S, D, C and Z GEMM variants.
 * NOTE(review): kernel/arm64/KERNEL.AMPERE1 includes KERNEL.NEOVERSEN1, so
 * these presumably have to match the unroll factors those kernels were
 * written for -- confirm before changing any of them.
 */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4

#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4

#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4

#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

/* Per-type _P defaults (presumably blocking sizes -- TODO confirm units). */
#define SGEMM_DEFAULT_P 240
#define DGEMM_DEFAULT_P 240
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128

/* Per-type _Q defaults. */
#define SGEMM_DEFAULT_Q 640
#define DGEMM_DEFAULT_Q 320
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112

/* Per-type _R defaults; the same value is used for all four variants. */
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#elif defined(A64FX) // 512-bit SVE #elif defined(A64FX) // 512-bit SVE


/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl". /* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".


Loading…
Cancel
Save