Browse Source

Merge pull request #3278 from brada4/A55

Add CORTEXA55 cpuid 0xd05 support
tags/v0.3.16^2
Martin Kroeker GitHub 4 years ago
parent
commit
1a8b6134c2
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 260 additions and 7 deletions
  1. +22
    -0
      Makefile.arm64
  2. +2
    -0
      Makefile.system
  3. +1
    -0
      TargetList.txt
  4. +1
    -1
      cmake/arch.cmake
  5. +1
    -1
      cmake/prebuild.cmake
  6. +1
    -1
      cmake/system.cmake
  7. +8
    -2
      cpuid_arm64.c
  8. +12
    -1
      driver/others/dynamic_arm64.c
  9. +15
    -0
      getarch.c
  10. +196
    -0
      kernel/arm64/KERNEL.CORTEXA55
  11. +1
    -1
      param.h

+ 22
- 0
Makefile.arm64 View File

@@ -57,6 +57,28 @@ endif
endif
endif

# Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55)
ifeq ($(GCCVERSIONGTEQ7), 1)
ifeq ($(GCCVERSIONGTEQ8), 1)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
endif
endif

ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
ifneq ($(F_COMPILER), NAG)


+ 2
- 0
Makefile.system View File

@@ -333,6 +333,7 @@ GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
GCCVERSIONGTEQ8 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 8)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
@@ -625,6 +626,7 @@ DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += NEOVERSEN1
DYNAMIC_CORE += CORTEXA55
DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99


+ 1
- 0
TargetList.txt View File

@@ -92,6 +92,7 @@ CORTEXA57
CORTEXA72
CORTEXA73
NEOVERSEN1
CORTEXA55
EMAG8180
FALKOR
THUNDERX


+ 1
- 1
cmake/arch.cmake View File

@@ -44,7 +44,7 @@ endif ()

if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
if (DYNAMIC_LIST)
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
endif ()


+ 1
- 1
cmake/prebuild.cmake View File

@@ -177,7 +177,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53")
elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53" OR "${TCORE}" STREQUAL "CORTEXA55")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t32768\n"
"#define L1_CODE_LINESIZE\t64\n"


+ 1
- 1
cmake/system.cmake View File

@@ -39,7 +39,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
set(TARGET "BARCELONA")
endif ()
if (${TARGET} STREQUAL "ARMV8" OR ${TARGET} STREQUAL "CORTEXA57" OR ${TARGET} STREQUAL "CORTEXA53")
if (${TARGET} STREQUAL "ARMV8" OR ${TARGET} STREQUAL "CORTEXA57" OR ${TARGET} STREQUAL "CORTEXA53" OR ${TARGET} STREQUAL "CORTEXA55")
set(TARGET "ARMV7")
endif ()
endif ()


+ 8
- 2
cpuid_arm64.c View File

@@ -36,6 +36,7 @@ size_t length=sizeof(value);
#define CPU_ARMV8 1
// Arm
#define CPU_CORTEXA53 2
#define CPU_CORTEXA55 14
#define CPU_CORTEXA57 3
#define CPU_CORTEXA72 4
#define CPU_CORTEXA73 5
@@ -67,7 +68,8 @@ static char *cpuname[] = {
"EMAG8180",
"NEOVERSEN1",
"THUNDERX3T110",
"VORTEX"
"VORTEX",
"CORTEXA55"
};

static char *cpuname_lower[] = {
@@ -84,7 +86,8 @@ static char *cpuname_lower[] = {
"emag8180",
"neoversen1",
"thunderx3t110",
"vortex"
"vortex",
"cortexa55"
};

int get_feature(char *search)
@@ -161,6 +164,8 @@ int detect(void)
return CPU_CORTEXA73;
else if (strstr(cpu_part, "0xd0c"))
return CPU_NEOVERSEN1;
else if (strstr(cpu_part, "0xd05"))
return CPU_CORTEXA55;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@@ -281,6 +286,7 @@ void get_cpuconfig(void)
{

case CPU_CORTEXA53:
case CPU_CORTEXA55:
printf("#define %s\n", cpuname[d]);
// Fall-through
case CPU_ARMV8:


+ 12
- 1
driver/others/dynamic_arm64.c View File

@@ -99,6 +99,11 @@ extern gotoblas_t gotoblas_NEOVERSEN1;
#else
#define gotoblas_NEOVERSEN1 gotoblas_ARMV8
#endif
#ifdef DYN_CORTEX_A55
extern gotoblas_t gotoblas_CORTEXA55;
#else
#define gotoblas_CORTEXA55 gotoblas_ARMV8
#endif
#else
extern gotoblas_t gotoblas_CORTEXA53;
extern gotoblas_t gotoblas_CORTEXA57;
@@ -111,11 +116,12 @@ extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1;
extern gotoblas_t gotoblas_THUNDERX3T110;
extern gotoblas_t gotoblas_CORTEXA55;
#endif

extern void openblas_warning(int verbose, const char * msg);

#define NUM_CORETYPES 12
#define NUM_CORETYPES 13

/*
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -142,6 +148,7 @@ static char *corename[] = {
"emag8180",
"neoversen1",
"thunderx3t110",
"cortexa55",
"unknown"
};

@@ -158,6 +165,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11];
if (gotoblas == &gotoblas_CORTEXA55) return corename[12];
return corename[NUM_CORETYPES];
}

@@ -189,6 +197,7 @@ static gotoblas_t *force_coretype(char *coretype) {
case 9: return (&gotoblas_EMAG8180);
case 10: return (&gotoblas_NEOVERSEN1);
case 11: return (&gotoblas_THUNDERX3T110);
case 12: return (&gotoblas_CORTEXA55);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
@@ -247,6 +256,8 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_CORTEXA73;
case 0xd0c: // Neoverse N1
return &gotoblas_NEOVERSEN1;
case 0xd05: // Cortex A55
return &gotoblas_CORTEXA55;
}
break;
case 0x42: // Broadcom


+ 15
- 0
getarch.c View File

@@ -1159,6 +1159,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif

#ifdef FORCE_CORTEXA55
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA55"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA55 " \
"-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=65536 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa55"
#define CORENAME "CORTEXA55"
#else
#endif

#ifdef FORCE_FALKOR
#define FORCE


+ 196
- 0
kernel/arm64/KERNEL.CORTEXA55 View File

@@ -0,0 +1,196 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c

SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c

SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c

ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c

ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c

ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c

STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S

SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S

SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S

SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S

SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S

SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S


SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S

SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S

SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S

ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S

SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S

ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
DDOTKERNEL = dot.S
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
DSDOTKERNEL = dot.S

DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S

ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 8x8)
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_cortexa53.S
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_cortexa53.S
else
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
endif
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
ifeq ($(SGEMM_UNROLL_M), 16)
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
else
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
endif
ifeq ($(SGEMM_UNROLL_M), 4)
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
else
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
endif
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif

SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S

ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))

ifeq ($(DGEMM_UNROLL_M), 8)
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
else
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
endif

DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(DGEMM_UNROLL_N), 4)
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
else
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
endif

DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)

ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

+ 1
- 1
param.h View File

@@ -2959,7 +2959,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048

#elif defined(CORTEXA53)
#elif defined(CORTEXA53) || defined(CORTEXA55)

#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_N 8


Loading…
Cancel
Save