Browse Source

ARM64: Add THUNDERX3T110 Target

tags/v0.3.11^2
Ashwin Sekhar T K 5 years ago
parent
commit
4e1be0e481
12 changed files with 305 additions and 6 deletions
  1. +10
    -0
      Makefile.arm64
  2. +1
    -0
      Makefile.system
  3. +1
    -0
      TargetList.txt
  4. +1
    -1
      cmake/arch.cmake
  5. +27
    -0
      cmake/prebuild.cmake
  6. +25
    -2
      cpuid_arm64.c
  7. +7
    -1
      driver/others/dynamic_arm64.c
  8. +18
    -0
      getarch.c
  9. +1
    -1
      interface/swap.c
  10. +1
    -1
      interface/zswap.c
  11. +184
    -0
      kernel/arm64/KERNEL.THUNDERX3T110
  12. +29
    -0
      param.h

+ 10
- 0
Makefile.arm64 View File

@@ -56,6 +56,16 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif endif


ifeq ($(CORE), THUNDERX3T110)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
else
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif

ifeq ($(GCCVERSIONGTEQ9), 1) ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110) ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110


+ 1
- 0
Makefile.system View File

@@ -578,6 +578,7 @@ DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99 DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110 DYNAMIC_CORE += TSV110
DYNAMIC_CORE += EMAG8180 DYNAMIC_CORE += EMAG8180
DYNAMIC_CORE += THUNDERX3T110
endif endif


ifeq ($(ARCH), zarch) ifeq ($(ARCH), zarch)


+ 1
- 0
TargetList.txt View File

@@ -96,6 +96,7 @@ FALKOR
THUNDERX THUNDERX
THUNDERX2T99 THUNDERX2T99
TSV110 TSV110
THUNDERX3T110


9.System Z: 9.System Z:
ZARCH_GENERIC ZARCH_GENERIC


+ 1
- 1
cmake/arch.cmake View File

@@ -45,7 +45,7 @@ endif ()


if (DYNAMIC_ARCH) if (DYNAMIC_ARCH)
if (ARM64) if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
endif () endif ()
if (POWER) if (POWER)


+ 27
- 0
cmake/prebuild.cmake View File

@@ -338,6 +338,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4) set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16) set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
file(APPEND ${TARGET_CONF_TEMP}
"#define THUNDERX3T110\n"
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t8\n"
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t8\n"
"#define L2_SIZE\t524288\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define L3_SIZE\t94371840\n"
"#define L3_LINESIZE\t64\n"
"#define L3_ASSOCIATIVE\t32\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "TSV110") elseif ("${TCORE}" STREQUAL "TSV110")
file(APPEND ${TARGET_CONF_TEMP} file(APPEND ${TARGET_CONF_TEMP}
"#define ARMV8\n" "#define ARMV8\n"


+ 25
- 2
cpuid_arm64.c View File

@@ -40,6 +40,7 @@
// Cavium // Cavium
#define CPU_THUNDERX 7 #define CPU_THUNDERX 7
#define CPU_THUNDERX2T99 8 #define CPU_THUNDERX2T99 8
#define CPU_THUNDERX3T110 12
//Hisilicon //Hisilicon
#define CPU_TSV110 9 #define CPU_TSV110 9
// Ampere // Ampere
@@ -57,7 +58,8 @@ static char *cpuname[] = {
"THUNDERX2T99", "THUNDERX2T99",
"TSV110", "TSV110",
"EMAG8180", "EMAG8180",
"NEOVERSEN1"
"NEOVERSEN1",
"THUNDERX3T110"
}; };


static char *cpuname_lower[] = { static char *cpuname_lower[] = {
@@ -72,7 +74,8 @@ static char *cpuname_lower[] = {
"thunderx2t99", "thunderx2t99",
"tsv110", "tsv110",
"emag8180", "emag8180",
"neoversen1"
"neoversen1",
"thunderx3t110"
}; };


int get_feature(char *search) int get_feature(char *search)
@@ -158,6 +161,8 @@ int detect(void)
return CPU_THUNDERX; return CPU_THUNDERX;
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af")) else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
return CPU_THUNDERX2T99; return CPU_THUNDERX2T99;
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8"))
return CPU_THUNDERX3T110;
// HiSilicon // HiSilicon
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
return CPU_TSV110; return CPU_TSV110;
@@ -372,7 +377,25 @@ void get_cpuconfig(void)
printf("#define L2_LINESIZE 64\n"); printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
break;


case CPU_THUNDERX3T110:
printf("#define THUNDERX3T110 \n");
printf("#define L1_CODE_SIZE 65536 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 524288 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 94371840 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
} }
get_cpucount(); get_cpucount();
} }


+ 7
- 1
driver/others/dynamic_arm64.c View File

@@ -53,10 +53,11 @@ extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110; extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180; extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1; extern gotoblas_t gotoblas_NEOVERSEN1;
extern gotoblas_t gotoblas_THUNDERX3T110;


extern void openblas_warning(int verbose, const char * msg); extern void openblas_warning(int verbose, const char * msg);


#define NUM_CORETYPES 11
#define NUM_CORETYPES 12


/* /*
* In case asm/hwcap.h is outdated on the build system, make sure * In case asm/hwcap.h is outdated on the build system, make sure
@@ -82,6 +83,7 @@ static char *corename[] = {
"tsv110", "tsv110",
"emag8180", "emag8180",
"neoversen1", "neoversen1",
"thunderx3t110",
"unknown" "unknown"
}; };


@@ -97,6 +99,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_TSV110) return corename[ 8]; if (gotoblas == &gotoblas_TSV110) return corename[ 8];
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10]; if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[11];
return corename[NUM_CORETYPES]; return corename[NUM_CORETYPES];
} }


@@ -127,6 +130,7 @@ static gotoblas_t *force_coretype(char *coretype) {
case 8: return (&gotoblas_TSV110); case 8: return (&gotoblas_TSV110);
case 9: return (&gotoblas_EMAG8180); case 9: return (&gotoblas_EMAG8180);
case 10: return (&gotoblas_NEOVERSEN1); case 10: return (&gotoblas_NEOVERSEN1);
case 11: return (&gotoblas_THUNDERX3T110);
} }
snprintf(message, 128, "Core not found: %s\n", coretype); snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message); openblas_warning(1, message);
@@ -190,6 +194,8 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_THUNDERX; return &gotoblas_THUNDERX;
case 0x0af: // ThunderX2 case 0x0af: // ThunderX2
return &gotoblas_THUNDERX2T99; return &gotoblas_THUNDERX2T99;
case 0x0b8: // ThunderX3
return &gotoblas_THUNDERX3T110;
} }
break; break;
case 0x48: // HiSilicon case 0x48: // HiSilicon


+ 18
- 0
getarch.c View File

@@ -1174,6 +1174,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "EMAG8180" #define CORENAME "EMAG8180"
#endif #endif


#ifdef FORCE_THUNDERX3T110
#define ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "THUNDERX3T110"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DTHUNDERX3T110 " \
"-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=94371840 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx3t110"
#define CORENAME "THUNDERX3T110"
#else
#endif

#ifdef FORCE_ZARCH_GENERIC #ifdef FORCE_ZARCH_GENERIC
#define FORCE #define FORCE
#define ARCHITECTURE "ZARCH" #define ARCHITECTURE "ZARCH"


+ 1
- 1
interface/swap.c View File

@@ -42,7 +42,7 @@
#include "functable.h" #include "functable.h"
#endif #endif


#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110)
// Multithreaded swap gives performance benefits in ThunderX2T99 // Multithreaded swap gives performance benefits in ThunderX2T99
#else #else
// Disable multi-threading as it does not show any performance // Disable multi-threading as it does not show any performance


+ 1
- 1
interface/zswap.c View File

@@ -42,7 +42,7 @@
#include "functable.h" #include "functable.h"
#endif #endif


#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8)
#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) || defined(THUNDERX3T110)
// Multithreaded swap gives performance benefits in ThunderX2T99 // Multithreaded swap gives performance benefits in ThunderX2T99
#else #else
// Disable multi-threading as it does not show any performance // Disable multi-threading as it does not show any performance


+ 184
- 0
kernel/arm64/KERNEL.THUNDERX3T110 View File

@@ -0,0 +1,184 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c

SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c

SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c

ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c

ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c

ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c

STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S

SAXPYKERNEL = axpy.S
DAXPYKERNEL = daxpy_thunderx2t99.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S

SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S

SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S

SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S

SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S

STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S

ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))

ifeq ($(DGEMM_UNROLL_M), 8)
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
else
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
endif

DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(DGEMM_UNROLL_N), 4)
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
else
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
endif

DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)

ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

SASUMKERNEL = sasum_thunderx2t99.c
DASUMKERNEL = dasum_thunderx2t99.c
CASUMKERNEL = casum_thunderx2t99.c
ZASUMKERNEL = zasum_thunderx2t99.c

SCOPYKERNEL = copy_thunderx2t99.c
DCOPYKERNEL = copy_thunderx2t99.c
CCOPYKERNEL = copy_thunderx2t99.c
ZCOPYKERNEL = copy_thunderx2t99.c

SSWAPKERNEL = swap_thunderx2t99.S
DSWAPKERNEL = swap_thunderx2t99.S
CSWAPKERNEL = swap_thunderx2t99.S
ZSWAPKERNEL = swap_thunderx2t99.S

ISAMAXKERNEL = iamax_thunderx2t99.c
IDAMAXKERNEL = iamax_thunderx2t99.c
ICAMAXKERNEL = izamax_thunderx2t99.c
IZAMAXKERNEL = izamax_thunderx2t99.c

SNRM2KERNEL = scnrm2_thunderx2t99.c
CNRM2KERNEL = scnrm2_thunderx2t99.c
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
DNRM2KERNEL = dznrm2_thunderx2t99.c
ZNRM2KERNEL = dznrm2_thunderx2t99.c


DDOTKERNEL = dot_thunderx2t99.c
SDOTKERNEL = dot_thunderx2t99.c
CDOTKERNEL = zdot_thunderx2t99.c
ZDOTKERNEL = zdot_thunderx2t99.c
DSDOTKERNEL = dot.S

ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
endif

ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4)
SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S
endif

ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4)
CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S
endif

ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4)
ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S
endif

+ 29
- 0
param.h View File

@@ -2779,6 +2779,35 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096 #define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096


#elif defined(THUNDERX3T110)

#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4

#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4

#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4

#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 320
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128

#define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112

#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#elif defined(NEOVERSEN1) #elif defined(NEOVERSEN1)


#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16


Loading…
Cancel
Save