Browse Source

Merge pull request #1589 from fenrus75/skylakex

Initial support for SkylakeX / AVX512
tags/v0.3.1
Martin Kroeker GitHub 7 years ago
parent
commit
cf234a0561
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
58 changed files with 12253 additions and 50 deletions
  1. +20
    -0
      Makefile.system
  2. +1
    -0
      TargetList.txt
  3. +3
    -0
      cmake/arch.cmake
  4. +1
    -1
      cmake/system.cmake
  5. +3
    -0
      cpuid.h
  6. +32
    -0
      cpuid_x86.c
  7. +26
    -4
      driver/others/dynamic.c
  8. +2
    -2
      driver/others/parameter.c
  9. +15
    -0
      getarch.c
  10. +1
    -1
      kernel/CMakeLists.txt
  11. +4
    -0
      kernel/Makefile.L3
  12. +16
    -0
      kernel/setparam-ref.c
  13. +1
    -1
      kernel/x86/trsm_kernel_LN_2x4_penryn.S
  14. +1
    -1
      kernel/x86/trsm_kernel_LN_4x4_penryn.S
  15. +1
    -1
      kernel/x86/trsm_kernel_LT_2x4_penryn.S
  16. +1
    -1
      kernel/x86/trsm_kernel_LT_4x4_penryn.S
  17. +1
    -1
      kernel/x86/trsm_kernel_RT_2x4_penryn.S
  18. +1
    -1
      kernel/x86/trsm_kernel_RT_4x4_penryn.S
  19. +1
    -1
      kernel/x86/ztrsm_kernel_LN_2x2_penryn.S
  20. +1
    -1
      kernel/x86/ztrsm_kernel_LT_1x2_penryn.S
  21. +1
    -1
      kernel/x86/ztrsm_kernel_LT_2x2_penryn.S
  22. +1
    -1
      kernel/x86/ztrsm_kernel_RT_1x2_penryn.S
  23. +1
    -1
      kernel/x86/ztrsm_kernel_RT_2x2_penryn.S
  24. +19
    -0
      kernel/x86_64/KERNEL.SKYLAKEX
  25. +1
    -1
      kernel/x86_64/caxpy.c
  26. +1
    -1
      kernel/x86_64/cdot.c
  27. +1
    -1
      kernel/x86_64/cgemv_n_4.c
  28. +1
    -1
      kernel/x86_64/cgemv_t_4.c
  29. +1
    -1
      kernel/x86_64/cscal.c
  30. +1
    -1
      kernel/x86_64/daxpy.c
  31. +1
    -1
      kernel/x86_64/ddot.c
  32. +5138
    -0
      kernel/x86_64/dgemm_kernel_16x2_skylakex.S
  33. +1
    -1
      kernel/x86_64/dgemv_n_4.c
  34. +1
    -1
      kernel/x86_64/dgemv_t_4.c
  35. +1
    -1
      kernel/x86_64/dscal.c
  36. +1
    -1
      kernel/x86_64/dsymv_L.c
  37. +1
    -1
      kernel/x86_64/dsymv_U.c
  38. +1
    -1
      kernel/x86_64/saxpy.c
  39. +1
    -1
      kernel/x86_64/sdot.c
  40. +6811
    -0
      kernel/x86_64/sgemm_kernel_16x4_skylakex.S
  41. +1
    -1
      kernel/x86_64/sgemv_n_4.c
  42. +1
    -1
      kernel/x86_64/sgemv_t_4.c
  43. +1
    -1
      kernel/x86_64/ssymv_L.c
  44. +1
    -1
      kernel/x86_64/ssymv_U.c
  45. +1
    -1
      kernel/x86_64/symv_L_sse.S
  46. +1
    -1
      kernel/x86_64/symv_L_sse2.S
  47. +1
    -1
      kernel/x86_64/symv_U_sse.S
  48. +1
    -1
      kernel/x86_64/symv_U_sse2.S
  49. +1
    -1
      kernel/x86_64/zaxpy.c
  50. +1
    -1
      kernel/x86_64/zdot.c
  51. +1
    -1
      kernel/x86_64/zgemv_n_4.c
  52. +1
    -1
      kernel/x86_64/zgemv_t_4.c
  53. +1
    -1
      kernel/x86_64/zscal.c
  54. +1
    -1
      kernel/x86_64/zsymv_L_sse.S
  55. +1
    -1
      kernel/x86_64/zsymv_L_sse2.S
  56. +1
    -1
      kernel/x86_64/zsymv_U_sse.S
  57. +1
    -1
      kernel/x86_64/zsymv_U_sse2.S
  58. +119
    -0
      param.h

+ 20
- 0
Makefile.system View File

@@ -62,6 +62,9 @@ ifeq ($(BINARY), 32)
ifeq ($(TARGET), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
@@ -95,6 +98,9 @@ ifeq ($(BINARY), 32)
ifeq ($(TARGET_CORE), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
@@ -141,6 +147,10 @@ ifeq ($(NO_AVX2), 1)
GETARCH_FLAGS += -DNO_AVX2
endif

ifeq ($(NO_AVX512), 1)
GETARCH_FLAGS += -DNO_AVX512
endif

ifeq ($(DEBUG), 1)
GETARCH_FLAGS += -g
endif
@@ -469,6 +479,11 @@ endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL ZEN
endif
ifneq ($(NO_AVX512), 1)
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += SKYLAKEX
endif
endif
endif

# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
@@ -924,6 +939,10 @@ ifeq ($(NO_AVX2), 1)
CCOMMON_OPT += -DNO_AVX2
endif

ifeq ($(NO_AVX512), 1)
CCOMMON_OPT += -DNO_AVX512
endif

ifdef SMP
CCOMMON_OPT += -DSMP_SERVER

@@ -1230,6 +1249,7 @@ export MSA_FLAGS
export KERNELDIR
export FUNCTION_PROFILE
export TARGET_CORE
export NO_AVX512

export SGEMM_UNROLL_M
export SGEMM_UNROLL_N


+ 1
- 0
TargetList.txt View File

@@ -20,6 +20,7 @@ DUNNINGTON
NEHALEM
SANDYBRIDGE
HASWELL
SKYLAKEX
ATOM

b)AMD CPU:


+ 3
- 0
cmake/arch.cmake View File

@@ -56,6 +56,9 @@ if (DYNAMIC_ARCH)
# Cores that need AVX2 are only added to the dynamic-arch build when AVX2
# has not been disabled.
if (NOT NO_AVX2)
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
# SKYLAKEX additionally requires that AVX512 is not disabled. The check is
# nested under the AVX2 one so that NO_AVX2 alone also drops SKYLAKEX; this
# matches Makefile.system, and driver/others/dynamic.c aliases
# gotoblas_SKYLAKEX to gotoblas_SANDYBRIDGE when NO_AVX2 is defined, so a
# separately built SKYLAKEX core would never be referenced in that case.
if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
endif ()
endif ()
endif ()

if (NOT DYNAMIC_CORE)


+ 1
- 1
cmake/system.cmake View File

@@ -33,7 +33,7 @@ endif ()
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
message(STATUS "Compiling a ${BINARY}-bit binary.")
set(NO_AVX 1)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
set(TARGET "NEHALEM")
endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")


+ 3
- 0
cpuid.h View File

@@ -115,6 +115,7 @@
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26
#define CORE_ZEN 27
#define CORE_SKYLAKEX 28

#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -137,6 +138,7 @@
#define HAVE_AVX (1 << 18)
#define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21)

#define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2
@@ -211,5 +213,6 @@ typedef struct {
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50
#define CPUTYPE_ZEN 51
#define CPUTYPE_SKYLAKEX 52

#endif

+ 32
- 0
cpuid_x86.c View File

@@ -50,6 +50,8 @@
#ifdef NO_AVX
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
#define CORE_HASWELL CORE_NEHALEM
#define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
#define CORE_SKYLAKEX CORE_NEHALEM
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
#define CORE_SANDYBRIDGE CORE_NEHALEM
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
@@ -1299,6 +1301,19 @@ int get_cpuname(void){
else
return CPUTYPE_NEHALEM;
case 5:
// Skylake X
#ifndef NO_AVX512
return CPUTYPE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
#endif
case 14:
// Skylake
if(support_avx())
@@ -1556,6 +1571,7 @@ static char *cpuname[] = {
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
"SKYLAKEX"
};

static char *lowercpuname[] = {
@@ -1610,6 +1626,7 @@ static char *lowercpuname[] = {
"steamroller",
"excavator",
"zen",
"skylakex"
};

static char *corename[] = {
@@ -1641,6 +1658,7 @@ static char *corename[] = {
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
"SKYLAKEX"
};

static char *corename_lower[] = {
@@ -1672,6 +1690,7 @@ static char *corename_lower[] = {
"steamroller",
"excavator",
"zen",
"skylakex"
};


@@ -1860,6 +1879,19 @@ int get_coretype(void){
else
return CORE_NEHALEM;
case 5:
// Skylake X
#ifndef NO_AVX512
return CORE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
#endif
case 14:
// Skylake
if(support_avx())


+ 26
- 4
driver/others/dynamic.c View File

@@ -74,15 +74,22 @@ extern gotoblas_t gotoblas_STEAMROLLER;
extern gotoblas_t gotoblas_EXCAVATOR;
#ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
#else
extern gotoblas_t gotoblas_HASWELL;
extern gotoblas_t gotoblas_ZEN;
#ifndef NO_AVX512
extern gotoblas_t gotoblas_SKYLAKEX;
#else
#define gotoblas_SKYLAKEX gotoblas_HASWELL
#endif
#endif
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#define gotoblas_HASWELL gotoblas_NEHALEM
#define gotoblas_SKYLAKEX gotoblas_NEHALEM
#define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
@@ -284,8 +291,21 @@ static gotoblas_t *get_coretype(void){
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
}
}
if (model == 5) {
// Intel Skylake X
#ifndef NO_AVX512
return &gotoblas_SKYLAKEX;
#else
if(support_avx())
return &gotoblas_HASWELL;
else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
}
#endif
}
//Intel Skylake
if (model == 14 || model == 5) {
if (model == 14) {
if(support_avx())
return &gotoblas_HASWELL;
else{
@@ -445,7 +465,8 @@ static char *corename[] = {
"Haswell",
"Steamroller",
"Excavator",
"Zen"
"Zen",
"SkylakeX"
};

char *gotoblas_corename(void) {
@@ -473,7 +494,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];
if (gotoblas == &gotoblas_EXCAVATOR) return corename[22];
if (gotoblas == &gotoblas_ZEN) return corename[23];
if (gotoblas == &gotoblas_SKYLAKEX) return corename[24];
return corename[0];
}

@@ -485,7 +506,7 @@ static gotoblas_t *force_coretype(char *coretype){
char message[128];
//char mname[20];

for ( i=1 ; i <= 23; i++)
for ( i=1 ; i <= 24; i++)
{
if (!strncasecmp(coretype,corename[i],20))
{
@@ -503,6 +524,7 @@ static gotoblas_t *force_coretype(char *coretype){

switch (found)
{
case 24: return (&gotoblas_SKYLAKEX);
case 23: return (&gotoblas_ZEN);
case 22: return (&gotoblas_EXCAVATOR);
case 21: return (&gotoblas_STEAMROLLER);


+ 2
- 2
driver/others/parameter.c View File

@@ -167,7 +167,7 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)

cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

@@ -251,7 +251,7 @@ int get_L2_size(void){
void blas_set_parameter(void){

int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
int size = 16;
#else
int size = get_L2_size();


+ 15
- 0
getarch.c View File

@@ -326,6 +326,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "HASWELL"
#endif

/*
 * FORCE_SKYLAKEX: fixed target description for SkylakeX.
 * When this macro is defined, FORCE and FORCE_INTEL are defined and the
 * name / configuration strings below are set to their SkylakeX values.
 * ARCHCONFIG is written as adjacent string literals, which the compiler
 * concatenates into a single space-separated flag string.
 */
#ifdef FORCE_SKYLAKEX
#define FORCE
#define FORCE_INTEL
#define CORENAME "SKYLAKEX"
#define LIBNAME "skylakex"
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "SKYLAKEX"
#define ARCHCONFIG \
"-DSKYLAKEX " \
"-DL1_DATA_SIZE=32768 " \
"-DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 " \
"-DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 " \
"-DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX " \
"-DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_AVX -DFMA3 -DHAVE_AVX512VL " \
"-march=skylake-avx512"
#endif

#ifdef FORCE_ATOM
#define FORCE
#define FORCE_INTEL


+ 1
- 1
kernel/CMakeLists.txt View File

@@ -121,7 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
# Makefile.L3
set(USE_TRMM false)

if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen")
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex")
set(USE_TRMM true)
endif ()



+ 4
- 0
kernel/Makefile.L3 View File

@@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL)
USE_TRMM = 1
endif

ifeq ($(CORE), SKYLAKEX)
USE_TRMM = 1
endif

ifeq ($(CORE), ZEN)
USE_TRMM = 1
endif


+ 16
- 0
kernel/setparam-ref.c View File

@@ -871,6 +871,22 @@ static void init_parameter(void) {
#endif
#endif

/* SkylakeX: fill the GEMM "p" entries of the parameter table from the
 * compile-time *GEMM_DEFAULT_P constants. */
#ifdef SKYLAKEX

#ifdef DEBUG
/* Debug builds print which per-core branch was compiled in. */
fprintf(stderr, "SkylakeX\n");
#endif

TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
/* The q/x entries are only set when EXPRECISION is defined. */
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef OPTERON



+ 1
- 1
kernel/x86/trsm_kernel_LN_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif


+ 1
- 1
kernel/x86/trsm_kernel_LN_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif


+ 1
- 1
kernel/x86/trsm_kernel_LT_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif


+ 1
- 1
kernel/x86/trsm_kernel_LT_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif


+ 1
- 1
kernel/x86/trsm_kernel_RT_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif


+ 1
- 1
kernel/x86/trsm_kernel_RT_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4)
#endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LN_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LT_1x2_penryn.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LT_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif


+ 1
- 1
kernel/x86/ztrsm_kernel_RT_1x2_penryn.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif


+ 1
- 1
kernel/x86/ztrsm_kernel_RT_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1
#define PREFETCHSIZE 84
#endif


+ 19
- 0
kernel/x86_64/KERNEL.SKYLAKEX View File

@@ -0,0 +1,19 @@
# Kernel selection for the SKYLAKEX target.
#
# Start from the Haswell kernel list; any variable that file also sets is
# overridden by the assignments below.
include $(KERNELDIR)/KERNEL.HASWELL

# --- Single precision ------------------------------------------------------
# SkylakeX-specific 16x4 SGEMM kernel; beta scaling uses the generic C code.
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
SGEMM_BETA = ../generic/gemm_beta.c

# --- Double precision ------------------------------------------------------
# SkylakeX-specific 16x2 DGEMM kernel, paired with the generic 16x2 TRMM
# kernel and generic beta scaling.
DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S
DTRMMKERNEL = ../generic/trmmkernel_16x2.c
DGEMM_BETA = ../generic/gemm_beta.c

# Packing (copy) routines for the 16x2 kernel: the "I" copies use the
# 16-wide generic routines, the "O" copies use the 2-wide ones.
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPY = ../generic/gemm_tcopy_16.c
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

+ 1
- 1
kernel/x86_64/caxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "caxpy_microk_steamroller-2.c"
#elif defined(BULLDOZER)
#include "caxpy_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX)
#include "caxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "caxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/cdot.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "cdot_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "cdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/cgemv_n_4.c View File

@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include "common.h"

#if defined(HASWELL) || defined(ZEN)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cgemv_n_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_n_microk_bulldozer-4.c"


+ 1
- 1
kernel/x86_64/cgemv_t_4.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common.h"

#if defined(HASWELL) || defined(ZEN)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cgemv_t_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_t_microk_bulldozer-4.c"


+ 1
- 1
kernel/x86_64/cscal.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"


#if defined(HASWELL) || defined(ZEN)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "cscal_microk_bulldozer-2.c"


+ 1
- 1
kernel/x86_64/daxpy.c View File

@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "daxpy_microk_steamroller-2.c"
#elif defined(PILEDRIVER)
#include "daxpy_microk_piledriver-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "daxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "daxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/ddot.c View File

@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ddot_microk_piledriver-2.c"
#elif defined(NEHALEM)
#include "ddot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ddot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ddot_microk_sandy-2.c"


+ 5138
- 0
kernel/x86_64/dgemm_kernel_16x2_skylakex.S
File diff suppressed because it is too large
View File


+ 1
- 1
kernel/x86_64/dgemv_n_4.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(NEHALEM)
#include "dgemv_n_microk_nehalem-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
#include "dgemv_n_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/dgemv_t_4.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common.h"

#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
#include "dgemv_t_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/dscal.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dscal_microk_bulldozer-2.c"
#elif defined(SANDYBRIDGE)
#include "dscal_microk_sandy-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "dscal_microk_haswell-2.c"
#endif



+ 1
- 1
kernel/x86_64/dsymv_L.c View File

@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_L_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "dsymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_L_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/dsymv_U.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_U_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "dsymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "dsymv_U_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/saxpy.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(NEHALEM)
#include "saxpy_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "saxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "saxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/sdot.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sdot_microk_steamroller-2.c"
#elif defined(NEHALEM)
#include "sdot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "sdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "sdot_microk_sandy-2.c"


+ 6811
- 0
kernel/x86_64/sgemm_kernel_16x4_skylakex.S
File diff suppressed because it is too large
View File


+ 1
- 1
kernel/x86_64/sgemv_n_4.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_n_microk_nehalem-4.c"
#elif defined(SANDYBRIDGE)
#include "sgemv_n_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "sgemv_n_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/sgemv_t_4.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE)
#include "sgemv_t_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "sgemv_t_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/ssymv_L.c View File

@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_L_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_L_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ssymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_L_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/ssymv_U.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_U_microk_bulldozer-2.c"
#elif defined(NEHALEM)
#include "ssymv_U_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ssymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "ssymv_U_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/symv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zaxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zaxpy_microk_bulldozer-2.c"
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zaxpy_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zaxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zaxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/zdot.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "zdot_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE)
#include "zdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/zgemv_n_4.c View File

@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"


#if defined(HASWELL) || defined(ZEN)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zgemv_n_microk_haswell-4.c"
#elif defined(SANDYBRIDGE)
#include "zgemv_n_microk_sandy-4.c"


+ 1
- 1
kernel/x86_64/zgemv_t_4.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zgemv_t_microk_bulldozer-4.c"
#elif defined(HASWELL) || defined(ZEN)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zgemv_t_microk_haswell-4.c"
#endif



+ 1
- 1
kernel/x86_64/zscal.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"


#if defined(HASWELL) || defined(ZEN)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "zscal_microk_bulldozer-2.c"


+ 1
- 1
kernel/x86_64/zsymv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24)
#endif

#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24)


+ 119
- 0
param.h View File

@@ -1613,6 +1613,125 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif


#endif

#ifdef SKYLAKEX

#define SNUMOPT 16
#define DNUMOPT 8

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL

#define SYMV_P 8

#define SWITCH_RATIO 4

/*
 * GEMM unroll factors (register-block sizes) for SKYLAKEX.
 * The values must match the kernels and copy routines selected in
 * kernel/x86_64/KERNEL.SKYLAKEX.
 */
#ifdef ARCH_X86

/* ARCH_X86 variant. */
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#else

/*
 * Non-ARCH_X86 variant.
 * SGEMM is 16x4 (sgemm_kernel_16x4_skylakex.S).
 * DGEMM is 16x2: KERNEL.SKYLAKEX selects dgemm_kernel_16x2_skylakex.S,
 * trmmkernel_16x2.c, gemm_{n,t}copy_16 for the inner copies and
 * gemm_{n,t}copy_2 for the outer copies, so M must be 16 and N must be 2.
 */
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 16
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#define SGEMM_DEFAULT_UNROLL_MN 32
#define DGEMM_DEFAULT_UNROLL_MN 32
#endif

/*
 * GEMM blocking parameters (P, Q, R) for SKYLAKEX, per precision prefix
 * (S, D, Q, C, Z, X). R values given as lower-case identifiers (sgemm_r,
 * dgemm_r, ...) refer to names declared elsewhere, not to constants here.
 */
#ifdef ARCH_X86

/* ARCH_X86 variant. NOTE(review): CGEMM_DEFAULT_R is the only R in this
 * branch that is a literal rather than a *_r identifier, and no *GEMM3M_*
 * values are defined in this branch - confirm both are intended. */
#define SGEMM_DEFAULT_P 512
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_R 1024
#define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 192
#define XGEMM_DEFAULT_Q 128

#else

/* Non-ARCH_X86 variant. */
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 512
#define CGEMM_DEFAULT_P 384
#define ZGEMM_DEFAULT_P 256

/* The S/D Q values differ when WINDOWS_ABI is defined. */
#ifdef WINDOWS_ABI
#define SGEMM_DEFAULT_Q 320
#define DGEMM_DEFAULT_Q 128
#else
#define SGEMM_DEFAULT_Q 384
#define DGEMM_DEFAULT_Q 256
#endif
#define CGEMM_DEFAULT_Q 192
#define ZGEMM_DEFAULT_Q 128

/* DGEMM uses a fixed R here; the other precisions use the *_r identifiers. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R 13824
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r

/* Q- and X-prefixed values (same numbers as in the ARCH_X86 branch). */
#define QGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128

/* GEMM3M unroll factors and P/Q/R values; only defined in this branch. */
#define CGEMM3M_DEFAULT_UNROLL_N 8
#define CGEMM3M_DEFAULT_UNROLL_M 4
#define ZGEMM3M_DEFAULT_UNROLL_N 8
#define ZGEMM3M_DEFAULT_UNROLL_M 2

#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

#endif


#endif




Loading…
Cancel
Save