Browse Source

Enable COOPERLAKE build target

Enable a new build target platform, COOPERLAKE. This target platform
supports every ISA that SKYLAKEX supports, plus avx512bf16, so all the
SKYLAKEX-specific kernels, drivers and related code are now extended
to be active on COOPERLAKE as well. In addition, the new BF16-related
kernels are active under this target.
tags/v0.3.11^2
Chen, Guobing 5 years ago
parent
commit
e740c4873d
62 changed files with 309 additions and 76 deletions
  1. +7
    -1
      Makefile.system
  2. +19
    -0
      Makefile.x86_64
  3. +1
    -0
      TargetList.txt
  4. +2
    -2
      cmake/arch.cmake
  5. +8
    -0
      cmake/cc.cmake
  6. +4
    -1
      cmake/system.cmake
  7. +9
    -6
      cpuid.h
  8. +32
    -5
      cpuid_x86.c
  9. +1
    -1
      driver/level3/level3.c
  10. +1
    -1
      driver/level3/level3_thread.c
  11. +4
    -4
      driver/level3/trmm_L.c
  12. +6
    -6
      driver/level3/trmm_R.c
  13. +7
    -4
      driver/others/parameter.c
  14. +30
    -0
      getarch.c
  15. +1
    -1
      kernel/CMakeLists.txt
  16. +11
    -1
      kernel/Makefile
  17. +4
    -0
      kernel/Makefile.L3
  18. +1
    -1
      kernel/setparam-ref.c
  19. +1
    -1
      kernel/x86/trsm_kernel_LN_2x4_penryn.S
  20. +1
    -1
      kernel/x86/trsm_kernel_LN_4x4_penryn.S
  21. +1
    -1
      kernel/x86/trsm_kernel_LT_2x4_penryn.S
  22. +1
    -1
      kernel/x86/trsm_kernel_LT_4x4_penryn.S
  23. +1
    -1
      kernel/x86/trsm_kernel_RT_2x4_penryn.S
  24. +1
    -1
      kernel/x86/trsm_kernel_RT_4x4_penryn.S
  25. +1
    -1
      kernel/x86/ztrsm_kernel_LN_2x2_penryn.S
  26. +1
    -1
      kernel/x86/ztrsm_kernel_LT_1x2_penryn.S
  27. +1
    -1
      kernel/x86/ztrsm_kernel_LT_2x2_penryn.S
  28. +1
    -1
      kernel/x86/ztrsm_kernel_RT_1x2_penryn.S
  29. +1
    -1
      kernel/x86/ztrsm_kernel_RT_2x2_penryn.S
  30. +1
    -0
      kernel/x86_64/KERNEL.COOPERLAKE
  31. +1
    -1
      kernel/x86_64/caxpy.c
  32. +1
    -1
      kernel/x86_64/cdot.c
  33. +1
    -1
      kernel/x86_64/cgemv_n_4.c
  34. +1
    -1
      kernel/x86_64/cgemv_t_4.c
  35. +1
    -1
      kernel/x86_64/cscal.c
  36. +1
    -1
      kernel/x86_64/daxpy.c
  37. +1
    -1
      kernel/x86_64/ddot.c
  38. +1
    -1
      kernel/x86_64/dgemv_n_4.c
  39. +1
    -1
      kernel/x86_64/dgemv_t_4.c
  40. +1
    -1
      kernel/x86_64/dscal.c
  41. +1
    -1
      kernel/x86_64/dsymv_L.c
  42. +1
    -1
      kernel/x86_64/dsymv_U.c
  43. +1
    -1
      kernel/x86_64/saxpy.c
  44. +1
    -1
      kernel/x86_64/sdot.c
  45. +1
    -1
      kernel/x86_64/sgemv_n_4.c
  46. +1
    -1
      kernel/x86_64/sgemv_t_4.c
  47. +1
    -1
      kernel/x86_64/ssymv_L.c
  48. +1
    -1
      kernel/x86_64/ssymv_U.c
  49. +1
    -1
      kernel/x86_64/symv_L_sse.S
  50. +1
    -1
      kernel/x86_64/symv_L_sse2.S
  51. +1
    -1
      kernel/x86_64/symv_U_sse.S
  52. +1
    -1
      kernel/x86_64/symv_U_sse2.S
  53. +1
    -1
      kernel/x86_64/zaxpy.c
  54. +1
    -1
      kernel/x86_64/zdot.c
  55. +1
    -1
      kernel/x86_64/zgemv_n_4.c
  56. +1
    -1
      kernel/x86_64/zgemv_t_4.c
  57. +1
    -1
      kernel/x86_64/zscal.c
  58. +1
    -1
      kernel/x86_64/zsymv_L_sse.S
  59. +1
    -1
      kernel/x86_64/zsymv_L_sse2.S
  60. +1
    -1
      kernel/x86_64/zsymv_U_sse.S
  61. +1
    -1
      kernel/x86_64/zsymv_U_sse2.S
  62. +118
    -0
      param.h

+ 7
- 1
Makefile.system View File

@@ -88,6 +88,9 @@ endif
ifeq ($(TARGET), SKYLAKEX) ifeq ($(TARGET), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
# COOPERLAKE gets the same getarch override as the neighbouring SKYLAKEX and
# SANDYBRIDGE cases: GETARCH_FLAGS is replaced with -DFORCE_NEHALEM.
# NOTE(review): the enclosing condition is outside this hunk - presumably the
# 32-bit build fallback; confirm.
ifeq ($(TARGET), COOPERLAKE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE) ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
@@ -130,6 +133,9 @@ endif
ifeq ($(TARGET_CORE), SKYLAKEX) ifeq ($(TARGET_CORE), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
# Same override keyed on TARGET_CORE: a COOPERLAKE core replaces
# GETARCH_FLAGS with -DFORCE_NEHALEM, like the SKYLAKEX and SANDYBRIDGE cases
# around it.
# NOTE(review): the enclosing condition is outside this hunk - presumably the
# 32-bit build fallback; confirm.
ifeq ($(TARGET_CORE), COOPERLAKE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE) ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
@@ -553,7 +559,7 @@ DYNAMIC_CORE += HASWELL ZEN
endif endif
ifneq ($(NO_AVX512), 1) ifneq ($(NO_AVX512), 1)
ifneq ($(NO_AVX2), 1) ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += SKYLAKEX
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
endif endif
endif endif
endif endif


+ 19
- 0
Makefile.x86_64 View File

@@ -27,6 +27,25 @@ endif
endif endif
endif endif


# Static (non-DYNAMIC_ARCH) COOPERLAKE build: unless NO_AVX512 is set, compile
# both C and Fortran sources with -march=cooperlake.
# NOTE(review): -march=cooperlake is only accepted by fairly recent compilers
# (presumably GCC >= 10 / Clang >= 9) - confirm whether a fallback is needed.
ifeq ($(CORE), COOPERLAKE)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake
# On Cygwin, additionally pass -fno-asynchronous-unwind-tables to both compilers.
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
# On WINNT the same flag is added, but only when the C compiler is GCC.
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif

ifeq ($(CORE), HASWELL) ifeq ($(CORE), HASWELL)
ifndef DYNAMIC_ARCH ifndef DYNAMIC_ARCH
ifndef NO_AVX2 ifndef NO_AVX2


+ 1
- 0
TargetList.txt View File

@@ -22,6 +22,7 @@ SANDYBRIDGE
HASWELL HASWELL
SKYLAKEX SKYLAKEX
ATOM ATOM
COOPERLAKE


b)AMD CPU: b)AMD CPU:
ATHLON ATHLON


+ 2
- 2
cmake/arch.cmake View File

@@ -76,9 +76,9 @@ if (DYNAMIC_ARCH)
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN) set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
endif () endif ()
if (NOT NO_AVX512) if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
endif ()
endif ()
if (DYNAMIC_LIST) if (DYNAMIC_LIST)
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST}) set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
endif () endif ()


+ 8
- 0
cmake/cc.cmake View File

@@ -103,3 +103,11 @@ if (${CORE} STREQUAL "SKYLAKEX")
endif () endif ()
endif () endif ()
endif () endif ()

# Static (non-DYNAMIC_ARCH) COOPERLAKE build: append -march=cooperlake to the
# common C compiler options unless AVX512 has been disabled via NO_AVX512.
# CORE is quoted so that an empty or undefined CORE does not break the if().
if ("${CORE}" STREQUAL "COOPERLAKE")
  if (NOT DYNAMIC_ARCH)
    if (NOT NO_AVX512)
      # set() takes no "=": writing set(VAR = "...") would store the
      # two-element list "=;<flags>" instead of the flag string.
      set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
    endif ()
  endif ()
endif ()

+ 4
- 1
cmake/system.cmake View File

@@ -33,7 +33,7 @@ endif ()
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
message(STATUS "Compiling a ${BINARY}-bit binary.") message(STATUS "Compiling a ${BINARY}-bit binary.")
set(NO_AVX 1) set(NO_AVX 1)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
set(TARGET "NEHALEM") set(TARGET "NEHALEM")
endif () endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
@@ -45,6 +45,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif () endif ()


if (DEFINED TARGET) if (DEFINED TARGET)
# An explicitly requested COOPERLAKE target adds -march=cooperlake to the
# kernel definitions, unless AVX512 is disabled through NO_AVX512.
if (${TARGET} STREQUAL "COOPERLAKE")
  if (NOT NO_AVX512)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
  endif ()
endif ()
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512) if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif() endif()


+ 9
- 6
cpuid.h View File

@@ -118,6 +118,7 @@
#define CORE_ZEN 27 #define CORE_ZEN 27
#define CORE_SKYLAKEX 28 #define CORE_SKYLAKEX 28
#define CORE_DHYANA 29 #define CORE_DHYANA 29
#define CORE_COOPERLAKE 30


#define HAVE_SSE (1 << 0) #define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1) #define HAVE_SSE2 (1 << 1)
@@ -137,11 +138,12 @@
#define HAVE_MISALIGNSSE (1 << 15) #define HAVE_MISALIGNSSE (1 << 15)
#define HAVE_128BITFPU (1 << 16) #define HAVE_128BITFPU (1 << 16)
#define HAVE_FASTMOVU (1 << 17) #define HAVE_FASTMOVU (1 << 17)
#define HAVE_AVX (1 << 18)
#define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21)
#define HAVE_AVX2 (1 << 22)
#define HAVE_AVX (1 << 18)
#define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21)
#define HAVE_AVX2 (1 << 22)
#define HAVE_AVX512BF16 (1 << 23)


#define CACHE_INFO_L1_I 1 #define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2 #define CACHE_INFO_L1_D 2
@@ -218,7 +220,8 @@ typedef struct {
#define CPUTYPE_ZEN 51 #define CPUTYPE_ZEN 51
#define CPUTYPE_SKYLAKEX 52 #define CPUTYPE_SKYLAKEX 52
#define CPUTYPE_DHYANA 53 #define CPUTYPE_DHYANA 53
#define CPUTYPE_COOPERLAKE 54


#define CPUTYPE_HYGON_UNKNOWN 54
#define CPUTYPE_HYGON_UNKNOWN 99


#endif #endif

+ 32
- 5
cpuid_x86.c View File

@@ -249,6 +249,22 @@ int support_avx512(){
#endif #endif
} }


/*
 * Report whether the CPU supports AVX512-BF16.
 *
 * Returns 1 when support_avx512() succeeds and CPUID leaf 7, sub-leaf 1
 * reports EAX bit 5 set; returns 0 otherwise. Always returns 0 when the
 * build defines NO_AVX or NO_AVX512.
 */
int support_avx512_bf16(){
#if defined(NO_AVX) || defined(NO_AVX512)
  return 0;
#else
  int eax, ebx, ecx, edx;

  /* BF16 is only considered on top of working AVX512 support. */
  if (!support_avx512())
    return 0;

  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);

  /* CPUID.7.1:EAX[bit 5] indicates whether avx512_bf16 is supported. */
  return (eax & (1 << 5)) ? 1 : 0;
#endif
}


int get_vendor(void){ int get_vendor(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
@@ -335,6 +351,7 @@ int get_cputype(int gettype){
if (support_avx()) feature |= HAVE_AVX; if (support_avx()) feature |= HAVE_AVX;
if (support_avx2()) feature |= HAVE_AVX2; if (support_avx2()) feature |= HAVE_AVX2;
if (support_avx512()) feature |= HAVE_AVX512VL; if (support_avx512()) feature |= HAVE_AVX512VL;
if (support_avx512_bf16()) feature |= HAVE_AVX512BF16;
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
#endif #endif


@@ -1337,6 +1354,8 @@ int get_cpuname(void){
return CPUTYPE_NEHALEM; return CPUTYPE_NEHALEM;
case 5: case 5:
// Skylake X // Skylake X
if(support_avx512_bf16())
return CPUTYPE_COOPERLAKE;
if(support_avx512()) if(support_avx512())
return CPUTYPE_SKYLAKEX; return CPUTYPE_SKYLAKEX;
if(support_avx2()) if(support_avx2())
@@ -1677,7 +1696,8 @@ static char *cpuname[] = {
"EXCAVATOR", "EXCAVATOR",
"ZEN", "ZEN",
"SKYLAKEX", "SKYLAKEX",
"DHYANA"
"DHYANA",
"COOPERLAKE"
}; };


static char *lowercpuname[] = { static char *lowercpuname[] = {
@@ -1733,7 +1753,8 @@ static char *lowercpuname[] = {
"excavator", "excavator",
"zen", "zen",
"skylakex", "skylakex",
"dhyana"
"dhyana",
"cooperlake"
}; };


static char *corename[] = { static char *corename[] = {
@@ -1766,7 +1787,8 @@ static char *corename[] = {
"EXCAVATOR", "EXCAVATOR",
"ZEN", "ZEN",
"SKYLAKEX", "SKYLAKEX",
"DHYANA"
"DHYANA",
"COOPERLAKE"
}; };


static char *corename_lower[] = { static char *corename_lower[] = {
@@ -1799,7 +1821,8 @@ static char *corename_lower[] = {
"excavator", "excavator",
"zen", "zen",
"skylakex", "skylakex",
"dhyana"
"dhyana",
"cooperlake"
}; };




@@ -2007,7 +2030,9 @@ int get_coretype(void){
case 5: case 5:
// Skylake X // Skylake X
#ifndef NO_AVX512 #ifndef NO_AVX512
return CORE_SKYLAKEX;
if(support_avx512_bf16())
return CORE_COOPERLAKE;
return CORE_SKYLAKEX;
#else #else
if(support_avx()) if(support_avx())
#ifndef NO_AVX2 #ifndef NO_AVX2
@@ -2276,6 +2301,7 @@ void get_cpuconfig(void){
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n"); if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n"); if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n"); if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n");
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n"); if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n"); if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n"); if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
@@ -2346,6 +2372,7 @@ void get_sse(void){
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n"); if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n"); if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n"); if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n");
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n"); if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n"); if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n"); if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");


+ 1
- 1
driver/level3/level3.c View File

@@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#else #else
for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else


+ 1
- 1
driver/level3/level3_thread.c View File

@@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Split local region of B into parts */ /* Split local region of B into parts */
for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){ for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){
min_jj = MIN(n_to, js + div_n) - jjs; min_jj = MIN(n_to, js + div_n) - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else


+ 4
- 4
driver/level3/trmm_L.c View File

@@ -135,7 +135,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -205,7 +205,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -300,7 +300,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -370,7 +370,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else


+ 6
- 6
driver/level3/trmm_R.c View File

@@ -122,7 +122,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = 0; jjs < ls - js; jjs += min_jj){ for(jjs = 0; jjs < ls - js; jjs += min_jj){
min_jj = ls - js - jjs; min_jj = ls - js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -146,7 +146,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = 0; jjs < min_l; jjs += min_jj){ for(jjs = 0; jjs < min_l; jjs += min_jj){
min_jj = min_l - jjs; min_jj = min_l - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -203,7 +203,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -258,7 +258,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = 0; jjs < min_l; jjs += min_jj){ for(jjs = 0; jjs < min_l; jjs += min_jj){
min_jj = min_l - jjs; min_jj = min_l - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -283,7 +283,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){ for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){
min_jj = js - ls - min_l - jjs; min_jj = js - ls - min_l - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else
@@ -344,7 +344,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO


for(jjs = js; jjs < js + min_j; jjs += min_jj){ for(jjs = js; jjs < js + min_j; jjs += min_jj){
min_jj = min_j + js - jjs; min_jj = min_j + js - jjs;
#ifdef SKYLAKEX
#if defined(SKYLAKEX) || defined(COOPERLAKE)
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
#else #else


+ 7
- 4
driver/others/parameter.c View File

@@ -180,9 +180,10 @@ int get_L2_size(void){
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;


#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || \
defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE)


cpuid(0x80000006, &eax, &ebx, &ecx, &edx); cpuid(0x80000006, &eax, &ebx, &ecx, &edx);


@@ -266,7 +267,9 @@ int get_L2_size(void){
void blas_set_parameter(void){ void blas_set_parameter(void){


int factor; int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || \
defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || \
defined(SKYLAKEX) || defined(COOPERLAKE)
int size = 16; int size = 16;
#else #else
int size = get_L2_size(); int size = get_L2_size();


+ 30
- 0
getarch.c View File

@@ -365,6 +365,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


/*
 * Forced COOPERLAKE target. The vendor/architecture defines are common to
 * both variants; the remaining settings describe COOPERLAKE itself, or fall
 * back to the HASWELL configuration when the build defines NO_AVX512.
 */
#ifdef FORCE_COOPERLAKE
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#ifndef NO_AVX512
/* AVX512 available: full COOPERLAKE configuration including BF16. */
#define SUBARCHITECTURE "COOPERLAKE"
#define ARCHCONFIG "-DCOOPERLAKE " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
#define LIBNAME "cooperlake"
#define CORENAME "COOPERLAKE"
#else
/* AVX512 disabled: describe the target as HASWELL instead. */
#define SUBARCHITECTURE "HASWELL"
#define ARCHCONFIG "-DHASWELL " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3"
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#endif
#endif

#ifdef FORCE_ATOM #ifdef FORCE_ATOM
#define FORCE #define FORCE
#define FORCE_INTEL #define FORCE_INTEL


+ 1
- 1
kernel/CMakeLists.txt View File

@@ -127,7 +127,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)


# Makefile.L3 # Makefile.L3
set(USE_TRMM false) set(USE_TRMM false)
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE))
set(USE_TRMM true) set(USE_TRMM true)
endif () endif ()
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10)) if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10))


+ 11
- 1
kernel/Makefile View File

@@ -37,7 +37,17 @@ ifdef NO_AVX2
endif endif


ifdef TARGET_CORE ifdef TARGET_CORE
ifeq ($(TARGET_CORE), SKYLAKEX)
ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=cooperlake
ifeq ($(OSNAME), CYGWIN_NT)
override CFLAGS += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
override CFLAGS += -fno-asynchronous-unwind-tables
endif
endif
else ifeq ($(TARGET_CORE), SKYLAKEX)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512 override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512
ifeq ($(OSNAME), CYGWIN_NT) ifeq ($(OSNAME), CYGWIN_NT)
override CFLAGS += -fno-asynchronous-unwind-tables override CFLAGS += -fno-asynchronous-unwind-tables


+ 4
- 0
kernel/Makefile.L3 View File

@@ -39,6 +39,10 @@ ifeq ($(CORE), SKYLAKEX)
USE_TRMM = 1 USE_TRMM = 1
endif endif


# COOPERLAKE sets USE_TRMM = 1, the same setting the SKYLAKEX and ZEN cases
# next to it use.
ifeq ($(CORE), COOPERLAKE)
USE_TRMM = 1
endif

ifeq ($(CORE), ZEN) ifeq ($(CORE), ZEN)
USE_TRMM = 1 USE_TRMM = 1
endif endif


+ 1
- 1
kernel/setparam-ref.c View File

@@ -1166,7 +1166,7 @@ static void init_parameter(void) {
#endif #endif
#endif #endif


#ifdef SKYLAKEX
#if defined (SKYLAKEX) || defined (COOPERLAKE)


#ifdef DEBUG #ifdef DEBUG
fprintf(stderr, "SkylakeX\n"); fprintf(stderr, "SkylakeX\n");


+ 1
- 1
kernel/x86/trsm_kernel_LN_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_LN_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_LT_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_LT_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_RT_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_RT_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LN_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LT_1x2_penryn.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LT_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_RT_1x2_penryn.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_RT_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 0
kernel/x86_64/KERNEL.COOPERLAKE View File

@@ -0,0 +1 @@
# COOPERLAKE pulls in the SKYLAKEX kernel list from KERNELDIR unchanged.
include $(KERNELDIR)/KERNEL.SKYLAKEX

+ 1
- 1
kernel/x86_64/caxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "caxpy_microk_steamroller-2.c" #include "caxpy_microk_steamroller-2.c"
#elif defined(BULLDOZER) #elif defined(BULLDOZER)
#include "caxpy_microk_bulldozer-2.c" #include "caxpy_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE)
#include "caxpy_microk_haswell-2.c" #include "caxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "caxpy_microk_sandy-2.c" #include "caxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/cdot.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cdot_microk_bulldozer-2.c" #include "cdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "cdot_microk_steamroller-2.c" #include "cdot_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "cdot_microk_haswell-2.c" #include "cdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "cdot_microk_sandy-2.c" #include "cdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/cgemv_n_4.c View File

@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h> #include <stdio.h>
#include "common.h" #include "common.h"


#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "cgemv_n_microk_haswell-4.c" #include "cgemv_n_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_n_microk_bulldozer-4.c" #include "cgemv_n_microk_bulldozer-4.c"


+ 1
- 1
kernel/x86_64/cgemv_t_4.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "cgemv_t_microk_haswell-4.c" #include "cgemv_t_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_t_microk_bulldozer-4.c" #include "cgemv_t_microk_bulldozer-4.c"


+ 1
- 1
kernel/x86_64/cscal.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "cscal_microk_haswell-2.c" #include "cscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) #elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "cscal_microk_bulldozer-2.c" #include "cscal_microk_bulldozer-2.c"


+ 1
- 1
kernel/x86_64/daxpy.c View File

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "daxpy_microk_piledriver-2.c" #include "daxpy_microk_piledriver-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN)
#include "daxpy_microk_haswell-2.c" #include "daxpy_microk_haswell-2.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "daxpy_microk_skylakex-2.c" #include "daxpy_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "daxpy_microk_sandy-2.c" #include "daxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/ddot.c View File

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ddot_microk_nehalem-2.c" #include "ddot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN)
#include "ddot_microk_haswell-2.c" #include "ddot_microk_haswell-2.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "ddot_microk_skylakex-2.c" #include "ddot_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "ddot_microk_sandy-2.c" #include "ddot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/dgemv_n_4.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dgemv_n_microk_nehalem-4.c" #include "dgemv_n_microk_nehalem-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dgemv_n_microk_haswell-4.c" #include "dgemv_n_microk_haswell-4.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "dgemv_n_microk_skylakex-4.c" #include "dgemv_n_microk_skylakex-4.c"
#endif #endif




+ 1
- 1
kernel/x86_64/dgemv_t_4.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "dgemv_t_microk_haswell-4.c" #include "dgemv_t_microk_haswell-4.c"
#endif #endif




+ 1
- 1
kernel/x86_64/dscal.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dscal_microk_sandy-2.c" #include "dscal_microk_sandy-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN)
#include "dscal_microk_haswell-2.c" #include "dscal_microk_haswell-2.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "dscal_microk_skylakex-2.c" #include "dscal_microk_skylakex-2.c"
#endif #endif




+ 1
- 1
kernel/x86_64/dsymv_L.c View File

@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dsymv_L_microk_bulldozer-2.c" #include "dsymv_L_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN)
#include "dsymv_L_microk_haswell-2.c" #include "dsymv_L_microk_haswell-2.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "dsymv_L_microk_skylakex-2.c" #include "dsymv_L_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "dsymv_L_microk_sandy-2.c" #include "dsymv_L_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/dsymv_U.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_U_microk_bulldozer-2.c" #include "dsymv_U_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "dsymv_U_microk_haswell-2.c" #include "dsymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "dsymv_U_microk_sandy-2.c" #include "dsymv_U_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/saxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "saxpy_microk_nehalem-2.c" #include "saxpy_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN)
#include "saxpy_microk_haswell-2.c" #include "saxpy_microk_haswell-2.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "saxpy_microk_skylakex-2.c" #include "saxpy_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "saxpy_microk_sandy-2.c" #include "saxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/sdot.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sdot_microk_nehalem-2.c" #include "sdot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN)
#include "sdot_microk_haswell-2.c" #include "sdot_microk_haswell-2.c"
#elif defined (SKYLAKEX)
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
#include "sdot_microk_skylakex-2.c" #include "sdot_microk_skylakex-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sdot_microk_sandy-2.c" #include "sdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/sgemv_n_4.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_n_microk_nehalem-4.c" #include "sgemv_n_microk_nehalem-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sgemv_n_microk_sandy-4.c" #include "sgemv_n_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "sgemv_n_microk_haswell-4.c" #include "sgemv_n_microk_haswell-4.c"
#endif #endif




+ 1
- 1
kernel/x86_64/sgemv_t_4.c View File

@@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_t_microk_bulldozer-4.c" #include "sgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sgemv_t_microk_sandy-4.c" #include "sgemv_t_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "sgemv_t_microk_haswell-4.c" #include "sgemv_t_microk_haswell-4.c"
#endif #endif




+ 1
- 1
kernel/x86_64/ssymv_L.c View File

@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_L_microk_bulldozer-2.c" #include "ssymv_L_microk_bulldozer-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "ssymv_L_microk_nehalem-2.c" #include "ssymv_L_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "ssymv_L_microk_haswell-2.c" #include "ssymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "ssymv_L_microk_sandy-2.c" #include "ssymv_L_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/ssymv_U.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_U_microk_bulldozer-2.c" #include "ssymv_U_microk_bulldozer-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "ssymv_U_microk_nehalem-2.c" #include "ssymv_U_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "ssymv_U_microk_haswell-2.c" #include "ssymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "ssymv_U_microk_sandy-2.c" #include "ssymv_U_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/symv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zaxpy.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zaxpy_microk_bulldozer-2.c" #include "zaxpy_microk_bulldozer-2.c"
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zaxpy_microk_steamroller-2.c" #include "zaxpy_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "zaxpy_microk_haswell-2.c" #include "zaxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "zaxpy_microk_sandy-2.c" #include "zaxpy_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/zdot.c View File

@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zdot_microk_bulldozer-2.c" #include "zdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "zdot_microk_steamroller-2.c" #include "zdot_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "zdot_microk_haswell-2.c" #include "zdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "zdot_microk_sandy-2.c" #include "zdot_microk_sandy-2.c"


+ 1
- 1
kernel/x86_64/zgemv_n_4.c View File

@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "zgemv_n_microk_haswell-4.c" #include "zgemv_n_microk_haswell-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "zgemv_n_microk_sandy-4.c" #include "zgemv_n_microk_sandy-4.c"


+ 1
- 1
kernel/x86_64/zgemv_t_4.c View File

@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zgemv_t_microk_bulldozer-4.c" #include "zgemv_t_microk_bulldozer-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "zgemv_t_microk_haswell-4.c" #include "zgemv_t_microk_haswell-4.c"
#endif #endif




+ 1
- 1
kernel/x86_64/zscal.c View File

@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#include "zscal_microk_haswell-2.c" #include "zscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) #elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "zscal_microk_bulldozer-2.c" #include "zscal_microk_bulldozer-2.c"


+ 1
- 1
kernel/x86_64/zsymv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 118
- 0
param.h View File

@@ -1748,6 +1748,124 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


/*
 * Tuning parameters for the COOPERLAKE build target.
 *
 * Everything in this block is only defined when COOPERLAKE is defined.
 * It sets the GEMM blocking sizes (P/Q/R), register-blocking (UNROLL_M/N)
 * and a few related constants, with separate value sets depending on
 * whether ARCH_X86 is defined.
 * NOTE(review): the commit description says COOPERLAKE is SKYLAKEX plus
 * avx512bf16, so these values are presumably meant to mirror the SKYLAKEX
 * block -- confirm they stay in sync.
 */
#ifdef COOPERLAKE

/* NOTE(review): meaning of SNUMOPT/DNUMOPT is not visible here; presumably
 * single-/double-precision operation counts -- confirm. */
#define SNUMOPT 16
#define DNUMOPT 8

/* No extra offset is applied to the A/B buffers; 0x03fff == 16 KiB - 1
 * (presumably used as an alignment mask -- confirm against its users). */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL

#define SYMV_P 8

/* Double / extended-double builds use 8 for both values, all other builds
 * use 16. */
#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#else
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#endif
/* Defined unconditionally for this target, regardless of precision. */
#define USE_SGEMM_KERNEL_DIRECT 1

/* Register-blocking (unroll) factors: one set when ARCH_X86 is defined,
 * another (larger M unroll for S/D/C/Z) otherwise. */
#ifdef ARCH_X86

#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#else

#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 16
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

/* UNROLL_MN is only provided in this (non-ARCH_X86) branch. */
#define SGEMM_DEFAULT_UNROLL_MN 32
#define DGEMM_DEFAULT_UNROLL_MN 32
#endif

/* Cache-blocking sizes (P/Q/R), again split on ARCH_X86.  Several R values
 * expand to identifiers (sgemm_r, dgemm_r, ...) that are not defined in
 * this block and must be provided elsewhere. */
#ifdef ARCH_X86

#define SGEMM_DEFAULT_P 512
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 128
/* Unlike the other R values in this branch, CGEMM uses a literal. */
#define CGEMM_DEFAULT_R 1024
#define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 192
#define XGEMM_DEFAULT_Q 128

#else

#define SGEMM_DEFAULT_P 640
#define DGEMM_DEFAULT_P 192
#define CGEMM_DEFAULT_P 384
#define ZGEMM_DEFAULT_P 256

#define SGEMM_DEFAULT_Q 320
#define DGEMM_DEFAULT_Q 384
#define CGEMM_DEFAULT_Q 192
#define ZGEMM_DEFAULT_Q 128

/* In this branch DGEMM is the one with a literal R; S/C/Z expand to
 * identifiers defined elsewhere. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R 8640
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r

#define QGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128

/* GEMM3M parameters are only defined in this (non-ARCH_X86) branch. */
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 4
#define ZGEMM3M_DEFAULT_UNROLL_M 4

#define CGEMM3M_DEFAULT_P 320
#define ZGEMM3M_DEFAULT_P 256
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 320
#define ZGEMM3M_DEFAULT_Q 256
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

#endif
#endif




#ifdef ATOM #ifdef ATOM


Loading…
Cancel
Save