@@ -121,6 +121,9 @@ endif | |||||
ifeq ($(TARGET), COOPERLAKE) | ifeq ($(TARGET), COOPERLAKE) | ||||
GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
endif | endif | ||||
# SAPPHIRERAPIDS gets the same treatment as the COOPERLAKE and SANDYBRIDGE
# entries next to it: getarch is built with -DFORCE_NEHALEM for this TARGET.
ifeq ($(TARGET), SAPPHIRERAPIDS)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE) | ifeq ($(TARGET), SANDYBRIDGE) | ||||
GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
endif | endif | ||||
@@ -166,6 +169,9 @@ endif | |||||
ifeq ($(TARGET_CORE), COOPERLAKE) | ifeq ($(TARGET_CORE), COOPERLAKE) | ||||
GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
endif | endif | ||||
# Same mapping as above, keyed on TARGET_CORE instead of TARGET:
# a SAPPHIRERAPIDS core makes getarch build with -DFORCE_NEHALEM.
ifeq ($(TARGET_CORE), SAPPHIRERAPIDS)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE) | ifeq ($(TARGET_CORE), SANDYBRIDGE) | ||||
GETARCH_FLAGS := -DFORCE_NEHALEM | GETARCH_FLAGS := -DFORCE_NEHALEM | ||||
endif | endif | ||||
@@ -96,6 +96,30 @@ endif | |||||
endif | endif | ||||
endif | endif | ||||
# Compiler flags for a SAPPHIRERAPIDS core when AVX512 has not been disabled.
ifeq ($(CORE), SAPPHIRERAPIDS)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# sapphire rapids support was added in 11
ifeq ($(GCCVERSIONGTEQ11), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else
# Older GCC does not know -march=sapphirerapids. Fall back to the
# skylake-avx512 baseline, as the CMake and kernel Makefile logic do,
# instead of leaving the build without any -march flag.
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
# Cygwin: always drop asynchronous unwind tables.
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
# Native Windows: only when the C compiler is GCC.
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
ifdef HAVE_AVX2 | ifdef HAVE_AVX2 | ||||
ifndef NO_AVX2 | ifndef NO_AVX2 | ||||
ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
@@ -23,6 +23,7 @@ HASWELL | |||||
SKYLAKEX | SKYLAKEX | ||||
ATOM | ATOM | ||||
COOPERLAKE | COOPERLAKE | ||||
SAPPHIRERAPIDS | |||||
b)AMD CPU: | b)AMD CPU: | ||||
ATHLON | ATHLON | ||||
@@ -126,6 +126,19 @@ if (${CORE} STREQUAL COOPERLAKE) | |||||
endif () | endif () | ||||
endif () | endif () | ||||
# Choose the -march flag for a SAPPHIRERAPIDS core. Applies only to
# non-DYNAMIC_ARCH builds in which AVX512 has not been disabled.
if (${CORE} STREQUAL SAPPHIRERAPIDS)
  if (NOT DYNAMIC_ARCH)
    if (NOT NO_AVX512)
      # Strip the trailing newline so the value is a clean version string.
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                      OUTPUT_VARIABLE GCC_VERSION
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      # Version 11.0 or newer gets -march=sapphirerapids; anything older
      # (or an empty result, hence the quoting) falls back to skylake-avx512.
      if (NOT "${GCC_VERSION}" VERSION_LESS 11.0)
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=sapphirerapids")
      else ()
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
      endif ()
    endif ()
  endif ()
endif ()
if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
if (HAVE_AVX2) | if (HAVE_AVX2) | ||||
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2") | set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2") | ||||
@@ -33,7 +33,7 @@ endif () | |||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | ||||
message(STATUS "Compiling a ${BINARY}-bit binary.") | message(STATUS "Compiling a ${BINARY}-bit binary.") | ||||
set(NO_AVX 1) | set(NO_AVX 1) | ||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE") | |||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE" OR ${TARGET} STREQUAL "SAPPHIRERAPIDS") | |||||
set(TARGET "NEHALEM") | set(TARGET "NEHALEM") | ||||
endif () | endif () | ||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") | ||||
@@ -163,6 +163,22 @@ if (DEFINED TARGET) | |||||
endif() | endif() | ||||
endif() | endif() | ||||
endif() | endif() | ||||
# Kernel -march flag for a SAPPHIRERAPIDS target when AVX512 is allowed.
# Compilers too old for -march=sapphirerapids fall back to skylake-avx512;
# compilers other than GNU / Clang / AppleClang get no -march flag here.
if (${TARGET} STREQUAL SAPPHIRERAPIDS AND NOT NO_AVX512)
  if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
    # GCC_VERSION is not consulted below (CMAKE_C_COMPILER_VERSION is used);
    # the call is kept because it sets a variable in the enclosing scope that
    # other code may read -- TODO confirm and drop if unused.
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
    # "not less than" is an inclusive lower bound (>= 11.0).
    if (NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS 11.0)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids")
    else ()
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
    endif ()
  elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
    # Inclusive bound: 12.0.0 compares equal to 12.0, so a strict
    # VERSION_GREATER test would wrongly reject the 12.0.0 release.
    # NOTE(review): AppleClang uses its own version numbering -- confirm that
    # the same threshold is appropriate for it.
    if (NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS 12.0)
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids")
    else ()
      set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
    endif ()
  endif ()
endif ()
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) | if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) | ||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | ||||
endif() | endif() | ||||
@@ -120,6 +120,7 @@ | |||||
#define CORE_SKYLAKEX 28 | #define CORE_SKYLAKEX 28 | ||||
#define CORE_DHYANA 29 | #define CORE_DHYANA 29 | ||||
#define CORE_COOPERLAKE 30 | #define CORE_COOPERLAKE 30 | ||||
#define CORE_SAPPHIRERAPIDS 31 | |||||
#define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
#define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
@@ -145,6 +146,7 @@ | |||||
#define HAVE_AVX512VL (1 << 21) | #define HAVE_AVX512VL (1 << 21) | ||||
#define HAVE_AVX2 (1 << 22) | #define HAVE_AVX2 (1 << 22) | ||||
#define HAVE_AVX512BF16 (1 << 23) | #define HAVE_AVX512BF16 (1 << 23) | ||||
#define HAVE_AMXBF16 (1 << 24) | |||||
#define CACHE_INFO_L1_I 1 | #define CACHE_INFO_L1_I 1 | ||||
#define CACHE_INFO_L1_D 2 | #define CACHE_INFO_L1_D 2 | ||||
@@ -222,6 +224,7 @@ typedef struct { | |||||
#define CPUTYPE_SKYLAKEX 52 | #define CPUTYPE_SKYLAKEX 52 | ||||
#define CPUTYPE_DHYANA 53 | #define CPUTYPE_DHYANA 53 | ||||
#define CPUTYPE_COOPERLAKE 54 | #define CPUTYPE_COOPERLAKE 54 | ||||
#define CPUTYPE_SAPPHIRERAPIDS 55 | |||||
#define CPUTYPE_HYGON_UNKNOWN 99 | #define CPUTYPE_HYGON_UNKNOWN 99 | ||||
@@ -266,6 +266,31 @@ int support_avx512_bf16(){ | |||||
#endif | #endif | ||||
} | } | ||||
/* Bits tested in EDX of CPUID leaf 7, subleaf 0. */
#define BIT_AMX_TILE 0x01000000
#define BIT_AMX_BF16 0x00400000
/* Bits 17 and 18 of EAX from CPUID leaf 0xD, subleaf 0; both must be set. */
#define BIT_AMX_ENBD 0x00060000

/*
 * Report whether AMX-BF16 is usable.
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - support_avx512() reports AVX512,
 *   - CPUID.7.0:EDX has both BIT_AMX_TILE and BIT_AMX_BF16 set,
 *   - CPUID.D.0:EAX has both bits of BIT_AMX_ENBD set.
 * Always returns 0 when built with NO_AVX or NO_AVX512.
 */
int support_amx_bf16() {
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  /* CPUID.7.0:EDX indicates AMX support */
  cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
  if (!(edx & BIT_AMX_TILE) || !(edx & BIT_AMX_BF16))
    return 0;

  /* CPUID.D.0:EAX[17:18] indicates AMX enabled */
  cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
  if ((eax & BIT_AMX_ENBD) != BIT_AMX_ENBD)
    return 0;

  return 1;
#else
  return 0;
#endif
}
int get_vendor(void){ | int get_vendor(void){ | ||||
int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
char vendor[13]; | char vendor[13]; | ||||
@@ -353,6 +378,7 @@ int get_cputype(int gettype){ | |||||
if (support_avx2()) feature |= HAVE_AVX2; | if (support_avx2()) feature |= HAVE_AVX2; | ||||
if (support_avx512()) feature |= HAVE_AVX512VL; | if (support_avx512()) feature |= HAVE_AVX512VL; | ||||
if (support_avx512_bf16()) feature |= HAVE_AVX512BF16; | if (support_avx512_bf16()) feature |= HAVE_AVX512BF16; | ||||
if (support_amx_bf16()) feature |= HAVE_AMXBF16; | |||||
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; | if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; | ||||
#endif | #endif | ||||
@@ -2389,6 +2415,7 @@ void get_cpuconfig(void){ | |||||
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n"); | if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n"); | ||||
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n"); | if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n"); | ||||
if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n"); | if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n"); | ||||
if (features & HAVE_AMXBF16 ) printf("#define HAVE_AMXBF16\n"); | |||||
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n"); | if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n"); | ||||
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n"); | if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n"); | ||||
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n"); | if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n"); | ||||
@@ -2460,6 +2487,7 @@ void get_sse(void){ | |||||
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n"); | if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n"); | ||||
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n"); | if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n"); | ||||
if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n"); | if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n"); | ||||
if (features & HAVE_AMXBF16 ) printf("HAVE_AMXBF16=1\n"); | |||||
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n"); | if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n"); | ||||
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n"); | if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n"); | ||||
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n"); | if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n"); | ||||
@@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
#else | #else | ||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
/* Split local region of B into parts */ | /* Split local region of B into parts */ | ||||
for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){ | for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){ | ||||
min_jj = MIN(n_to, js + div_n) - jjs; | min_jj = MIN(n_to, js + div_n) - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -138,7 +138,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -215,7 +215,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -320,7 +320,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -399,7 +399,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -122,7 +122,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = 0; jjs < ls - js; jjs += min_jj){ | for(jjs = 0; jjs < ls - js; jjs += min_jj){ | ||||
min_jj = ls - js - jjs; | min_jj = ls - js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -146,7 +146,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = 0; jjs < min_l; jjs += min_jj){ | for(jjs = 0; jjs < min_l; jjs += min_jj){ | ||||
min_jj = min_l - jjs; | min_jj = min_l - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -203,7 +203,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -258,7 +258,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = 0; jjs < min_l; jjs += min_jj){ | for(jjs = 0; jjs < min_l; jjs += min_jj){ | ||||
min_jj = min_l - jjs; | min_jj = min_l - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -283,7 +283,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){ | for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){ | ||||
min_jj = js - ls - min_l - jjs; | min_jj = js - ls - min_l - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -344,7 +344,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){ | for(jjs = js; jjs < js + min_j; jjs += min_jj){ | ||||
min_jj = min_j + js - jjs; | min_jj = min_j + js - jjs; | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ | ||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; | ||||
#else | #else | ||||
@@ -183,7 +183,7 @@ int get_L2_size(void){ | |||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | ||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | ||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || \ | defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || \ | ||||
defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | ||||
@@ -269,7 +269,7 @@ void blas_set_parameter(void){ | |||||
int factor; | int factor; | ||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || \ | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || \ | ||||
defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || \ | defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || \ | ||||
defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
int size = 16; | int size = 16; | ||||
#else | #else | ||||
int size = get_L2_size(); | int size = get_L2_size(); | ||||
@@ -469,6 +469,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#endif | #endif | ||||
#endif | #endif | ||||
/*
 * Forced SAPPHIRERAPIDS target.
 * With NO_AVX512 defined the target is described as HASWELL (no AVX512
 * feature macros); otherwise it is described as SAPPHIRERAPIDS with
 * AVX512VL / AVX512BF16 and -march=sapphirerapids in ARCHCONFIG.
 */
#ifdef FORCE_SAPPHIRERAPIDS
/* Definitions shared by both variants. */
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#ifdef NO_AVX512
#define SUBARCHITECTURE "HASWELL"
#define ARCHCONFIG   "-DHASWELL " \
		     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
		     "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
		     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
		     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
		     "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME   "haswell"
#define CORENAME  "HASWELL"
#else
#define SUBARCHITECTURE "SAPPHIRERAPIDS"
#define ARCHCONFIG   "-DSAPPHIRERAPIDS " \
		     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
		     "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
		     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
		     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
		     "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=sapphirerapids"
#define LIBNAME   "sapphirerapids"
#define CORENAME  "SAPPHIRERAPIDS"
#endif
#endif
#ifdef FORCE_ATOM | #ifdef FORCE_ATOM | ||||
#define FORCE | #define FORCE | ||||
#define FORCE_INTEL | #define FORCE_INTEL | ||||
@@ -198,7 +198,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
# Makefile.L3 | # Makefile.L3 | ||||
set(USE_TRMM false) | set(USE_TRMM false) | ||||
string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE) | string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE) | ||||
if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE)) | |||||
if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE) OR (UC_TARGET_CORE MATCHES SAPPHIRERAPIDS)) | |||||
set(USE_TRMM true) | set(USE_TRMM true) | ||||
endif () | endif () | ||||
if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10)) | if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10)) | ||||
@@ -31,7 +31,22 @@ ifdef NO_AVX2 | |||||
endif | endif | ||||
ifdef TARGET_CORE | ifdef TARGET_CORE | ||||
ifeq ($(TARGET_CORE), COOPERLAKE) | |||||
ifeq ($(TARGET_CORE), SAPPHIRERAPIDS) | |||||
# Kernel build flags for the SAPPHIRERAPIDS branch.
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
# -march=sapphirerapids needs GCC 11 or newer, so gate on GCCVERSIONGTEQ11;
# anything older builds against the skylake-avx512 baseline instead.
ifeq ($(GCCVERSIONGTEQ11), 1)
override CFLAGS += -march=sapphirerapids
else
override CFLAGS += -march=skylake-avx512 -mavx512f
endif
# Cygwin: always drop asynchronous unwind tables.
ifeq ($(OSNAME), CYGWIN_NT)
override CFLAGS += -fno-asynchronous-unwind-tables
endif
# Native Windows: only when the C compiler is GCC.
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
override CFLAGS += -fno-asynchronous-unwind-tables
endif
endif
else ifeq ($(TARGET_CORE), COOPERLAKE) | |||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | ||||
ifeq ($(GCCVERSIONGTEQ10), 1) | ifeq ($(GCCVERSIONGTEQ10), 1) | ||||
override CFLAGS += -march=cooperlake | override CFLAGS += -march=cooperlake | ||||
@@ -47,6 +47,10 @@ ifeq ($(CORE), COOPERLAKE) | |||||
USE_TRMM = 1 | USE_TRMM = 1 | ||||
endif | endif | ||||
# SAPPHIRERAPIDS sets USE_TRMM, like the COOPERLAKE and ZEN entries around it.
ifeq ($(CORE), SAPPHIRERAPIDS)
USE_TRMM = 1
endif
ifeq ($(CORE), ZEN) | ifeq ($(CORE), ZEN) | ||||
USE_TRMM = 1 | USE_TRMM = 1 | ||||
endif | endif | ||||
@@ -1518,7 +1518,7 @@ static void init_parameter(void) { | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
#ifdef DEBUG | #ifdef DEBUG | ||||
fprintf(stderr, "SkylakeX\n"); | fprintf(stderr, "SkylakeX\n"); | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -61,7 +61,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -63,7 +63,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -61,7 +61,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -63,7 +63,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -61,7 +61,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -0,0 +1 @@ | |||||
# Reuse the COOPERLAKE kernel list unchanged.
include $(KERNELDIR)/KERNEL.COOPERLAKE
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "caxpy_microk_steamroller-2.c" | #include "caxpy_microk_steamroller-2.c" | ||||
#elif defined(BULLDOZER) | #elif defined(BULLDOZER) | ||||
#include "caxpy_microk_bulldozer-2.c" | #include "caxpy_microk_bulldozer-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
#include "caxpy_microk_haswell-2.c" | #include "caxpy_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "caxpy_microk_sandy-2.c" | #include "caxpy_microk_sandy-2.c" | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "cdot_microk_bulldozer-2.c" | #include "cdot_microk_bulldozer-2.c" | ||||
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | ||||
#include "cdot_microk_steamroller-2.c" | #include "cdot_microk_steamroller-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "cdot_microk_haswell-2.c" | #include "cdot_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "cdot_microk_sandy-2.c" | #include "cdot_microk_sandy-2.c" | ||||
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include "common.h" | #include "common.h" | ||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "cgemv_n_microk_haswell-4.c" | #include "cgemv_n_microk_haswell-4.c" | ||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
#include "cgemv_n_microk_bulldozer-4.c" | #include "cgemv_n_microk_bulldozer-4.c" | ||||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "cgemv_t_microk_haswell-4.c" | #include "cgemv_t_microk_haswell-4.c" | ||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
#include "cgemv_t_microk_bulldozer-4.c" | #include "cgemv_t_microk_bulldozer-4.c" | ||||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "cscal_microk_haswell-2.c" | #include "cscal_microk_haswell-2.c" | ||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) | #elif defined(BULLDOZER) || defined(PILEDRIVER) | ||||
#include "cscal_microk_bulldozer-2.c" | #include "cscal_microk_bulldozer-2.c" | ||||
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "daxpy_microk_piledriver-2.c" | #include "daxpy_microk_piledriver-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "daxpy_microk_haswell-2.c" | #include "daxpy_microk_haswell-2.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "daxpy_microk_skylakex-2.c" | #include "daxpy_microk_skylakex-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "daxpy_microk_sandy-2.c" | #include "daxpy_microk_sandy-2.c" | ||||
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "ddot_microk_nehalem-2.c" | #include "ddot_microk_nehalem-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "ddot_microk_haswell-2.c" | #include "ddot_microk_haswell-2.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "ddot_microk_skylakex-2.c" | #include "ddot_microk_skylakex-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "ddot_microk_sandy-2.c" | #include "ddot_microk_sandy-2.c" | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "dgemv_n_microk_nehalem-4.c" | #include "dgemv_n_microk_nehalem-4.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
#include "dgemv_n_microk_haswell-4.c" | #include "dgemv_n_microk_haswell-4.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "dgemv_n_microk_skylakex-4.c" | #include "dgemv_n_microk_skylakex-4.c" | ||||
#endif | #endif | ||||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "dgemv_t_microk_haswell-4.c" | #include "dgemv_t_microk_haswell-4.c" | ||||
#endif | #endif | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "dscal_microk_sandy-2.c" | #include "dscal_microk_sandy-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "dscal_microk_haswell-2.c" | #include "dscal_microk_haswell-2.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "dscal_microk_skylakex-2.c" | #include "dscal_microk_skylakex-2.c" | ||||
#endif | #endif | ||||
@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "dsymv_L_microk_bulldozer-2.c" | #include "dsymv_L_microk_bulldozer-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "dsymv_L_microk_haswell-2.c" | #include "dsymv_L_microk_haswell-2.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "dsymv_L_microk_skylakex-2.c" | #include "dsymv_L_microk_skylakex-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "dsymv_L_microk_sandy-2.c" | #include "dsymv_L_microk_sandy-2.c" | ||||
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
#include "dsymv_U_microk_bulldozer-2.c" | #include "dsymv_U_microk_bulldozer-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "dsymv_U_microk_haswell-2.c" | #include "dsymv_U_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "dsymv_U_microk_sandy-2.c" | #include "dsymv_U_microk_sandy-2.c" | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "saxpy_microk_nehalem-2.c" | #include "saxpy_microk_nehalem-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "saxpy_microk_haswell-2.c" | #include "saxpy_microk_haswell-2.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "saxpy_microk_skylakex-2.c" | #include "saxpy_microk_skylakex-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "saxpy_microk_sandy-2.c" | #include "saxpy_microk_sandy-2.c" | ||||
@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined(COOPERLAKE) | |||||
#if defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
#include "sbdot_microk_cooperlake.c" | #include "sbdot_microk_cooperlake.c" | ||||
#endif | #endif | ||||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined (COOPERLAKE) | |||||
#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "sbgemv_n_microk_cooperlake.c" | #include "sbgemv_n_microk_cooperlake.c" | ||||
#endif | #endif | ||||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined (COOPERLAKE) | |||||
#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "sbgemv_t_microk_cooperlake.c" | #include "sbgemv_t_microk_cooperlake.c" | ||||
#endif | #endif | ||||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "sdot_microk_nehalem-2.c" | #include "sdot_microk_nehalem-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "sdot_microk_haswell-2.c" | #include "sdot_microk_haswell-2.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "sdot_microk_skylakex-2.c" | #include "sdot_microk_skylakex-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "sdot_microk_sandy-2.c" | #include "sdot_microk_sandy-2.c" | ||||
@@ -1,7 +1,7 @@ | |||||
/* the direct sgemm code written by Arjan van der Ven */ | /* the direct sgemm code written by Arjan van der Ven */ | ||||
#include "common.h" | #include "common.h" | ||||
#if defined(SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include <immintrin.h> | #include <immintrin.h> | ||||
@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "sgemv_n_microk_sandy-4.c" | #include "sgemv_n_microk_sandy-4.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "sgemv_n_microk_haswell-4.c" | #include "sgemv_n_microk_haswell-4.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "sgemv_n_microk_haswell-4.c" | #include "sgemv_n_microk_haswell-4.c" | ||||
#include "sgemv_n_microk_skylakex-8.c" | #include "sgemv_n_microk_skylakex-8.c" | ||||
#endif | #endif | ||||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "sgemv_t_microk_sandy-4.c" | #include "sgemv_t_microk_sandy-4.c" | ||||
#elif defined(HASWELL) || defined(ZEN) | #elif defined(HASWELL) || defined(ZEN) | ||||
#include "sgemv_t_microk_haswell-4.c" | #include "sgemv_t_microk_haswell-4.c" | ||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "sgemv_t_microk_haswell-4.c" | #include "sgemv_t_microk_haswell-4.c" | ||||
#include "sgemv_t_microk_skylakex.c" | #include "sgemv_t_microk_skylakex.c" | ||||
#endif | #endif | ||||
@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "ssymv_L_microk_bulldozer-2.c" | #include "ssymv_L_microk_bulldozer-2.c" | ||||
#elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
#include "ssymv_L_microk_nehalem-2.c" | #include "ssymv_L_microk_nehalem-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "ssymv_L_microk_haswell-2.c" | #include "ssymv_L_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "ssymv_L_microk_sandy-2.c" | #include "ssymv_L_microk_sandy-2.c" | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "ssymv_U_microk_bulldozer-2.c" | #include "ssymv_U_microk_bulldozer-2.c" | ||||
#elif defined(NEHALEM) | #elif defined(NEHALEM) | ||||
#include "ssymv_U_microk_nehalem-2.c" | #include "ssymv_U_microk_nehalem-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "ssymv_U_microk_haswell-2.c" | #include "ssymv_U_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "ssymv_U_microk_sandy-2.c" | #include "ssymv_U_microk_sandy-2.c" | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#else | #else | ||||
#endif | #endif | ||||
#if defined(COOPERLAKE) | |||||
#if defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) | |||||
#if defined(DOUBLE) | #if defined(DOUBLE) | ||||
#include "dtobf16_microk_cooperlake.c" | #include "dtobf16_microk_cooperlake.c" | ||||
#elif defined(SINGLE) | #elif defined(SINGLE) | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "zaxpy_microk_bulldozer-2.c" | #include "zaxpy_microk_bulldozer-2.c" | ||||
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
#include "zaxpy_microk_steamroller-2.c" | #include "zaxpy_microk_steamroller-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "zaxpy_microk_haswell-2.c" | #include "zaxpy_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "zaxpy_microk_sandy-2.c" | #include "zaxpy_microk_sandy-2.c" | ||||
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "zdot_microk_bulldozer-2.c" | #include "zdot_microk_bulldozer-2.c" | ||||
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) | ||||
#include "zdot_microk_steamroller-2.c" | #include "zdot_microk_steamroller-2.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "zdot_microk_haswell-2.c" | #include "zdot_microk_haswell-2.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "zdot_microk_sandy-2.c" | #include "zdot_microk_sandy-2.c" | ||||
@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "zgemv_n_microk_haswell-4.c" | #include "zgemv_n_microk_haswell-4.c" | ||||
#elif defined(SANDYBRIDGE) | #elif defined(SANDYBRIDGE) | ||||
#include "zgemv_n_microk_sandy-4.c" | #include "zgemv_n_microk_sandy-4.c" | ||||
@@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) | ||||
#include "zgemv_t_microk_bulldozer-4.c" | #include "zgemv_t_microk_bulldozer-4.c" | ||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "zgemv_t_microk_haswell-4.c" | #include "zgemv_t_microk_haswell-4.c" | ||||
#endif | #endif | ||||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#include "zscal_microk_haswell-2.c" | #include "zscal_microk_haswell-2.c" | ||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) | #elif defined(BULLDOZER) || defined(PILEDRIVER) | ||||
#include "zscal_microk_bulldozer-2.c" | #include "zscal_microk_bulldozer-2.c" | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) | |||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -1751,6 +1751,125 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#endif | #endif | ||||
#ifdef SAPPHIRERAPIDS | |||||
#define SNUMOPT 16 | |||||
#define DNUMOPT 8 | |||||
#define GEMM_DEFAULT_OFFSET_A 0 | |||||
#define GEMM_DEFAULT_OFFSET_B 0 | |||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
#define SYMV_P 8 | |||||
#if defined(XDOUBLE) || defined(DOUBLE) | |||||
#define SWITCH_RATIO 8 | |||||
#define GEMM_PREFERED_SIZE 8 | |||||
#else | |||||
#define SWITCH_RATIO 16 | |||||
#define GEMM_PREFERED_SIZE 16 | |||||
#endif | |||||
#define USE_SGEMM_KERNEL_DIRECT 1 | |||||
#ifdef ARCH_X86 | |||||
#define SGEMM_DEFAULT_UNROLL_M 4 | |||||
#define DGEMM_DEFAULT_UNROLL_M 2 | |||||
#define QGEMM_DEFAULT_UNROLL_M 2 | |||||
#define CGEMM_DEFAULT_UNROLL_M 2 | |||||
#define ZGEMM_DEFAULT_UNROLL_M 1 | |||||
#define XGEMM_DEFAULT_UNROLL_M 1 | |||||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||||
#define DGEMM_DEFAULT_UNROLL_N 4 | |||||
#define QGEMM_DEFAULT_UNROLL_N 2 | |||||
#define CGEMM_DEFAULT_UNROLL_N 2 | |||||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
#define XGEMM_DEFAULT_UNROLL_N 1 | |||||
#else | |||||
#define SGEMM_DEFAULT_UNROLL_M 16 | |||||
#define DGEMM_DEFAULT_UNROLL_M 16 | |||||
#define QGEMM_DEFAULT_UNROLL_M 2 | |||||
#define CGEMM_DEFAULT_UNROLL_M 8 | |||||
#define ZGEMM_DEFAULT_UNROLL_M 4 | |||||
#define XGEMM_DEFAULT_UNROLL_M 1 | |||||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||||
#define DGEMM_DEFAULT_UNROLL_N 2 | |||||
#define QGEMM_DEFAULT_UNROLL_N 2 | |||||
#define CGEMM_DEFAULT_UNROLL_N 2 | |||||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
#define XGEMM_DEFAULT_UNROLL_N 1 | |||||
#define SGEMM_DEFAULT_UNROLL_MN 32 | |||||
#define DGEMM_DEFAULT_UNROLL_MN 32 | |||||
#endif | |||||
#ifdef ARCH_X86 | |||||
#define SGEMM_DEFAULT_P 512 | |||||
#define SGEMM_DEFAULT_R sgemm_r | |||||
#define DGEMM_DEFAULT_P 512 | |||||
#define DGEMM_DEFAULT_R dgemm_r | |||||
#define QGEMM_DEFAULT_P 504 | |||||
#define QGEMM_DEFAULT_R qgemm_r | |||||
#define CGEMM_DEFAULT_P 128 | |||||
#define CGEMM_DEFAULT_R 1024 | |||||
#define ZGEMM_DEFAULT_P 512 | |||||
#define ZGEMM_DEFAULT_R zgemm_r | |||||
#define XGEMM_DEFAULT_P 252 | |||||
#define XGEMM_DEFAULT_R xgemm_r | |||||
#define SGEMM_DEFAULT_Q 256 | |||||
#define DGEMM_DEFAULT_Q 256 | |||||
#define QGEMM_DEFAULT_Q 128 | |||||
#define CGEMM_DEFAULT_Q 256 | |||||
#define ZGEMM_DEFAULT_Q 192 | |||||
#define XGEMM_DEFAULT_Q 128 | |||||
#else | |||||
#define SGEMM_DEFAULT_P 640 | |||||
#define DGEMM_DEFAULT_P 192 | |||||
#define CGEMM_DEFAULT_P 384 | |||||
#define ZGEMM_DEFAULT_P 256 | |||||
#define SGEMM_DEFAULT_Q 320 | |||||
#define DGEMM_DEFAULT_Q 384 | |||||
#define CGEMM_DEFAULT_Q 192 | |||||
#define ZGEMM_DEFAULT_Q 128 | |||||
#define SGEMM_DEFAULT_R sgemm_r | |||||
#define DGEMM_DEFAULT_R 8640 | |||||
#define CGEMM_DEFAULT_R cgemm_r | |||||
#define ZGEMM_DEFAULT_R zgemm_r | |||||
#define QGEMM_DEFAULT_Q 128 | |||||
#define QGEMM_DEFAULT_P 504 | |||||
#define QGEMM_DEFAULT_R qgemm_r | |||||
#define XGEMM_DEFAULT_P 252 | |||||
#define XGEMM_DEFAULT_R xgemm_r | |||||
#define XGEMM_DEFAULT_Q 128 | |||||
#define CGEMM3M_DEFAULT_UNROLL_N 4 | |||||
#define CGEMM3M_DEFAULT_UNROLL_M 8 | |||||
#define ZGEMM3M_DEFAULT_UNROLL_N 4 | |||||
#define ZGEMM3M_DEFAULT_UNROLL_M 4 | |||||
#define CGEMM3M_DEFAULT_P 320 | |||||
#define ZGEMM3M_DEFAULT_P 256 | |||||
#define XGEMM3M_DEFAULT_P 112 | |||||
#define CGEMM3M_DEFAULT_Q 320 | |||||
#define ZGEMM3M_DEFAULT_Q 256 | |||||
#define XGEMM3M_DEFAULT_Q 224 | |||||
#define CGEMM3M_DEFAULT_R 12288 | |||||
#define ZGEMM3M_DEFAULT_R 12288 | |||||
#define XGEMM3M_DEFAULT_R 12288 | |||||
#endif | |||||
#endif | |||||
#ifdef COOPERLAKE | #ifdef COOPERLAKE | ||||
#define SNUMOPT 16 | #define SNUMOPT 16 | ||||