@@ -336,14 +336,14 @@ ifeq ($(ARCH), x86) | |||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | ||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL | |||||
endif | endif | ||||
endif | endif | ||||
@@ -107,7 +107,7 @@ | |||||
#define CORE_BOBCAT 21 | #define CORE_BOBCAT 21 | ||||
#define CORE_BULLDOZER 22 | #define CORE_BULLDOZER 22 | ||||
#define CORE_PILEDRIVER 23 | #define CORE_PILEDRIVER 23 | ||||
#define CORE_HASWELL CORE_SANDYBRIDGE | |||||
#define CORE_HASWELL 24 | |||||
#define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
#define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
@@ -200,7 +200,6 @@ typedef struct { | |||||
#define CPUTYPE_BOBCAT 45 | #define CPUTYPE_BOBCAT 45 | ||||
#define CPUTYPE_BULLDOZER 46 | #define CPUTYPE_BULLDOZER 46 | ||||
#define CPUTYPE_PILEDRIVER 47 | #define CPUTYPE_PILEDRIVER 47 | ||||
// this define is because BLAS doesn't have haswell specific optimizations yet | |||||
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE | |||||
#define CPUTYPE_HASWELL 48 | |||||
#endif | #endif |
@@ -1243,6 +1243,7 @@ static char *cpuname[] = { | |||||
"BOBCAT", | "BOBCAT", | ||||
"BULLDOZER", | "BULLDOZER", | ||||
"PILEDRIVER", | "PILEDRIVER", | ||||
"HASWELL", | |||||
}; | }; | ||||
static char *lowercpuname[] = { | static char *lowercpuname[] = { | ||||
@@ -1293,6 +1294,7 @@ static char *lowercpuname[] = { | |||||
"bobcat", | "bobcat", | ||||
"bulldozer", | "bulldozer", | ||||
"piledriver", | "piledriver", | ||||
"haswell", | |||||
}; | }; | ||||
static char *corename[] = { | static char *corename[] = { | ||||
@@ -1320,6 +1322,7 @@ static char *corename[] = { | |||||
"BOBCAT", | "BOBCAT", | ||||
"BULLDOZER", | "BULLDOZER", | ||||
"PILEDRIVER", | "PILEDRIVER", | ||||
"HASWELL", | |||||
}; | }; | ||||
static char *corename_lower[] = { | static char *corename_lower[] = { | ||||
@@ -1347,6 +1350,7 @@ static char *corename_lower[] = { | |||||
"bobcat", | "bobcat", | ||||
"bulldozer", | "bulldozer", | ||||
"piledriver", | "piledriver", | ||||
"haswell", | |||||
}; | }; | ||||
@@ -65,14 +65,15 @@ extern gotoblas_t gotoblas_BOBCAT; | |||||
extern gotoblas_t gotoblas_SANDYBRIDGE; | extern gotoblas_t gotoblas_SANDYBRIDGE; | ||||
extern gotoblas_t gotoblas_BULLDOZER; | extern gotoblas_t gotoblas_BULLDOZER; | ||||
extern gotoblas_t gotoblas_PILEDRIVER; | extern gotoblas_t gotoblas_PILEDRIVER; | ||||
extern gotoblas_t gotoblas_HASWELL; | |||||
#else | #else | ||||
//Use NEHALEM kernels for sandy bridge | //Use NEHALEM kernels for sandy bridge | ||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | ||||
#define gotoblas_HASWELL gotoblas_NEHALEM | |||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA | #define gotoblas_BULLDOZER gotoblas_BARCELONA | ||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA | #define gotoblas_PILEDRIVER gotoblas_BARCELONA | ||||
#endif | #endif | ||||
//Use sandy bridge kernels for haswell. | |||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||||
#define VENDOR_INTEL 1 | #define VENDOR_INTEL 1 | ||||
#define VENDOR_AMD 2 | #define VENDOR_AMD 2 | ||||
@@ -297,6 +298,7 @@ static char *corename[] = { | |||||
"Bobcat", | "Bobcat", | ||||
"Bulldozer", | "Bulldozer", | ||||
"Piledriver", | "Piledriver", | ||||
"Haswell", | |||||
}; | }; | ||||
char *gotoblas_corename(void) { | char *gotoblas_corename(void) { | ||||
@@ -319,7 +321,8 @@ char *gotoblas_corename(void) { | |||||
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; | if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; | ||||
if (gotoblas == &gotoblas_BOBCAT) return corename[17]; | if (gotoblas == &gotoblas_BOBCAT) return corename[17]; | ||||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | ||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||||
if (gotoblas == &gotoblas_HASWELL) return corename[20]; | |||||
return corename[0]; | return corename[0]; | ||||
} | } | ||||
@@ -298,6 +298,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CORENAME "SANDYBRIDGE" | #define CORENAME "SANDYBRIDGE" | ||||
#endif | #endif | ||||
#ifdef FORCE_HASWELL | |||||
#define FORCE | |||||
#define FORCE_INTEL | |||||
#define ARCHITECTURE "X86" | |||||
#define SUBARCHITECTURE "HASWELL" | |||||
#define ARCHCONFIG "-DHASWELL " \ | |||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ | |||||
"-DFMA3" | |||||
#define LIBNAME "haswell" | |||||
#define CORENAME "HASWELL" | |||||
#endif | |||||
#ifdef FORCE_ATOM | #ifdef FORCE_ATOM | ||||
#define FORCE | #define FORCE | ||||
#define FORCE_INTEL | #define FORCE_INTEL | ||||
@@ -0,0 +1 @@ | |||||
include $(KERNELDIR)/KERNEL.PENRYN |
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -62,7 +62,7 @@ | |||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHSIZE (8 * 21 + 4) | #define PREFETCHSIZE (8 * 21 + 4) | ||||
#endif | #endif | ||||
@@ -61,7 +61,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -63,7 +63,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -61,7 +61,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -63,7 +63,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -61,7 +61,7 @@ | |||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht1 | #define PREFETCH prefetcht1 | ||||
#define PREFETCHSIZE 84 | #define PREFETCHSIZE 84 | ||||
#endif | #endif | ||||
@@ -0,0 +1,63 @@ | |||||
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S | |||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||||
SGEMMITCOPY = ../generic/gemm_tcopy_16.c | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
DGEMMKERNEL = dgemm_kernel_4x4_haswell.S | |||||
DGEMMINCOPY = | |||||
DGEMMITCOPY = | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
DGEMMINCOPYOBJ = | |||||
DGEMMITCOPYOBJ = | |||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S | |||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S | |||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S | |||||
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 12) | #define PREFETCHSIZE (16 * 12) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -57,7 +57,7 @@ | |||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
#endif | #endif | ||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) | |||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION) | |||||
#define PREFETCH prefetcht0 | #define PREFETCH prefetcht0 | ||||
#define PREFETCHW prefetcht0 | #define PREFETCHW prefetcht0 | ||||
#define PREFETCHSIZE (16 * 24) | #define PREFETCHSIZE (16 * 24) | ||||
@@ -1154,6 +1154,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#ifdef HASWELL | #ifdef HASWELL | ||||
<<<<<<< HEAD | |||||
#define SNUMOPT 8 | #define SNUMOPT 8 | ||||
#define DNUMOPT 4 | #define DNUMOPT 4 | ||||
@@ -1164,6 +1165,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define SYMV_P 8 | #define SYMV_P 8 | ||||
#define SWITCH_RATIO 4 | #define SWITCH_RATIO 4 | ||||
======= | |||||
#define SNUMOPT 8 | |||||
#define DNUMOPT 4 | |||||
#define GEMM_DEFAULT_OFFSET_A 0 | |||||
#define GEMM_DEFAULT_OFFSET_B 0 | |||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
#define SYMV_P 8 | |||||
#define SWITCH_RATIO 4 | |||||
>>>>>>> origin/haswell | |||||
#ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||
@@ -1233,6 +1246,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define ZGEMM_DEFAULT_Q 128 | #define ZGEMM_DEFAULT_Q 128 | ||||
#define SGEMM_DEFAULT_R sgemm_r | #define SGEMM_DEFAULT_R sgemm_r | ||||
<<<<<<< HEAD | |||||
======= | |||||
//#define DGEMM_DEFAULT_R dgemm_r | |||||
>>>>>>> origin/haswell | |||||
#define DGEMM_DEFAULT_R 13824 | #define DGEMM_DEFAULT_R 13824 | ||||
#define CGEMM_DEFAULT_R cgemm_r | #define CGEMM_DEFAULT_R cgemm_r | ||||
#define ZGEMM_DEFAULT_R zgemm_r | #define ZGEMM_DEFAULT_R zgemm_r | ||||