Browse Source

Merge remote branch 'origin/haswell' into develop

tags/v0.2.9.rc1
wernsaar 12 years ago
parent
commit
b3254eecaf
32 changed files with 16078 additions and 27 deletions
  1. +2
    -2
      Makefile.system
  2. +2
    -3
      cpuid.h
  3. +4
    -0
      cpuid_x86.c
  4. +6
    -3
      driver/others/dynamic.c
  5. +15
    -0
      getarch.c
  6. +1
    -0
      kernel/x86/KERNEL.HASWELL
  7. +1
    -1
      kernel/x86/trsm_kernel_LN_2x4_penryn.S
  8. +1
    -1
      kernel/x86/trsm_kernel_LN_4x4_penryn.S
  9. +1
    -1
      kernel/x86/trsm_kernel_LT_2x4_penryn.S
  10. +1
    -1
      kernel/x86/trsm_kernel_LT_4x4_penryn.S
  11. +1
    -1
      kernel/x86/trsm_kernel_RT_2x4_penryn.S
  12. +1
    -1
      kernel/x86/trsm_kernel_RT_4x4_penryn.S
  13. +1
    -1
      kernel/x86/ztrsm_kernel_LN_2x2_penryn.S
  14. +1
    -1
      kernel/x86/ztrsm_kernel_LT_1x2_penryn.S
  15. +1
    -1
      kernel/x86/ztrsm_kernel_LT_2x2_penryn.S
  16. +1
    -1
      kernel/x86/ztrsm_kernel_RT_1x2_penryn.S
  17. +1
    -1
      kernel/x86/ztrsm_kernel_RT_2x2_penryn.S
  18. +63
    -0
      kernel/x86_64/KERNEL.HASWELL
  19. +2284
    -0
      kernel/x86_64/cgemm_kernel_8x2_haswell.S
  20. +5215
    -0
      kernel/x86_64/dgemm_kernel_16x2_haswell.S
  21. +3479
    -0
      kernel/x86_64/dgemm_kernel_4x4_haswell.S
  22. +3159
    -0
      kernel/x86_64/sgemm_kernel_16x4_haswell.S
  23. +1
    -1
      kernel/x86_64/symv_L_sse.S
  24. +1
    -1
      kernel/x86_64/symv_L_sse2.S
  25. +1
    -1
      kernel/x86_64/symv_U_sse.S
  26. +1
    -1
      kernel/x86_64/symv_U_sse2.S
  27. +1812
    -0
      kernel/x86_64/zgemm_kernel_4x2_haswell.S
  28. +1
    -1
      kernel/x86_64/zsymv_L_sse.S
  29. +1
    -1
      kernel/x86_64/zsymv_L_sse2.S
  30. +1
    -1
      kernel/x86_64/zsymv_U_sse.S
  31. +1
    -1
      kernel/x86_64/zsymv_U_sse2.S
  32. +17
    -0
      param.h

+ 2
- 2
Makefile.system View File

@@ -336,14 +336,14 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1) ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
endif endif
endif endif


ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1) ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
endif endif
endif endif




+ 2
- 3
cpuid.h View File

@@ -107,7 +107,7 @@
#define CORE_BOBCAT 21 #define CORE_BOBCAT 21
#define CORE_BULLDOZER 22 #define CORE_BULLDOZER 22
#define CORE_PILEDRIVER 23 #define CORE_PILEDRIVER 23
#define CORE_HASWELL CORE_SANDYBRIDGE
#define CORE_HASWELL 24


#define HAVE_SSE (1 << 0) #define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1) #define HAVE_SSE2 (1 << 1)
@@ -200,7 +200,6 @@ typedef struct {
#define CPUTYPE_BOBCAT 45 #define CPUTYPE_BOBCAT 45
#define CPUTYPE_BULLDOZER 46 #define CPUTYPE_BULLDOZER 46
#define CPUTYPE_PILEDRIVER 47 #define CPUTYPE_PILEDRIVER 47
// this define is because BLAS doesn't have haswell specific optimizations yet
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
#define CPUTYPE_HASWELL 48


#endif #endif

+ 4
- 0
cpuid_x86.c View File

@@ -1243,6 +1243,7 @@ static char *cpuname[] = {
"BOBCAT", "BOBCAT",
"BULLDOZER", "BULLDOZER",
"PILEDRIVER", "PILEDRIVER",
"HASWELL",
}; };


static char *lowercpuname[] = { static char *lowercpuname[] = {
@@ -1293,6 +1294,7 @@ static char *lowercpuname[] = {
"bobcat", "bobcat",
"bulldozer", "bulldozer",
"piledriver", "piledriver",
"haswell",
}; };


static char *corename[] = { static char *corename[] = {
@@ -1320,6 +1322,7 @@ static char *corename[] = {
"BOBCAT", "BOBCAT",
"BULLDOZER", "BULLDOZER",
"PILEDRIVER", "PILEDRIVER",
"HASWELL",
}; };


static char *corename_lower[] = { static char *corename_lower[] = {
@@ -1347,6 +1350,7 @@ static char *corename_lower[] = {
"bobcat", "bobcat",
"bulldozer", "bulldozer",
"piledriver", "piledriver",
"haswell",
}; };






+ 6
- 3
driver/others/dynamic.c View File

@@ -65,14 +65,15 @@ extern gotoblas_t gotoblas_BOBCAT;
extern gotoblas_t gotoblas_SANDYBRIDGE; extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER; extern gotoblas_t gotoblas_BULLDOZER;
extern gotoblas_t gotoblas_PILEDRIVER; extern gotoblas_t gotoblas_PILEDRIVER;
extern gotoblas_t gotoblas_HASWELL;
#else #else
//Use NEHALEM kernels for sandy bridge //Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#define gotoblas_HASWELL gotoblas_NEHALEM
#define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA
#endif #endif
//Use sandy bridge kernels for haswell.
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE



#define VENDOR_INTEL 1 #define VENDOR_INTEL 1
#define VENDOR_AMD 2 #define VENDOR_AMD 2
@@ -297,6 +298,7 @@ static char *corename[] = {
"Bobcat", "Bobcat",
"Bulldozer", "Bulldozer",
"Piledriver", "Piledriver",
"Haswell",
}; };


char *gotoblas_corename(void) { char *gotoblas_corename(void) {
@@ -319,7 +321,8 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
if (gotoblas == &gotoblas_BOBCAT) return corename[17]; if (gotoblas == &gotoblas_BOBCAT) return corename[17];
if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
if (gotoblas == &gotoblas_HASWELL) return corename[20];


return corename[0]; return corename[0];
} }


+ 15
- 0
getarch.c View File

@@ -298,6 +298,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "SANDYBRIDGE" #define CORENAME "SANDYBRIDGE"
#endif #endif


/*
 * Forced-target configuration for Intel Haswell.
 *
 * Active only when FORCE_HASWELL is defined. It sets the FORCE and
 * FORCE_INTEL markers and supplies the identification strings for this
 * target (ARCHITECTURE, SUBARCHITECTURE, LIBNAME, CORENAME) together with
 * the ARCHCONFIG string of -D options.
 * NOTE(review): how these macros are consumed is not visible here;
 * presumably the rest of this file emits them into the build
 * configuration -- confirm before changing any of the strings.
 */
#ifdef FORCE_HASWELL
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "HASWELL"
/*
 * ARCHCONFIG is built from adjacent string literals, which the compiler
 * concatenates; every piece except the last therefore ends with a space so
 * the options stay separated.
 * It passes: -DHASWELL; L1_DATA_SIZE=32768 and L1_DATA_LINESIZE=64;
 * L2_SIZE=262144 and L2_LINESIZE=64 (sizes presumably in bytes);
 * DTB_DEFAULT_ENTRIES=64 and DTB_SIZE=4096; the HAVE_* feature flags from
 * CMOV/MMX through SSE4_2 and AVX; and finally FMA3.
 * No AVX2 define is passed by this string.
 */
#define ARCHCONFIG "-DHASWELL " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3"
/* Lower-case and upper-case names used to label this target. */
#define LIBNAME "haswell"
#define CORENAME "HASWELL"
#endif

#ifdef FORCE_ATOM #ifdef FORCE_ATOM
#define FORCE #define FORCE
#define FORCE_INTEL #define FORCE_INTEL


+ 1
- 0
kernel/x86/KERNEL.HASWELL View File

@@ -0,0 +1 @@
# The HASWELL target in this kernel directory defines no kernels of its own:
# it pulls in the PENRYN kernel selection unchanged.
include $(KERNELDIR)/KERNEL.PENRYN

+ 1
- 1
kernel/x86/trsm_kernel_LN_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_LN_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_LT_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_LT_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_RT_2x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/trsm_kernel_RT_4x4_penryn.S View File

@@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LN_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LT_1x2_penryn.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_LT_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_RT_1x2_penryn.S View File

@@ -63,7 +63,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 1
- 1
kernel/x86/ztrsm_kernel_RT_2x2_penryn.S View File

@@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif


+ 63
- 0
kernel/x86_64/KERNEL.HASWELL View File

@@ -0,0 +1,63 @@
# Kernel selection for the HASWELL target.
# Each precision (S, D, C, Z) names its GEMM kernel source, the copy routines
# that go with it, and the object names those copy routines are built as.

# Single precision: Haswell-specific 16x4 kernel. The IN/IT copy routines use
# the *_16 generic sources and the ON/OT copy routines use the *_4 generic
# sources, matching the 16x4 in the kernel file name.
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Double precision: Haswell-specific 4x4 kernel. The IN/IT copy variables and
# their OBJ counterparts are deliberately left empty; only the ON/OT *_4
# routines are named.
# NOTE(review): presumably the build reuses the ON/OT routines for both
# operands when the kernel is square (4x4) -- confirm in the kernel makefiles.
DGEMMKERNEL = dgemm_kernel_4x4_haswell.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Single-precision complex: Haswell-specific 8x2 kernel with the generic
# zgemm *_8 (IN/IT) and *_2 (ON/OT) copy sources.
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Double-precision complex: Haswell-specific 4x2 kernel with the generic
# zgemm *_4 (IN/IT) and *_2 (ON/OT) copy sources.
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

# TRSM: no Haswell-specific kernels. All four precisions, real and complex
# alike, point at the same generic C sources for the LN/LT/RN/RT variants.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

# GEMM3M: reuses the existing Nehalem kernels rather than Haswell-specific ones.
CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S


+ 2284
- 0
kernel/x86_64/cgemm_kernel_8x2_haswell.S
File diff suppressed because it is too large
View File


+ 5215
- 0
kernel/x86_64/dgemm_kernel_16x2_haswell.S
File diff suppressed because it is too large
View File


+ 3479
- 0
kernel/x86_64/dgemm_kernel_4x4_haswell.S
File diff suppressed because it is too large
View File


+ 3159
- 0
kernel/x86_64/sgemm_kernel_16x4_haswell.S
File diff suppressed because it is too large
View File


+ 1
- 1
kernel/x86_64/symv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)


+ 1
- 1
kernel/x86_64/symv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1812
- 0
kernel/x86_64/zgemm_kernel_4x2_haswell.S
File diff suppressed because it is too large
View File


+ 1
- 1
kernel/x86_64/zsymv_L_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_L_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 1
- 1
kernel/x86_64/zsymv_U_sse2.S View File

@@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif


#if defined(NEHALEM) || defined(SANDYBRIDGE)
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)


+ 17
- 0
param.h View File

@@ -1154,6 +1154,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#ifdef HASWELL #ifdef HASWELL


<<<<<<< HEAD
#define SNUMOPT 8 #define SNUMOPT 8
#define DNUMOPT 4 #define DNUMOPT 4


@@ -1164,6 +1165,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 8 #define SYMV_P 8


#define SWITCH_RATIO 4 #define SWITCH_RATIO 4
=======
#define SNUMOPT 8
#define DNUMOPT 4

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL

#define SYMV_P 8

#define SWITCH_RATIO 4
>>>>>>> origin/haswell


#ifdef ARCH_X86 #ifdef ARCH_X86


@@ -1233,6 +1246,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_Q 128 #define ZGEMM_DEFAULT_Q 128


#define SGEMM_DEFAULT_R sgemm_r #define SGEMM_DEFAULT_R sgemm_r
<<<<<<< HEAD
=======
//#define DGEMM_DEFAULT_R dgemm_r
>>>>>>> origin/haswell
#define DGEMM_DEFAULT_R 13824 #define DGEMM_DEFAULT_R 13824
#define CGEMM_DEFAULT_R cgemm_r #define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r #define ZGEMM_DEFAULT_R zgemm_r


Loading…
Cancel
Save