@@ -15,7 +15,7 @@ VERSION = 0.2.13 | |||
# TARGET = PENRYN | |||
# If you want to support multiple architectures in one binary | |||
# DYNAMIC_ARCH = 1 | |||
DYNAMIC_ARCH = 1 | |||
# C compiler including binary type(32bit / 64bit). Default is gcc. | |||
# Don't use the Intel Compiler or PGI; they won't generate the code I expect. | |||
@@ -61,6 +61,9 @@ endif | |||
ifeq ($(TARGET), PILEDRIVER) | |||
GETARCH_FLAGS := -DFORCE_BARCELONA | |||
endif | |||
ifeq ($(TARGET), STEAMROLLER) | |||
GETARCH_FLAGS := -DFORCE_BARCELONA | |||
endif | |||
endif | |||
@@ -85,6 +88,9 @@ endif | |||
ifeq ($(TARGET_CORE), PILEDRIVER) | |||
GETARCH_FLAGS := -DFORCE_BARCELONA | |||
endif | |||
ifeq ($(TARGET_CORE), STEAMROLLER) | |||
GETARCH_FLAGS := -DFORCE_BARCELONA | |||
endif | |||
endif | |||
@@ -392,7 +398,7 @@ endif | |||
ifeq ($(ARCH), x86_64) | |||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
ifneq ($(NO_AVX), 1) | |||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER | |||
endif | |||
ifneq ($(NO_AVX2), 1) | |||
DYNAMIC_CORE += HASWELL | |||
@@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt | |||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. | |||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar) | |||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations. | |||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations. | |||
#### MIPS64: | |||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2. | |||
@@ -32,6 +32,7 @@ ISTANBUL | |||
BOBCAT | |||
BULLDOZER | |||
PILEDRIVER | |||
STEAMROLLER | |||
c)VIA CPU: | |||
SSE_GENERIC | |||
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||
#define MMXSTORE movd | |||
#endif | |||
#if defined(PILEDRIVER) || defined(BULLDOZER) | |||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) | |||
//Enable some optimization for barcelona. | |||
#define BARCELONA_OPTIMIZATION | |||
#endif | |||
@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||
#ifdef ASSEMBLER | |||
#if defined(PILEDRIVER) || defined(BULLDOZER) | |||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) | |||
//Enable some optimization for barcelona. | |||
#define BARCELONA_OPTIMIZATION | |||
#endif | |||
@@ -104,10 +104,11 @@ | |||
#define CORE_ATOM 18 | |||
#define CORE_NANO 19 | |||
#define CORE_SANDYBRIDGE 20 | |||
#define CORE_BOBCAT 21 | |||
#define CORE_BULLDOZER 22 | |||
#define CORE_BOBCAT 21 | |||
#define CORE_BULLDOZER 22 | |||
#define CORE_PILEDRIVER 23 | |||
#define CORE_HASWELL 24 | |||
#define CORE_HASWELL 24 | |||
#define CORE_STEAMROLLER 25 | |||
#define HAVE_SSE (1 << 0) | |||
#define HAVE_SSE2 (1 << 1) | |||
@@ -200,6 +201,7 @@ typedef struct { | |||
#define CPUTYPE_BOBCAT 45 | |||
#define CPUTYPE_BULLDOZER 46 | |||
#define CPUTYPE_PILEDRIVER 47 | |||
#define CPUTYPE_HASWELL 48 | |||
#define CPUTYPE_HASWELL 48 | |||
#define CPUTYPE_STEAMROLLER 49 | |||
#endif |
@@ -1162,6 +1162,12 @@ int get_cpuname(void){ | |||
return CPUTYPE_PILEDRIVER; | |||
else | |||
return CPUTYPE_BARCELONA; //OS don't support AVX. | |||
case 0: | |||
if(support_avx()) | |||
return CPUTYPE_STEAMROLLER; | |||
else | |||
return CPUTYPE_BARCELONA; //OS don't support AVX. | |||
} | |||
break; | |||
case 5: | |||
@@ -1290,6 +1296,7 @@ static char *cpuname[] = { | |||
"BULLDOZER", | |||
"PILEDRIVER", | |||
"HASWELL", | |||
"STEAMROLLER", | |||
}; | |||
static char *lowercpuname[] = { | |||
@@ -1341,6 +1348,7 @@ static char *lowercpuname[] = { | |||
"bulldozer", | |||
"piledriver", | |||
"haswell", | |||
"steamroller", | |||
}; | |||
static char *corename[] = { | |||
@@ -1369,6 +1377,7 @@ static char *corename[] = { | |||
"BULLDOZER", | |||
"PILEDRIVER", | |||
"HASWELL", | |||
"STEAMROLLER", | |||
}; | |||
static char *corename_lower[] = { | |||
@@ -1397,6 +1406,7 @@ static char *corename_lower[] = { | |||
"bulldozer", | |||
"piledriver", | |||
"haswell", | |||
"steamroller", | |||
}; | |||
@@ -1562,7 +1572,15 @@ int get_coretype(void){ | |||
return CORE_PILEDRIVER; | |||
else | |||
return CORE_BARCELONA; //OS don't support AVX. | |||
case 0: | |||
if(support_avx()) | |||
return CORE_STEAMROLLER; | |||
else | |||
return CORE_BARCELONA; //OS don't support AVX. | |||
} | |||
}else return CORE_BARCELONA; | |||
} | |||
} | |||
@@ -66,6 +66,7 @@ extern gotoblas_t gotoblas_BOBCAT; | |||
extern gotoblas_t gotoblas_SANDYBRIDGE; | |||
extern gotoblas_t gotoblas_BULLDOZER; | |||
extern gotoblas_t gotoblas_PILEDRIVER; | |||
extern gotoblas_t gotoblas_STEAMROLLER; | |||
#ifdef NO_AVX2 | |||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE | |||
#else | |||
@@ -77,6 +78,7 @@ extern gotoblas_t gotoblas_HASWELL; | |||
#define gotoblas_HASWELL gotoblas_NEHALEM | |||
#define gotoblas_BULLDOZER gotoblas_BARCELONA | |||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA | |||
#define gotoblas_STEAMROLLER gotoblas_BARCELONA | |||
#endif | |||
@@ -275,7 +277,17 @@ static gotoblas_t *get_coretype(void){ | |||
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
} | |||
}else if(model == 0){ | |||
//AMD STEAMROLLER | |||
if(support_avx()) | |||
return &gotoblas_STEAMROLLER; | |||
else{ | |||
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
} | |||
} | |||
} else { | |||
return &gotoblas_BARCELONA; | |||
} | |||
@@ -315,6 +327,7 @@ static char *corename[] = { | |||
"Bulldozer", | |||
"Piledriver", | |||
"Haswell", | |||
"Steamroller", | |||
}; | |||
char *gotoblas_corename(void) { | |||
@@ -339,6 +352,7 @@ char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; | |||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; | |||
if (gotoblas == &gotoblas_HASWELL) return corename[20]; | |||
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21]; | |||
return corename[0]; | |||
} | |||
@@ -370,6 +384,7 @@ static gotoblas_t *force_coretype(char *coretype){ | |||
switch (found) | |||
{ | |||
case 21: return (&gotoblas_STEAMROLLER); | |||
case 20: return (&gotoblas_HASWELL); | |||
case 19: return (&gotoblas_PILEDRIVER); | |||
case 18: return (&gotoblas_BULLDOZER); | |||
@@ -166,7 +166,7 @@ int get_L2_size(void){ | |||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | |||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | |||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | |||
defined(PILEDRIVER) || defined(HASWELL) | |||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) | |||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | |||
@@ -251,7 +251,7 @@ void blas_set_parameter(void){ | |||
env_var_t p; | |||
int factor; | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) | |||
int size = 16; | |||
#else | |||
int size = get_L2_size(); | |||
@@ -432,6 +432,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define CORENAME "PILEDRIVER" | |||
#endif | |||
/*
 * Forced-target configuration used when FORCE_STEAMROLLER is defined.
 *
 * Defines FORCE and FORCE_INTEL, the architecture/sub-architecture strings,
 * the library and core names, and ARCHCONFIG: a single string of -D compiler
 * flags assembled from adjacent string literals.  The flags carry:
 *   - the -DSTEAMROLLER target define,
 *   - cache geometry (L1 data 16384 bytes, L2 2097152 bytes, L3 12582912
 *     bytes, 64-byte L1/L2 line sizes),
 *   - DTB_DEFAULT_ENTRIES / DTB_SIZE,
 *   - the HAVE_* feature defines (MMX, SSE..SSE4.2, SSE4A, AVX, FMA4, FMA3, ...).
 *
 * NOTE(review): FORCE_INTEL is set here although the sub-architecture name is
 * an AMD core; what FORCE_INTEL controls is not visible in this hunk -- confirm
 * against its consumers before changing it.
 */
#if defined (FORCE_STEAMROLLER) | |||
#define FORCE | |||
#define FORCE_INTEL | |||
#define ARCHITECTURE "X86" | |||
#define SUBARCHITECTURE "STEAMROLLER" | |||
#define ARCHCONFIG "-DSTEAMROLLER " \ | |||
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \ | |||
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \ | |||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \ | |||
"-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3" | |||
#define LIBNAME "steamroller" | |||
#define CORENAME "STEAMROLLER" | |||
#endif | |||
#ifdef FORCE_SSE_GENERIC | |||
#define FORCE | |||
#define FORCE_INTEL | |||
@@ -941,6 +941,23 @@ static void init_parameter(void) { | |||
#endif | |||
#endif | |||
/*
 * STEAMROLLER section of init_parameter(): compiled only when STEAMROLLER is
 * defined.  Copies the compile-time *GEMM_DEFAULT_P constants into the
 * corresponding *gemm_p fields of TABLE_NAME.
 */
#ifdef STEAMROLLER | |||
#ifdef DEBUG | |||
/* Debug builds announce on stderr which core's parameters are being set. */
fprintf(stderr, "Steamroller\n"); | |||
#endif | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
/* The q/x precision entries are filled in only when EXPRECISION is defined. */
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
#endif | |||
#endif | |||
#ifdef NANO | |||
#ifdef DEBUG | |||
@@ -0,0 +1,76 @@ | |||
# Kernel source selection (presumably the per-target kernel list for the
# STEAMROLLER core added by this patch -- the file name is not visible here).
# Each variable names the source file used for one routine; the sources are a
# mix of bulldozer-, piledriver- and barcelona-named files plus ../generic/.

# GEMV kernels (N = sgemv_n/zgemv_n/dgemv_n, T = the *_t counterparts).
SGEMVNKERNEL = sgemv_n_4.c | |||
SGEMVTKERNEL = sgemv_t_4.c | |||
ZGEMVNKERNEL = zgemv_n_dup.S | |||
ZGEMVTKERNEL = zgemv_t_4.c | |||
DGEMVNKERNEL = dgemv_n_bulldozer.S | |||
DGEMVTKERNEL = dgemv_t_bulldozer.S | |||
# Double-precision dot and copy kernels.
DDOTKERNEL = ddot_bulldozer.S | |||
DCOPYKERNEL = dcopy_bulldozer.S | |||
# SGEMM: 16x2 piledriver kernel with its IN/IT/ON/OT copy sources and the
# object names they are built into.
SGEMMKERNEL = sgemm_kernel_16x2_piledriver.S | |||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_16.c | |||
SGEMMONCOPY = gemm_ncopy_2_bulldozer.S | |||
SGEMMOTCOPY = gemm_tcopy_2_bulldozer.S | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# DGEMM: 8x2 piledriver kernel with bulldozer copy sources.
DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S | |||
DGEMMINCOPY = dgemm_ncopy_8_bulldozer.S | |||
DGEMMITCOPY = dgemm_tcopy_8_bulldozer.S | |||
DGEMMONCOPY = gemm_ncopy_2_bulldozer.S | |||
DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# CGEMM: 4x2 piledriver kernel with generic copy sources.
CGEMMKERNEL = cgemm_kernel_4x2_piledriver.S | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# ZGEMM: 2x2 piledriver kernel.  The IN/IT copy sources and objects are
# assigned empty values; only the ON/OT copies are named.
ZGEMMKERNEL = zgemm_kernel_2x2_piledriver.S | |||
ZGEMMINCOPY = | |||
ZGEMMITCOPY = | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMINCOPYOBJ = | |||
ZGEMMITCOPYOBJ = | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# 3M complex GEMM kernels use the barcelona-named sources.
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S | |||
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S | |||
# TRSM kernels: generic C sources everywhere except DTRSM LT/RN, which use
# the 8x2 bulldozer assembly kernels.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S | |||
DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) | |||
#include "ddot_microk_bulldozer-2.c" | |||
#elif defined(NEHALEM) | |||
#include "ddot_microk_nehalem-2.c" | |||
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) | |||
#include "sdot_microk_bulldozer-2.c" | |||
#elif defined(NEHALEM) | |||
#include "sdot_microk_nehalem-2.c" | |||
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) | |||
#include "sgemv_n_microk_bulldozer-4.c" | |||
#elif defined(NEHALEM) | |||
#include "sgemv_n_microk_nehalem-4.c" | |||
@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#if defined(NEHALEM) | |||
#include "sgemv_t_microk_nehalem-4.c" | |||
#elif defined(BULLDOZER) || defined(PILEDRIVER) | |||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) | |||
#include "sgemv_t_microk_bulldozer-4.c" | |||
#elif defined(SANDYBRIDGE) | |||
#include "sgemv_t_microk_sandy-4.c" | |||
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) | |||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) | |||
#include "zgemv_t_microk_bulldozer-4.c" | |||
#elif defined(HASWELL) | |||
#include "zgemv_t_microk_haswell-4.c" | |||
@@ -406,6 +406,99 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
/*
 * Compile-time tuning constants used when STEAMROLLER is defined.
 * NOTE(review): the *_UNROLL_M/N, *_P, *_Q and *_R names are presumably GEMM
 * unroll factors and blocking sizes -- confirm against the code that consumes
 * them before retuning any value.
 */
#ifdef STEAMROLLER | |||
#define SNUMOPT 8 | |||
#define DNUMOPT 4 | |||
#define GEMM_DEFAULT_OFFSET_A 64 | |||
#define GEMM_DEFAULT_OFFSET_B 832 | |||
#define GEMM_DEFAULT_ALIGN 0x0fffUL | |||
/* UNROLL_N values that do not depend on the architecture variant. */
#define QGEMM_DEFAULT_UNROLL_N 2 | |||
#define CGEMM_DEFAULT_UNROLL_N 2 | |||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||
#define XGEMM_DEFAULT_UNROLL_N 1 | |||
/* Unroll factors when ARCH_X86 is defined. */
#ifdef ARCH_X86 | |||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||
#define DGEMM_DEFAULT_UNROLL_N 4 | |||
#define SGEMM_DEFAULT_UNROLL_M 4 | |||
#define DGEMM_DEFAULT_UNROLL_M 2 | |||
#define QGEMM_DEFAULT_UNROLL_M 2 | |||
#define CGEMM_DEFAULT_UNROLL_M 2 | |||
#define ZGEMM_DEFAULT_UNROLL_M 1 | |||
#define XGEMM_DEFAULT_UNROLL_M 1 | |||
/*
 * Unroll factors when ARCH_X86 is not defined.  The 3M unroll factors and
 * GEMV_UNROLL are defined only in this branch.
 */
#else | |||
#define SGEMM_DEFAULT_UNROLL_N 2 | |||
#define DGEMM_DEFAULT_UNROLL_N 2 | |||
#define SGEMM_DEFAULT_UNROLL_M 16 | |||
#define DGEMM_DEFAULT_UNROLL_M 8 | |||
#define QGEMM_DEFAULT_UNROLL_M 2 | |||
#define CGEMM_DEFAULT_UNROLL_M 4 | |||
#define ZGEMM_DEFAULT_UNROLL_M 2 | |||
#define XGEMM_DEFAULT_UNROLL_M 1 | |||
#define CGEMM3M_DEFAULT_UNROLL_N 4 | |||
#define CGEMM3M_DEFAULT_UNROLL_M 8 | |||
#define ZGEMM3M_DEFAULT_UNROLL_N 4 | |||
#define ZGEMM3M_DEFAULT_UNROLL_M 4 | |||
#define GEMV_UNROLL 8 | |||
#endif | |||
/* S/D/Z/C *_DEFAULT_P: one set for ARCH_X86_64, another otherwise. */
#if defined(ARCH_X86_64) | |||
#define SGEMM_DEFAULT_P 768 | |||
#define DGEMM_DEFAULT_P 768 | |||
#define ZGEMM_DEFAULT_P 384 | |||
#define CGEMM_DEFAULT_P 768 | |||
#else | |||
#define SGEMM_DEFAULT_P 448 | |||
#define DGEMM_DEFAULT_P 480 | |||
#define ZGEMM_DEFAULT_P 112 | |||
#define CGEMM_DEFAULT_P 224 | |||
#endif | |||
#define QGEMM_DEFAULT_P 112 | |||
#define XGEMM_DEFAULT_P 56 | |||
/* S/D/Z/C *_DEFAULT_Q: one set for ARCH_X86_64, another otherwise. */
#if defined(ARCH_X86_64) | |||
#define SGEMM_DEFAULT_Q 192 | |||
#define DGEMM_DEFAULT_Q 168 | |||
#define ZGEMM_DEFAULT_Q 168 | |||
#define CGEMM_DEFAULT_Q 168 | |||
#else | |||
#define SGEMM_DEFAULT_Q 224 | |||
#define DGEMM_DEFAULT_Q 224 | |||
#define ZGEMM_DEFAULT_Q 224 | |||
#define CGEMM_DEFAULT_Q 224 | |||
#endif | |||
#define QGEMM_DEFAULT_Q 224 | |||
#define XGEMM_DEFAULT_Q 224 | |||
/* 3M variants: fixed P/Q/R values regardless of architecture variant. */
#define CGEMM3M_DEFAULT_P 448 | |||
#define ZGEMM3M_DEFAULT_P 224 | |||
#define XGEMM3M_DEFAULT_P 112 | |||
#define CGEMM3M_DEFAULT_Q 224 | |||
#define ZGEMM3M_DEFAULT_Q 224 | |||
#define XGEMM3M_DEFAULT_Q 224 | |||
#define CGEMM3M_DEFAULT_R 12288 | |||
#define ZGEMM3M_DEFAULT_R 12288 | |||
#define XGEMM3M_DEFAULT_R 12288 | |||
/*
 * *_DEFAULT_R: S and D are literal constants; Q, C, Z and X expand to the
 * identifiers qgemm_r, cgemm_r, zgemm_r and xgemm_r, which are declared
 * outside this block.
 */
#define SGEMM_DEFAULT_R 12288 | |||
#define QGEMM_DEFAULT_R qgemm_r | |||
#define DGEMM_DEFAULT_R 12288 | |||
#define CGEMM_DEFAULT_R cgemm_r | |||
#define ZGEMM_DEFAULT_R zgemm_r | |||
#define XGEMM_DEFAULT_R xgemm_r | |||
#define SYMV_P 16 | |||
#define HAVE_EXCLUSIVE_CACHE | |||
#define GEMM_THREAD gemm_thread_mn | |||
#endif | |||
#ifdef ATHLON | |||
#define SNUMOPT 4 | |||