Browse Source

Add AMD Excavator target.

tags/v0.2.15^2
Zhang Xianyi 10 years ago
parent
commit
51ff17d46e
10 changed files with 265 additions and 19 deletions
  1. +7
    -1
      Makefile.system
  2. +1
    -0
      TargetList.txt
  3. +1
    -1
      common_x86.h
  4. +1
    -1
      common_x86_64.h
  5. +2
    -0
      cpuid.h
  6. +32
    -9
      cpuid_x86.c
  7. +21
    -7
      driver/others/dynamic.c
  8. +16
    -0
      getarch.c
  9. +92
    -0
      kernel/x86_64/KERNEL.EXCAVATOR
  10. +92
    -0
      param.h

+ 7
- 1
Makefile.system View File

@@ -65,6 +65,9 @@ endif
ifeq ($(TARGET), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


@@ -92,6 +95,9 @@ endif
ifeq ($(TARGET_CORE), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


@@ -409,7 +415,7 @@ endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL


+ 1
- 0
TargetList.txt View File

@@ -33,6 +33,7 @@ BOBCAT
BULLDOZER
PILEDRIVER
STEAMROLLER
EXCAVATOR

c)VIA CPU:
SSE_GENERIC


+ 1
- 1
common_x86.h View File

@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#define MMXSTORE movd
#endif

#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
//Enable some optimazation for barcelona.
#define BARCELONA_OPTIMIZATION
#endif


+ 1
- 1
common_x86_64.h View File

@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){

#ifdef ASSEMBLER

#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
//Enable some optimazation for barcelona.
#define BARCELONA_OPTIMIZATION
#endif


+ 2
- 0
cpuid.h View File

@@ -109,6 +109,7 @@
#define CORE_PILEDRIVER 23
#define CORE_HASWELL 24
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26

#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -203,5 +204,6 @@ typedef struct {
#define CPUTYPE_PILEDRIVER 47
#define CPUTYPE_HASWELL 48
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50

#endif

+ 32
- 9
cpuid_x86.c View File

@@ -1198,11 +1198,20 @@ int get_cpuname(void){
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
case 0:
if(support_avx())
return CPUTYPE_STEAMROLLER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.

switch(exmodel){
case 3:
if(support_avx())
return CPUTYPE_STEAMROLLER;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.

case 6:
if(support_avx())
return CPUTYPE_EXCAVATOR;
else
return CPUTYPE_BARCELONA; //OS don't support AVX.
}
break;
}
break;
case 5:
@@ -1332,6 +1341,7 @@ static char *cpuname[] = {
"PILEDRIVER",
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
};

static char *lowercpuname[] = {
@@ -1384,6 +1394,7 @@ static char *lowercpuname[] = {
"piledriver",
"haswell",
"steamroller",
"excavator",
};

static char *corename[] = {
@@ -1413,6 +1424,7 @@ static char *corename[] = {
"PILEDRIVER",
"HASWELL",
"STEAMROLLER",
"EXCAVATOR",
};

static char *corename_lower[] = {
@@ -1442,6 +1454,7 @@ static char *corename_lower[] = {
"piledriver",
"haswell",
"steamroller",
"excavator",
};


@@ -1644,10 +1657,20 @@ int get_coretype(void){
return CORE_BARCELONA; //OS don't support AVX.
case 0:
if(support_avx())
return CORE_STEAMROLLER;
else
return CORE_BARCELONA; //OS don't support AVX.
switch(exmodel){
case 3:
if(support_avx())
return CORE_STEAMROLLER;
else
return CORE_BARCELONA; //OS don't support AVX.

case 6:
if(support_avx())
return CORE_EXCAVATOR;
else
return CORE_BARCELONA; //OS don't support AVX.
}
break;
}




+ 21
- 7
driver/others/dynamic.c View File

@@ -67,6 +67,7 @@ extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER;
extern gotoblas_t gotoblas_PILEDRIVER;
extern gotoblas_t gotoblas_STEAMROLLER;
extern gotoblas_t gotoblas_EXCAVATOR;
#ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#else
@@ -79,6 +80,7 @@ extern gotoblas_t gotoblas_HASWELL;
#define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
#define gotoblas_EXCAVATOR gotoblas_BARCELONA
#endif


@@ -307,12 +309,22 @@ static gotoblas_t *get_coretype(void){
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if(model == 0){
//AMD STEAMROLLER
if(support_avx())
return &gotoblas_STEAMROLLER;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
if (exmodel == 3) {
//AMD STEAMROLLER
if(support_avx())
return &gotoblas_STEAMROLLER;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
}else if (exmodel == 6) {
if(support_avx())
return &gotoblas_EXCAVATOR;
else{
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}

}
}

@@ -357,6 +369,7 @@ static char *corename[] = {
"Piledriver",
"Haswell",
"Steamroller",
"Excavator",
};

char *gotoblas_corename(void) {
@@ -382,6 +395,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
if (gotoblas == &gotoblas_HASWELL) return corename[20];
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];
if (gotoblas == &gotoblas_EXCAVATOR) return corename[22];

return corename[0];
}
@@ -412,7 +426,7 @@ static gotoblas_t *force_coretype(char *coretype){

switch (found)
{
case 22: return (&gotoblas_EXCAVATOR);
case 21: return (&gotoblas_STEAMROLLER);
case 20: return (&gotoblas_HASWELL);
case 19: return (&gotoblas_PILEDRIVER);


+ 16
- 0
getarch.c View File

@@ -448,6 +448,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "STEAMROLLER"
#endif

#if defined (FORCE_EXCAVATOR)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "EXCAVATOR"
#define ARCHCONFIG "-DEXCAVATOR " \
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
"-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
#define LIBNAME "excavator"
#define CORENAME "EXCAVATOR"
#endif


#ifdef FORCE_SSE_GENERIC
#define FORCE


+ 92
- 0
kernel/x86_64/KERNEL.EXCAVATOR View File

@@ -0,0 +1,92 @@
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
CAXPYKERNEL = caxpy.c
ZAXPYKERNEL = zaxpy.c

SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c
CDOTKERNEL = cdot.c
ZDOTKERNEL = zdot.c


DSYMV_U_KERNEL = dsymv_U.c
DSYMV_L_KERNEL = dsymv_L.c
SSYMV_U_KERNEL = ssymv_U.c
SSYMV_L_KERNEL = ssymv_L.c

SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c

DGEMVNKERNEL = dgemv_n_4.c
DGEMVTKERNEL = dgemv_t_4.c

ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t_4.c

DCOPYKERNEL = dcopy_bulldozer.S


SGEMMKERNEL = sgemm_kernel_16x2_piledriver.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
SGEMMONCOPY = gemm_ncopy_2_bulldozer.S
SGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
DGEMMONCOPY = gemm_ncopy_2_bulldozer.S
DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

CGEMMKERNEL = cgemm_kernel_4x2_piledriver.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_2x2_piledriver.S
ZGEMMINCOPY =
ZGEMMITCOPY =
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPYOBJ =
ZGEMMITCOPYOBJ =
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S

STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c


DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c



+ 92
- 0
param.h View File

@@ -499,6 +499,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif


#ifdef EXCAVATOR
#define SNUMOPT 8
#define DNUMOPT 4

#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 832
#define GEMM_DEFAULT_ALIGN 0x0fffUL



#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#define CGEMM3M_DEFAULT_UNROLL_N 4
#define CGEMM3M_DEFAULT_UNROLL_M 8
#define ZGEMM3M_DEFAULT_UNROLL_N 4
#define ZGEMM3M_DEFAULT_UNROLL_M 4
#define GEMV_UNROLL 8
#endif

#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 576
#define ZGEMM_DEFAULT_P 288
#define CGEMM_DEFAULT_P 576
#else
#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 480
#define ZGEMM_DEFAULT_P 112
#define CGEMM_DEFAULT_P 224
#endif
#define QGEMM_DEFAULT_P 112
#define XGEMM_DEFAULT_P 56

#if defined(ARCH_X86_64)
#define SGEMM_DEFAULT_Q 192
#define DGEMM_DEFAULT_Q 160
#define ZGEMM_DEFAULT_Q 160
#define CGEMM_DEFAULT_Q 160
#else
#define SGEMM_DEFAULT_Q 224
#define DGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_Q 224
#endif
#define QGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224

#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

#define SGEMM_DEFAULT_R 12288
#define QGEMM_DEFAULT_R qgemm_r
#define DGEMM_DEFAULT_R 12288
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 16
#define HAVE_EXCLUSIVE_CACHE

#define GEMM_THREAD gemm_thread_mn

#endif

#ifdef ATHLON

#define SNUMOPT 4


Loading…
Cancel
Save