| @@ -55,6 +55,7 @@ | |||||
| #define CPUTYPE_POWER6 5 | #define CPUTYPE_POWER6 5 | ||||
| #define CPUTYPE_CELL 6 | #define CPUTYPE_CELL 6 | ||||
| #define CPUTYPE_PPCG4 7 | #define CPUTYPE_PPCG4 7 | ||||
| #define CPUTYPE_POWER8 8 | |||||
| char *cpuname[] = { | char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -65,6 +66,7 @@ char *cpuname[] = { | |||||
| "POWER6", | "POWER6", | ||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8", | |||||
| }; | }; | ||||
| char *lowercpuname[] = { | char *lowercpuname[] = { | ||||
| @@ -76,6 +78,7 @@ char *lowercpuname[] = { | |||||
| "power6", | "power6", | ||||
| "cell", | "cell", | ||||
| "ppcg4", | "ppcg4", | ||||
| "power8", | |||||
| }; | }; | ||||
| char *corename[] = { | char *corename[] = { | ||||
| @@ -87,6 +90,7 @@ char *corename[] = { | |||||
| "POWER6", | "POWER6", | ||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8", | |||||
| }; | }; | ||||
| int detect(void){ | int detect(void){ | ||||
| @@ -115,7 +119,7 @@ int detect(void){ | |||||
| if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | ||||
| if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6; | |||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | |||||
| if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
| if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
| @@ -565,6 +565,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "POWER6" | #define CORENAME "POWER6" | ||||
| #endif | #endif | ||||
| #if defined(FORCE_POWER8) | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "POWER" | |||||
| #define SUBARCHITECTURE "POWER8" | |||||
| #define SUBDIRNAME "power" | |||||
| #define ARCHCONFIG "-DPOWER8 " \ | |||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \ | |||||
| "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " | |||||
| #define LIBNAME "power8" | |||||
| #define CORENAME "POWER8" | |||||
| #endif | |||||
| #ifdef FORCE_PPCG4 | #ifdef FORCE_PPCG4 | ||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "POWER" | #define ARCHITECTURE "POWER" | ||||
| @@ -0,0 +1,56 @@ | |||||
| SGEMMKERNEL = gemm_kernel_power6.S | |||||
| SGEMMINCOPY = | |||||
| SGEMMITCOPY = | |||||
| SGEMMONCOPY = gemm_ncopy_4.S | |||||
| SGEMMOTCOPY = gemm_tcopy_4.S | |||||
| SGEMMINCOPYOBJ = | |||||
| SGEMMITCOPYOBJ = | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = gemm_kernel_power6.S | |||||
| DGEMMINCOPY = | |||||
| DGEMMITCOPY = | |||||
| DGEMMONCOPY = gemm_ncopy_4.S | |||||
| DGEMMOTCOPY = gemm_tcopy_4.S | |||||
| DGEMMINCOPYOBJ = | |||||
| DGEMMITCOPYOBJ = | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = zgemm_kernel_power6.S | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_power6.S | |||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = trsm_kernel_power6_LN.S | |||||
| STRSMKERNEL_LT = trsm_kernel_power6_LT.S | |||||
| STRSMKERNEL_RN = trsm_kernel_power6_LT.S | |||||
| STRSMKERNEL_RT = trsm_kernel_power6_RT.S | |||||
| DTRSMKERNEL_LN = trsm_kernel_power6_LN.S | |||||
| DTRSMKERNEL_LT = trsm_kernel_power6_LT.S | |||||
| DTRSMKERNEL_RN = trsm_kernel_power6_LT.S | |||||
| DTRSMKERNEL_RT = trsm_kernel_power6_RT.S | |||||
| CTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S | |||||
| CTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S | |||||
| CTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S | |||||
| CTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S | |||||
| ZTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S | |||||
| ZTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S | |||||
| ZTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S | |||||
| ZTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S | |||||
| @@ -104,6 +104,11 @@ | |||||
| #define PREFETCHWSIZE 72 | #define PREFETCHWSIZE 72 | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE 16 | |||||
| #define PREFETCHWSIZE 72 | |||||
| #endif | |||||
| #ifdef PPCG4 | #ifdef PPCG4 | ||||
| #define PREFETCHSIZE 16 | #define PREFETCHSIZE 16 | ||||
| #define PREFETCHWSIZE 72 | #define PREFETCHWSIZE 72 | ||||
| @@ -108,6 +108,11 @@ | |||||
| #define PREFETCHWSIZE 48 | #define PREFETCHWSIZE 48 | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE 16 | |||||
| #define PREFETCHWSIZE 48 | |||||
| #endif | |||||
| #ifdef PPCG4 | #ifdef PPCG4 | ||||
| #define PREFETCHSIZE 16 | #define PREFETCHSIZE 16 | ||||
| #define PREFETCHWSIZE 48 | #define PREFETCHWSIZE 48 | ||||
| @@ -174,6 +174,11 @@ | |||||
| #define PREFETCHSIZE_C 40 | #define PREFETCHSIZE_C 40 | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 96 | |||||
| #define PREFETCHSIZE_C 40 | |||||
| #endif | |||||
| #ifndef NEEDPARAM | #ifndef NEEDPARAM | ||||
| #ifndef __64BIT__ | #ifndef __64BIT__ | ||||
| @@ -139,6 +139,11 @@ | |||||
| #define PREFETCHSIZE_C 8 | #define PREFETCHSIZE_C 8 | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 96 | |||||
| #define PREFETCHSIZE_C 8 | |||||
| #endif | |||||
| #define y01 f0 | #define y01 f0 | ||||
| #define y02 f1 | #define y02 f1 | ||||
| #define y03 f2 | #define y03 f2 | ||||
| @@ -168,7 +168,11 @@ | |||||
| #define PREFETCHSIZE_A 40 | #define PREFETCHSIZE_A 40 | ||||
| #endif | #endif | ||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) | |||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 40 | |||||
| #endif | |||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8) | |||||
| #define NOP1 | #define NOP1 | ||||
| #define NOP2 | #define NOP2 | ||||
| #else | #else | ||||
| @@ -167,7 +167,11 @@ | |||||
| #define PREFETCHSIZE_A 40 | #define PREFETCHSIZE_A 40 | ||||
| #endif | #endif | ||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) | |||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 40 | |||||
| #endif | |||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8) | |||||
| #define NOP1 | #define NOP1 | ||||
| #define NOP2 | #define NOP2 | ||||
| #else | #else | ||||
| @@ -170,6 +170,11 @@ | |||||
| #define PREFETCHSIZE_C 24 | #define PREFETCHSIZE_C 24 | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 24 | |||||
| #define PREFETCHSIZE_C 24 | |||||
| #endif | |||||
| #ifndef XCONJ | #ifndef XCONJ | ||||
| #define FMADDR FMADD | #define FMADDR FMADD | ||||
| #define FMSUBR FNMSUB | #define FMSUBR FNMSUB | ||||
| @@ -144,6 +144,11 @@ | |||||
| #define PREFETCHSIZE_C 8 | #define PREFETCHSIZE_C 8 | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 24 | |||||
| #define PREFETCHSIZE_C 8 | |||||
| #endif | |||||
| #if !(defined(CONJ) && defined(XCONJ)) | #if !(defined(CONJ) && defined(XCONJ)) | ||||
| #define FMADDR FMADD | #define FMADDR FMADD | ||||
| #define FMSUBR FNMSUB | #define FMSUBR FNMSUB | ||||
| @@ -169,7 +169,11 @@ | |||||
| #define PREFETCHSIZE_A 112 | #define PREFETCHSIZE_A 112 | ||||
| #endif | #endif | ||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) | |||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 112 | |||||
| #endif | |||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8) | |||||
| #define NOP1 | #define NOP1 | ||||
| #define NOP2 | #define NOP2 | ||||
| #else | #else | ||||
| @@ -166,7 +166,11 @@ | |||||
| #define PREFETCHSIZE_A 112 | #define PREFETCHSIZE_A 112 | ||||
| #endif | #endif | ||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) | |||||
| #ifdef POWER8 | |||||
| #define PREFETCHSIZE_A 112 | |||||
| #endif | |||||
| #if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8) | |||||
| #define NOP1 | #define NOP1 | ||||
| #define NOP2 | #define NOP2 | ||||
| #else | #else | ||||
| @@ -1959,6 +1959,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(POWER8) | |||||
| #define SNUMOPT 4 | |||||
| #define DNUMOPT 4 | |||||
| #define GEMM_DEFAULT_OFFSET_A 384 | |||||
| #define GEMM_DEFAULT_OFFSET_B 1024 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
| #define SGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 4 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define SGEMM_DEFAULT_P 992 | |||||
| #define DGEMM_DEFAULT_P 480 | |||||
| #define CGEMM_DEFAULT_P 488 | |||||
| #define ZGEMM_DEFAULT_P 248 | |||||
| #define SGEMM_DEFAULT_Q 504 | |||||
| #define DGEMM_DEFAULT_Q 504 | |||||
| #define CGEMM_DEFAULT_Q 400 | |||||
| #define ZGEMM_DEFAULT_Q 400 | |||||
| #define SYMV_P 8 | |||||
| #endif | |||||
| #if defined(SPARC) && defined(V7) | #if defined(SPARC) && defined(V7) | ||||
| #define SNUMOPT 4 | #define SNUMOPT 4 | ||||