This is the initial patch to support build infrastructure for the POWER10 architecture. (tags/v0.3.11^2)
| @@ -9,6 +9,16 @@ else | |||||
| USE_OPENMP = 1 | USE_OPENMP = 1 | ||||
| endif | endif | ||||
| ifeq ($(CORE), POWER10) | |||||
| ifeq ($(USE_OPENMP), 1) | |||||
| COMMON_OPT += -Ofast -mcpu=future -mtune=future -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | |||||
| FCOMMON_OPT += -O2 -frecursive -mcpu=future -mtune=future -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | |||||
| else | |||||
| COMMON_OPT += -Ofast -mcpu=future -mtune=future -mvsx -malign-power -fno-fast-math | |||||
| FCOMMON_OPT += -O2 -frecursive -mcpu=future -mtune=future -malign-power -fno-fast-math | |||||
| endif | |||||
| endif | |||||
| ifeq ($(CORE), POWER9) | ifeq ($(CORE), POWER9) | ||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | ||||
| @@ -595,6 +595,7 @@ DYNAMIC_CORE = POWER6 | |||||
| DYNAMIC_CORE += POWER8 | DYNAMIC_CORE += POWER8 | ||||
| ifneq ($(C_COMPILER), GCC) | ifneq ($(C_COMPILER), GCC) | ||||
| DYNAMIC_CORE += POWER9 | DYNAMIC_CORE += POWER9 | ||||
| DYNAMIC_CORE += POWER10 | |||||
| endif | endif | ||||
| ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
| GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) | GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) | ||||
| @@ -603,6 +604,12 @@ DYNAMIC_CORE += POWER9 | |||||
| else | else | ||||
| $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) | $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) | ||||
| endif | endif | ||||
| GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11) | |||||
| ifeq ($(GCCVERSIONGTEQ11), 1) | |||||
| DYNAMIC_CORE += POWER10 | |||||
| else | |||||
| $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.) | |||||
| endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -49,6 +49,7 @@ POWER6 | |||||
| POWER7 | POWER7 | ||||
| POWER8 | POWER8 | ||||
| POWER9 | POWER9 | ||||
| POWER10 | |||||
| PPCG4 | PPCG4 | ||||
| PPC970 | PPC970 | ||||
| PPC970MP | PPC970MP | ||||
| @@ -49,7 +49,7 @@ if (DYNAMIC_ARCH) | |||||
| endif () | endif () | ||||
| if (POWER) | if (POWER) | ||||
| set(DYNAMIC_CORE POWER6 POWER8 POWER9) | |||||
| set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10) | |||||
| endif () | endif () | ||||
| if (X86) | if (X86) | ||||
| @@ -420,7 +420,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||||
| set(ZGEMM_UNROLL_M 8) | set(ZGEMM_UNROLL_M 8) | ||||
| set(ZGEMM_UNROLL_N 2) | set(ZGEMM_UNROLL_N 2) | ||||
| set(SYMV_P 8) | set(SYMV_P 8) | ||||
| elseif ("${TCORE}" STREQUAL "POWER9") | |||||
| elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
| "#define L1_DATA_SIZE 32768\n" | "#define L1_DATA_SIZE 32768\n" | ||||
| "#define L1_DATA_LINESIZE 128\n" | "#define L1_DATA_LINESIZE 128\n" | ||||
| @@ -360,13 +360,8 @@ typedef int blasint; | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifdef POWER8 | |||||
| #ifndef YIELDING | |||||
| #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | |||||
| #endif | |||||
| #endif | |||||
| #ifdef POWER9 | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #ifndef YIELDING | #ifndef YIELDING | ||||
| #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | ||||
| #endif | #endif | ||||
| @@ -68,7 +68,7 @@ | |||||
| #endif | #endif | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #define MB __asm__ __volatile__ ("eieio":::"memory") | #define MB __asm__ __volatile__ ("eieio":::"memory") | ||||
| #define WMB __asm__ __volatile__ ("eieio":::"memory") | #define WMB __asm__ __volatile__ ("eieio":::"memory") | ||||
| #define RMB __asm__ __volatile__ ("eieio":::"memory") | #define RMB __asm__ __volatile__ ("eieio":::"memory") | ||||
| @@ -272,7 +272,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #define HAVE_PREFETCH | #define HAVE_PREFETCH | ||||
| #endif | #endif | ||||
| #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(PPC970) | |||||
| #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(POWER10) || defined(PPC970) | |||||
| #define DCBT_ARG 0 | #define DCBT_ARG 0 | ||||
| #else | #else | ||||
| #define DCBT_ARG 8 | #define DCBT_ARG 8 | ||||
| @@ -294,7 +294,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| #define L1_PREFETCH dcbtst | #define L1_PREFETCH dcbtst | ||||
| #endif | #endif | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #define L1_DUALFETCH | #define L1_DUALFETCH | ||||
| #define L1_PREFETCHSIZE (16 + 128 * 100) | #define L1_PREFETCHSIZE (16 + 128 * 100) | ||||
| #define L1_PREFETCH dcbtst | #define L1_PREFETCH dcbtst | ||||
| @@ -843,7 +843,7 @@ Lmcount$lazy_ptr: | |||||
| #define BUFFER_SIZE ( 2 << 20) | #define BUFFER_SIZE ( 2 << 20) | ||||
| #elif defined(PPC440FP2) | #elif defined(PPC440FP2) | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| #elif defined(POWER8) || defined(POWER9) | |||||
| #elif defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #define BUFFER_SIZE ( 64 << 20) | #define BUFFER_SIZE ( 64 << 20) | ||||
| #else | #else | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| @@ -57,6 +57,7 @@ | |||||
| #define CPUTYPE_PPCG4 7 | #define CPUTYPE_PPCG4 7 | ||||
| #define CPUTYPE_POWER8 8 | #define CPUTYPE_POWER8 8 | ||||
| #define CPUTYPE_POWER9 9 | #define CPUTYPE_POWER9 9 | ||||
| #define CPUTYPE_POWER10 10 | |||||
| char *cpuname[] = { | char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -68,7 +69,8 @@ char *cpuname[] = { | |||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8", | "POWER8", | ||||
| "POWER9" | |||||
| "POWER9", | |||||
| "POWER10" | |||||
| }; | }; | ||||
| char *lowercpuname[] = { | char *lowercpuname[] = { | ||||
| @@ -81,7 +83,8 @@ char *lowercpuname[] = { | |||||
| "cell", | "cell", | ||||
| "ppcg4", | "ppcg4", | ||||
| "power8", | "power8", | ||||
| "power9" | |||||
| "power9", | |||||
| "power10" | |||||
| }; | }; | ||||
| char *corename[] = { | char *corename[] = { | ||||
| @@ -94,7 +97,8 @@ char *corename[] = { | |||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8", | "POWER8", | ||||
| "POWER9" | |||||
| "POWER9", | |||||
| "POWER10" | |||||
| }; | }; | ||||
| int detect(void){ | int detect(void){ | ||||
| @@ -125,6 +129,7 @@ int detect(void){ | |||||
| if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | ||||
| if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; | if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; | ||||
| if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10; | |||||
| if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
| if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
| @@ -157,6 +162,7 @@ int detect(void){ | |||||
| if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | ||||
| if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; | if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; | ||||
| if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10; | |||||
| if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
| if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
| return CPUTYPE_POWER5; | return CPUTYPE_POWER5; | ||||
| @@ -179,6 +185,9 @@ int detect(void){ | |||||
| int id; | int id; | ||||
| __asm __volatile("mfpvr %0" : "=r"(id)); | __asm __volatile("mfpvr %0" : "=r"(id)); | ||||
| switch ( id >> 16 ) { | switch ( id >> 16 ) { | ||||
| case 0x80: // POWER10 | |||||
| return CPUTYPE_POWER10; | |||||
| break; | |||||
| case 0x4e: // POWER9 | case 0x4e: // POWER9 | ||||
| return CPUTYPE_POWER9; | return CPUTYPE_POWER9; | ||||
| break; | break; | ||||
| @@ -6,6 +6,9 @@ extern gotoblas_t gotoblas_POWER8; | |||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | #if (!defined __GNUC__) || ( __GNUC__ >= 6) | ||||
| extern gotoblas_t gotoblas_POWER9; | extern gotoblas_t gotoblas_POWER9; | ||||
| #endif | #endif | ||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 11) | |||||
| extern gotoblas_t gotoblas_POWER10; | |||||
| #endif | |||||
| extern void openblas_warning(int verbose, const char *msg); | extern void openblas_warning(int verbose, const char *msg); | ||||
| @@ -13,7 +16,8 @@ static char *corename[] = { | |||||
| "unknown", | "unknown", | ||||
| "POWER6", | "POWER6", | ||||
| "POWER8", | "POWER8", | ||||
| "POWER9" | |||||
| "POWER9", | |||||
| "POWER10" | |||||
| }; | }; | ||||
| #define NUM_CORETYPES 4 | #define NUM_CORETYPES 4 | ||||
| @@ -23,6 +27,9 @@ char *gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_POWER8) return corename[2]; | if (gotoblas == &gotoblas_POWER8) return corename[2]; | ||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | #if (!defined __GNUC__) || ( __GNUC__ >= 6) | ||||
| if (gotoblas == &gotoblas_POWER9) return corename[3]; | if (gotoblas == &gotoblas_POWER9) return corename[3]; | ||||
| #endif | |||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 11) | |||||
| if (gotoblas == &gotoblas_POWER10) return corename[4]; | |||||
| #endif | #endif | ||||
| return corename[0]; | return corename[0]; | ||||
| } | } | ||||
| @@ -36,6 +43,10 @@ static gotoblas_t *get_coretype(void) { | |||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | #if (!defined __GNUC__) || ( __GNUC__ >= 6) | ||||
| if (__builtin_cpu_is("power9")) | if (__builtin_cpu_is("power9")) | ||||
| return &gotoblas_POWER9; | return &gotoblas_POWER9; | ||||
| #endif | |||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 11) | |||||
| if (__builtin_cpu_is("isa_3_1") && __builtin_cpu_supports ("mma")) | |||||
| return &gotoblas_POWER10; | |||||
| #endif | #endif | ||||
| return NULL; | return NULL; | ||||
| } | } | ||||
| @@ -61,6 +72,9 @@ static gotoblas_t *force_coretype(char * coretype) { | |||||
| case 2: return (&gotoblas_POWER8); | case 2: return (&gotoblas_POWER8); | ||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 6) | #if (!defined __GNUC__) || ( __GNUC__ >= 6) | ||||
| case 3: return (&gotoblas_POWER9); | case 3: return (&gotoblas_POWER9); | ||||
| #endif | |||||
| #if (!defined __GNUC__) || ( __GNUC__ >= 11) | |||||
| case 4: return (&gotoblas_POWER10); | |||||
| #endif | #endif | ||||
| default: return NULL; | default: return NULL; | ||||
| } | } | ||||
| @@ -650,6 +650,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define CORENAME "POWER9" | #define CORENAME "POWER9" | ||||
| #endif | #endif | ||||
| #if defined(FORCE_POWER10) | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "POWER" | |||||
| #define SUBARCHITECTURE "POWER10" | |||||
| #define SUBDIRNAME "power" | |||||
| #define ARCHCONFIG "-DPOWER10 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ | |||||
| "-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " | |||||
| #define LIBNAME "power10" | |||||
| #define CORENAME "POWER10" | |||||
| #endif | |||||
| #ifdef FORCE_PPCG4 | #ifdef FORCE_PPCG4 | ||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "POWER" | #define ARCHITECTURE "POWER" | ||||
| @@ -130,7 +130,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) ) | if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) ) | ||||
| set(USE_TRMM true) | set(USE_TRMM true) | ||||
| endif () | endif () | ||||
| if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9)) | |||||
| if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10)) | |||||
| set(USE_TRMM true) | set(USE_TRMM true) | ||||
| endif () | endif () | ||||
| @@ -51,6 +51,10 @@ ifeq ($(CORE), POWER9) | |||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
| ifeq ($(CORE), POWER10) | |||||
| USE_TRMM = 1 | |||||
| endif | |||||
| ifeq ($(ARCH), zarch) | ifeq ($(ARCH), zarch) | ||||
| USE_TRMM = 1 | USE_TRMM = 1 | ||||
| endif | endif | ||||
| @@ -0,0 +1,214 @@ | |||||
| ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) | |||||
| include $(KERNELDIR)/KERNEL.POWER8 | |||||
| else | |||||
| #SGEMM_BETA = ../generic/gemm_beta.c | |||||
| #DGEMM_BETA = ../generic/gemm_beta.c | |||||
| #CGEMM_BETA = ../generic/zgemm_beta.c | |||||
| #ZGEMM_BETA = ../generic/zgemm_beta.c | |||||
| STRMMKERNEL = sgemm_kernel_power9.S | |||||
| DTRMMKERNEL = dgemm_kernel_power9.S | |||||
| CTRMMKERNEL = cgemm_kernel_power9.S | |||||
| ZTRMMKERNEL = zgemm_kernel_power9.S | |||||
| SGEMMKERNEL = sgemm_kernel_power9.S | |||||
| SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||||
| SGEMMITCOPY = sgemm_tcopy_16_power8.S | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||||
| SGEMMOTCOPY = sgemm_tcopy_8_power8.S | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = dgemm_kernel_power9.S | |||||
| DGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||||
| DGEMMITCOPY = dgemm_tcopy_16_power8.S | |||||
| DGEMMONCOPY = dgemm_ncopy_4_power8.S | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = cgemm_kernel_power9.S | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_8.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_power9.S | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||||
| ZGEMMITCOPY = zgemm_tcopy_8_power8.S | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| #Todo: CGEMM3MKERNEL should be 4x4 blocksizes. | |||||
| #CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S | |||||
| #ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S | |||||
| #Pure C for other kernels | |||||
| #SAMAXKERNEL = ../arm/amax.c | |||||
| #DAMAXKERNEL = ../arm/amax.c | |||||
| #CAMAXKERNEL = ../arm/zamax.c | |||||
| #ZAMAXKERNEL = ../arm/zamax.c | |||||
| # | |||||
| #SAMINKERNEL = ../arm/amin.c | |||||
| #DAMINKERNEL = ../arm/amin.c | |||||
| #CAMINKERNEL = ../arm/zamin.c | |||||
| #ZAMINKERNEL = ../arm/zamin.c | |||||
| # | |||||
| #SMAXKERNEL = ../arm/max.c | |||||
| #DMAXKERNEL = ../arm/max.c | |||||
| # | |||||
| #SMINKERNEL = ../arm/min.c | |||||
| #DMINKERNEL = ../arm/min.c | |||||
| # | |||||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||||
| ISAMAXKERNEL = isamax_power9.S | |||||
| else | |||||
| ISAMAXKERNEL = isamax.c | |||||
| endif | |||||
| IDAMAXKERNEL = idamax.c | |||||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||||
| ICAMAXKERNEL = icamax_power9.S | |||||
| else | |||||
| ICAMAXKERNEL = icamax.c | |||||
| endif | |||||
| IZAMAXKERNEL = izamax.c | |||||
| # | |||||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||||
| ISAMINKERNEL = isamin_power9.S | |||||
| else | |||||
| ISAMINKERNEL = isamin.c | |||||
| endif | |||||
| IDAMINKERNEL = idamin.c | |||||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||||
| ICAMINKERNEL = icamin_power9.S | |||||
| else | |||||
| ICAMINKERNEL = icamin.c | |||||
| endif | |||||
| IZAMINKERNEL = izamin.c | |||||
| # | |||||
| #ISMAXKERNEL = ../arm/imax.c | |||||
| #IDMAXKERNEL = ../arm/imax.c | |||||
| # | |||||
| #ISMINKERNEL = ../arm/imin.c | |||||
| #IDMINKERNEL = ../arm/imin.c | |||||
| # | |||||
| SASUMKERNEL = sasum.c | |||||
| DASUMKERNEL = dasum.c | |||||
| CASUMKERNEL = casum.c | |||||
| ZASUMKERNEL = zasum.c | |||||
| # | |||||
| SAXPYKERNEL = saxpy.c | |||||
| DAXPYKERNEL = daxpy.c | |||||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||||
| CAXPYKERNEL = caxpy_power9.S | |||||
| else | |||||
| CAXPYKERNEL = caxpy.c | |||||
| endif | |||||
| ZAXPYKERNEL = zaxpy.c | |||||
| # | |||||
| SCOPYKERNEL = scopy.c | |||||
| DCOPYKERNEL = dcopy.c | |||||
| CCOPYKERNEL = ccopy.c | |||||
| ZCOPYKERNEL = zcopy.c | |||||
| # | |||||
| SDOTKERNEL = sdot.c | |||||
| DDOTKERNEL = ddot.c | |||||
| DSDOTKERNEL = sdot.c | |||||
| ifneq ($(GCCVERSIONGTEQ9),1) | |||||
| CDOTKERNEL = cdot_power9.S | |||||
| else | |||||
| CDOTKERNEL = cdot.c | |||||
| endif | |||||
| ZDOTKERNEL = zdot.c | |||||
| # | |||||
| SNRM2KERNEL = ../arm/nrm2.c | |||||
| DNRM2KERNEL = ../arm/nrm2.c | |||||
| CNRM2KERNEL = ../arm/znrm2.c | |||||
| ZNRM2KERNEL = ../arm/znrm2.c | |||||
| # | |||||
| SROTKERNEL = srot.c | |||||
| DROTKERNEL = drot.c | |||||
| CROTKERNEL = crot.c | |||||
| ZROTKERNEL = zrot.c | |||||
| # | |||||
| SSCALKERNEL = sscal.c | |||||
| DSCALKERNEL = dscal.c | |||||
| CSCALKERNEL = zscal.c | |||||
| ZSCALKERNEL = zscal.c | |||||
| # | |||||
| SSWAPKERNEL = sswap.c | |||||
| DSWAPKERNEL = dswap.c | |||||
| CSWAPKERNEL = cswap.c | |||||
| ZSWAPKERNEL = zswap.c | |||||
| # | |||||
| SGEMVNKERNEL = sgemv_n.c | |||||
| DGEMVNKERNEL = dgemv_n.c | |||||
| CGEMVNKERNEL = cgemv_n.c | |||||
| ZGEMVNKERNEL = zgemv_n_4.c | |||||
| # | |||||
| SGEMVTKERNEL = sgemv_t.c | |||||
| DGEMVTKERNEL = dgemv_t.c | |||||
| CGEMVTKERNEL = cgemv_t.c | |||||
| ZGEMVTKERNEL = zgemv_t_4.c | |||||
| #SSYMV_U_KERNEL = ../generic/symv_k.c | |||||
| #SSYMV_L_KERNEL = ../generic/symv_k.c | |||||
| #DSYMV_U_KERNEL = ../generic/symv_k.c | |||||
| #DSYMV_L_KERNEL = ../generic/symv_k.c | |||||
| #QSYMV_U_KERNEL = ../generic/symv_k.c | |||||
| #QSYMV_L_KERNEL = ../generic/symv_k.c | |||||
| #CSYMV_U_KERNEL = ../generic/zsymv_k.c | |||||
| #CSYMV_L_KERNEL = ../generic/zsymv_k.c | |||||
| #ZSYMV_U_KERNEL = ../generic/zsymv_k.c | |||||
| #ZSYMV_L_KERNEL = ../generic/zsymv_k.c | |||||
| #XSYMV_U_KERNEL = ../generic/zsymv_k.c | |||||
| #XSYMV_L_KERNEL = ../generic/zsymv_k.c | |||||
| #ZHEMV_U_KERNEL = ../generic/zhemv_k.c | |||||
| #ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
| LSAME_KERNEL = ../generic/lsame.c | |||||
| SCABS_KERNEL = ../generic/cabs.c | |||||
| DCABS_KERNEL = ../generic/cabs.c | |||||
| QCABS_KERNEL = ../generic/cabs.c | |||||
| #Dump kernel | |||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||||
| endif | |||||
| @@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "casum_microk_power8.c" | #include "casum_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "ccopy_microk_power8.c" | #include "ccopy_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| static void crot_kernel_8 (long n, float *x, float *y, float c, float s) | static void crot_kernel_8 (long n, float *x, float *y, float c, float s) | ||||
| { | { | ||||
| @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "cswap_microk_power8.c" | #include "cswap_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "dasum_microk_power8.c" | #include "dasum_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "daxpy_microk_power8.c" | #include "daxpy_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "dcopy_microk_power8.c" | #include "dcopy_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "ddot_microk_power8.c" | #include "ddot_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "dgemv_n_microk_power8.c" | #include "dgemv_n_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #pragma GCC optimize "O1" | #pragma GCC optimize "O1" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "drot_microk_power8.c" | #include "drot_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "dscal_microk_power8.c" | #include "dscal_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "dswap_microk_power8.c" | #include "dswap_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "sasum_microk_power8.c" | #include "sasum_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "scopy_microk_power8.c" | #include "scopy_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "sdot_microk_power8.c" | #include "sdot_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #pragma GCC optimize "O1" | #pragma GCC optimize "O1" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "srot_microk_power8.c" | #include "srot_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "sscal_microk_power8.c" | #include "sscal_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "sswap_microk_power8.c" | #include "sswap_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "zasum_microk_power8.c" | #include "zasum_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "zaxpy_microk_power8.c" | #include "zaxpy_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "zcopy_microk_power8.c" | #include "zcopy_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "zdot_microk_power8.c" | #include "zdot_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #pragma GCC optimize "O1" | #pragma GCC optimize "O1" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #if defined(DOUBLE) | #if defined(DOUBLE) | ||||
| #include "zscal_microk_power8.c" | #include "zscal_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(POWER8) || defined(POWER9) | |||||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||||
| #include "zswap_microk_power8.c" | #include "zswap_microk_power8.c" | ||||
| #endif | #endif | ||||
| @@ -2260,7 +2260,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(POWER9) | |||||
| #if defined(POWER9) || defined(POWER10) | |||||
| #define SNUMOPT 16 | #define SNUMOPT 16 | ||||
| #define DNUMOPT 8 | #define DNUMOPT 8 | ||||