Browse Source

powerpc: Add support for future processor

This is the initial patch adding build-infrastructure support
for the POWER10 architecture.
tags/v0.3.11^2
Rajalakshmi Srinivasaraghavan 5 years ago
parent
commit
9fe930f205
38 changed files with 309 additions and 42 deletions
  1. +10
    -0
      Makefile.power
  2. +7
    -0
      Makefile.system
  3. +1
    -0
      TargetList.txt
  4. +1
    -1
      cmake/arch.cmake
  5. +1
    -1
      cmake/prebuild.cmake
  6. +1
    -6
      common.h
  7. +4
    -4
      common_power.h
  8. +12
    -3
      cpuid_power.c
  9. +15
    -1
      driver/others/dynamic_power.c
  10. +13
    -0
      getarch.c
  11. +1
    -1
      kernel/CMakeLists.txt
  12. +4
    -0
      kernel/Makefile.L3
  13. +214
    -0
      kernel/power/KERNEL.POWER10
  14. +1
    -1
      kernel/power/casum.c
  15. +1
    -1
      kernel/power/ccopy.c
  16. +1
    -1
      kernel/power/crot.c
  17. +1
    -1
      kernel/power/cswap.c
  18. +1
    -1
      kernel/power/dasum.c
  19. +1
    -1
      kernel/power/daxpy.c
  20. +1
    -1
      kernel/power/dcopy.c
  21. +1
    -1
      kernel/power/ddot.c
  22. +1
    -1
      kernel/power/dgemv_n.c
  23. +1
    -1
      kernel/power/drot.c
  24. +1
    -1
      kernel/power/dscal.c
  25. +1
    -1
      kernel/power/dswap.c
  26. +1
    -1
      kernel/power/sasum.c
  27. +1
    -1
      kernel/power/scopy.c
  28. +1
    -1
      kernel/power/sdot.c
  29. +1
    -1
      kernel/power/srot.c
  30. +1
    -1
      kernel/power/sscal.c
  31. +1
    -1
      kernel/power/sswap.c
  32. +1
    -1
      kernel/power/zasum.c
  33. +1
    -1
      kernel/power/zaxpy.c
  34. +1
    -1
      kernel/power/zcopy.c
  35. +1
    -1
      kernel/power/zdot.c
  36. +1
    -1
      kernel/power/zscal.c
  37. +1
    -1
      kernel/power/zswap.c
  38. +1
    -1
      param.h

+ 10
- 0
Makefile.power View File

@@ -9,6 +9,16 @@ else
USE_OPENMP = 1 USE_OPENMP = 1
endif endif


# Compiler flags for CORE=POWER10. The C (COMMON_OPT) and Fortran (FCOMMON_OPT)
# flag sets both target -mcpu=future/-mtune=future; -fno-fast-math is passed
# after the optimisation level in every variant.
ifeq ($(CORE), POWER10)
ifneq ($(USE_OPENMP), 1)
# Build without OpenMP.
COMMON_OPT += -Ofast -mcpu=future -mtune=future -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=future -mtune=future -malign-power -fno-fast-math
else
# OpenMP build: the same flags with -DUSE_OPENMP and -fopenmp added.
COMMON_OPT += -Ofast -mcpu=future -mtune=future -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=future -mtune=future -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
endif
endif

ifeq ($(CORE), POWER9) ifeq ($(CORE), POWER9)
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp


+ 7
- 0
Makefile.system View File

@@ -595,6 +595,7 @@ DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8 DYNAMIC_CORE += POWER8
ifneq ($(C_COMPILER), GCC) ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9 DYNAMIC_CORE += POWER9
DYNAMIC_CORE += POWER10
endif endif
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5) GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
@@ -603,6 +604,12 @@ DYNAMIC_CORE += POWER9
else else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif endif
# POWER10 is only added to the dynamic core list when the major version
# printed by `$(CC) -dumpversion` is 11 or higher.
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
ifeq ($(GCCVERSIONGTEQ11), 1)
DYNAMIC_CORE += POWER10
else
# No comma after "info": "$(info, ...)" is not recognised as a call to the info
# function (the name must be followed by whitespace), so make would expand it
# as an undefined variable and print nothing.
$(info OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
endif
endif endif
endif endif




+ 1
- 0
TargetList.txt View File

@@ -49,6 +49,7 @@ POWER6
POWER7 POWER7
POWER8 POWER8
POWER9 POWER9
POWER10
PPCG4 PPCG4
PPC970 PPC970
PPC970MP PPC970MP


+ 1
- 1
cmake/arch.cmake View File

@@ -49,7 +49,7 @@ if (DYNAMIC_ARCH)
endif () endif ()
if (POWER) if (POWER)
set(DYNAMIC_CORE POWER6 POWER8 POWER9)
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
endif () endif ()
if (X86) if (X86)


+ 1
- 1
cmake/prebuild.cmake View File

@@ -420,7 +420,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 8) set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 2) set(ZGEMM_UNROLL_N 2)
set(SYMV_P 8) set(SYMV_P 8)
elseif ("${TCORE}" STREQUAL "POWER9")
elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
file(APPEND ${TARGET_CONF_TEMP} file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n" "#define L1_DATA_SIZE 32768\n"
"#define L1_DATA_LINESIZE 128\n" "#define L1_DATA_LINESIZE 128\n"


+ 1
- 6
common.h View File

@@ -360,13 +360,8 @@ typedef int blasint;
#endif #endif
#endif #endif


#ifdef POWER8
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif


#ifdef POWER9
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#ifndef YIELDING #ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif #endif


+ 4
- 4
common_power.h View File

@@ -68,7 +68,7 @@
#endif #endif




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#define MB __asm__ __volatile__ ("eieio":::"memory") #define MB __asm__ __volatile__ ("eieio":::"memory")
#define WMB __asm__ __volatile__ ("eieio":::"memory") #define WMB __asm__ __volatile__ ("eieio":::"memory")
#define RMB __asm__ __volatile__ ("eieio":::"memory") #define RMB __asm__ __volatile__ ("eieio":::"memory")
@@ -272,7 +272,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define HAVE_PREFETCH #define HAVE_PREFETCH
#endif #endif


#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(PPC970)
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || defined(POWER10) || defined(PPC970)
#define DCBT_ARG 0 #define DCBT_ARG 0
#else #else
#define DCBT_ARG 8 #define DCBT_ARG 8
@@ -294,7 +294,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define L1_PREFETCH dcbtst #define L1_PREFETCH dcbtst
#endif #endif


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#define L1_DUALFETCH #define L1_DUALFETCH
#define L1_PREFETCHSIZE (16 + 128 * 100) #define L1_PREFETCHSIZE (16 + 128 * 100)
#define L1_PREFETCH dcbtst #define L1_PREFETCH dcbtst
@@ -843,7 +843,7 @@ Lmcount$lazy_ptr:
#define BUFFER_SIZE ( 2 << 20) #define BUFFER_SIZE ( 2 << 20)
#elif defined(PPC440FP2) #elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8) || defined(POWER9)
#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
#define BUFFER_SIZE ( 64 << 20) #define BUFFER_SIZE ( 64 << 20)
#else #else
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)


+ 12
- 3
cpuid_power.c View File

@@ -57,6 +57,7 @@
#define CPUTYPE_PPCG4 7 #define CPUTYPE_PPCG4 7
#define CPUTYPE_POWER8 8 #define CPUTYPE_POWER8 8
#define CPUTYPE_POWER9 9 #define CPUTYPE_POWER9 9
#define CPUTYPE_POWER10 10


char *cpuname[] = { char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
@@ -68,7 +69,8 @@ char *cpuname[] = {
"CELL", "CELL",
"PPCG4", "PPCG4",
"POWER8", "POWER8",
"POWER9"
"POWER9",
"POWER10"
}; };


char *lowercpuname[] = { char *lowercpuname[] = {
@@ -81,7 +83,8 @@ char *lowercpuname[] = {
"cell", "cell",
"ppcg4", "ppcg4",
"power8", "power8",
"power9"
"power9",
"power10"
}; };


char *corename[] = { char *corename[] = {
@@ -94,7 +97,8 @@ char *corename[] = {
"CELL", "CELL",
"PPCG4", "PPCG4",
"POWER8", "POWER8",
"POWER9"
"POWER9",
"POWER10"
}; };


int detect(void){ int detect(void){
@@ -125,6 +129,7 @@ int detect(void){
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10;
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;


@@ -157,6 +162,7 @@ int detect(void){
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
if (!strncasecmp(p, "POWER10", 7)) return CPUTYPE_POWER10;
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
return CPUTYPE_POWER5; return CPUTYPE_POWER5;
@@ -179,6 +185,9 @@ int detect(void){
int id; int id;
__asm __volatile("mfpvr %0" : "=r"(id)); __asm __volatile("mfpvr %0" : "=r"(id));
switch ( id >> 16 ) { switch ( id >> 16 ) {
case 0x80: // POWER10
return CPUTYPE_POWER10;
break;
case 0x4e: // POWER9 case 0x4e: // POWER9
return CPUTYPE_POWER9; return CPUTYPE_POWER9;
break; break;


+ 15
- 1
driver/others/dynamic_power.c View File

@@ -6,6 +6,9 @@ extern gotoblas_t gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if (!defined __GNUC__) || ( __GNUC__ >= 6)
extern gotoblas_t gotoblas_POWER9; extern gotoblas_t gotoblas_POWER9;
#endif #endif
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
extern gotoblas_t gotoblas_POWER10;
#endif


extern void openblas_warning(int verbose, const char *msg); extern void openblas_warning(int verbose, const char *msg);


@@ -13,7 +16,8 @@ static char *corename[] = {
"unknown", "unknown",
"POWER6", "POWER6",
"POWER8", "POWER8",
"POWER9"
"POWER9",
"POWER10"
}; };


/* Number of entries in corename[] above: "unknown", POWER6, POWER8, POWER9 and
 * POWER10. The count was left at 4 when "POWER10" was appended to the table.
 * NOTE(review): presumably this bounds the name lookup that feeds the switch
 * in force_coretype() (which has a `case 4`) - confirm against the part of
 * the file not shown here. */
#define NUM_CORETYPES 5
@@ -23,6 +27,9 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_POWER8) return corename[2]; if (gotoblas == &gotoblas_POWER8) return corename[2];
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if (!defined __GNUC__) || ( __GNUC__ >= 6)
if (gotoblas == &gotoblas_POWER9) return corename[3]; if (gotoblas == &gotoblas_POWER9) return corename[3];
#endif
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
if (gotoblas == &gotoblas_POWER10) return corename[4];
#endif #endif
return corename[0]; return corename[0];
} }
@@ -36,6 +43,10 @@ static gotoblas_t *get_coretype(void) {
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if (!defined __GNUC__) || ( __GNUC__ >= 6)
if (__builtin_cpu_is("power9")) if (__builtin_cpu_is("power9"))
return &gotoblas_POWER9; return &gotoblas_POWER9;
#endif
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
/* Both conditions are hardware *features*, so both are queried with
 * __builtin_cpu_supports(): "arch_3_1" is the ISA 3.1 feature name and "mma"
 * the matrix-multiply-assist feature. __builtin_cpu_is() takes CPU names
 * (as in the "power9" check above) and does not accept "isa_3_1". */
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
    return &gotoblas_POWER10;
#endif #endif
return NULL; return NULL;
} }
@@ -61,6 +72,9 @@ static gotoblas_t *force_coretype(char * coretype) {
case 2: return (&gotoblas_POWER8); case 2: return (&gotoblas_POWER8);
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if (!defined __GNUC__) || ( __GNUC__ >= 6)
case 3: return (&gotoblas_POWER9); case 3: return (&gotoblas_POWER9);
#endif
#if (!defined __GNUC__) || ( __GNUC__ >= 11)
case 4: return (&gotoblas_POWER10);
#endif #endif
default: return NULL; default: return NULL;
} }


+ 13
- 0
getarch.c View File

@@ -650,6 +650,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "POWER9" #define CORENAME "POWER9"
#endif #endif


/* Target description used when FORCE_POWER10 is defined. */
#ifdef FORCE_POWER10
#define FORCE
#define ARCHITECTURE    "POWER"
#define SUBARCHITECTURE "POWER10"
#define SUBDIRNAME      "power"
/* Adjacent string literals are concatenated into one flag string; each piece
 * keeps its trailing space so the flags stay separated. */
#define ARCHCONFIG \
    "-DPOWER10 " \
    "-DL1_DATA_SIZE=32768 " \
    "-DL1_DATA_LINESIZE=128 " \
    "-DL2_SIZE=4194304 " \
    "-DL2_LINESIZE=128 " \
    "-DDTB_DEFAULT_ENTRIES=128 " \
    "-DDTB_SIZE=4096 " \
    "-DL2_ASSOCIATIVE=8 "
#define LIBNAME  "power10"
#define CORENAME "POWER10"
#endif

#ifdef FORCE_PPCG4 #ifdef FORCE_PPCG4
#define FORCE #define FORCE
#define ARCHITECTURE "POWER" #define ARCHITECTURE "POWER"


+ 1
- 1
kernel/CMakeLists.txt View File

@@ -130,7 +130,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) ) if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
set(USE_TRMM true) set(USE_TRMM true)
endif () endif ()
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9))
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10))
set(USE_TRMM true) set(USE_TRMM true)
endif () endif ()




+ 4
- 0
kernel/Makefile.L3 View File

@@ -51,6 +51,10 @@ ifeq ($(CORE), POWER9)
USE_TRMM = 1 USE_TRMM = 1
endif endif


# Turn on USE_TRMM for POWER10, mirroring the POWER9 and zarch rules around it.
ifeq "$(CORE)" "POWER10"
USE_TRMM = 1
endif

ifeq ($(ARCH), zarch) ifeq ($(ARCH), zarch)
USE_TRMM = 1 USE_TRMM = 1
endif endif


+ 214
- 0
kernel/power/KERNEL.POWER10 View File

@@ -0,0 +1,214 @@
# Kernel selection table for the POWER10 target.
#
# When __BYTE_ORDER__ equals __ORDER_BIG_ENDIAN__ the POWER8 table is included
# unchanged; every assignment below applies to the other case only.
# None of the files named here is POWER10-specific: each entry is a file with
# a power9/power8 suffix, an unsuffixed C file, or a ../generic or ../arm
# fallback.
ifeq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
include $(KERNELDIR)/KERNEL.POWER8
else

# GEMM beta kernels: left at whatever the including makefiles default to.
#SGEMM_BETA = ../generic/gemm_beta.c
#DGEMM_BETA = ../generic/gemm_beta.c
#CGEMM_BETA = ../generic/zgemm_beta.c
#ZGEMM_BETA = ../generic/zgemm_beta.c

# TRMM kernels: the same power9 assembly sources as the GEMM kernels below.
STRMMKERNEL = sgemm_kernel_power9.S
DTRMMKERNEL = dgemm_kernel_power9.S
CTRMMKERNEL = cgemm_kernel_power9.S
ZTRMMKERNEL = zgemm_kernel_power9.S

# Single-precision GEMM: power9 compute kernel; generic C ncopy routines and
# power8 assembly tcopy routines.
SGEMMKERNEL = sgemm_kernel_power9.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = sgemm_tcopy_16_power8.S
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Double-precision GEMM: power9 compute kernel; copy routines are a mix of
# generic C and power8 assembly.
DGEMMKERNEL = dgemm_kernel_power9.S
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
DGEMMITCOPY = dgemm_tcopy_16_power8.S
DGEMMONCOPY = dgemm_ncopy_4_power8.S
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Single-precision complex GEMM: power9 compute kernel, generic C copy routines.
CGEMMKERNEL = cgemm_kernel_power9.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)

# Double-precision complex GEMM: power9 compute kernel; only the inner tcopy
# routine is power8 assembly, the other copy routines are generic C.
ZGEMMKERNEL = zgemm_kernel_power9.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)

# TRSM kernels: generic C everywhere except DTRSM LT, which uses the power8
# 16x4 assembly kernel.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

#Todo: CGEMM3MKERNEL should be 4x4 blocksizes.
# (The two disabled names below refer to sse3 sources; the active GEMM3M
# settings are the "Dump kernel" assignments at the end of this file.)
#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S

#Pure C for other kernels
# (the amax/amin/max/min overrides below are disabled)
#SAMAXKERNEL = ../arm/amax.c
#DAMAXKERNEL = ../arm/amax.c
#CAMAXKERNEL = ../arm/zamax.c
#ZAMAXKERNEL = ../arm/zamax.c
#
#SAMINKERNEL = ../arm/amin.c
#DAMINKERNEL = ../arm/amin.c
#CAMINKERNEL = ../arm/zamin.c
#ZAMINKERNEL = ../arm/zamin.c
#
#SMAXKERNEL = ../arm/max.c
#DMAXKERNEL = ../arm/max.c
#
#SMINKERNEL = ../arm/min.c
#DMINKERNEL = ../arm/min.c
#
# Index-of-max kernels. For the single-precision real and complex variants the
# power9 assembly file is used unless GCCVERSIONGTEQ9 is 1, in which case the
# C file is used instead.
# NOTE(review): GCCVERSIONGTEQ9 is not set in this file; presumably it is
# defined elsewhere for GCC >= 9 - confirm.
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMAXKERNEL = isamax_power9.S
else
ISAMAXKERNEL = isamax.c
endif
IDAMAXKERNEL = idamax.c
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMAXKERNEL = icamax_power9.S
else
ICAMAXKERNEL = icamax.c
endif
IZAMAXKERNEL = izamax.c
#
# Index-of-min kernels: same GCCVERSIONGTEQ9 switch as the index-of-max ones.
ifneq ($(GCCVERSIONGTEQ9),1)
ISAMINKERNEL = isamin_power9.S
else
ISAMINKERNEL = isamin.c
endif
IDAMINKERNEL = idamin.c
ifneq ($(GCCVERSIONGTEQ9),1)
ICAMINKERNEL = icamin_power9.S
else
ICAMINKERNEL = icamin.c
endif
IZAMINKERNEL = izamin.c
#
#ISMAXKERNEL = ../arm/imax.c
#IDMAXKERNEL = ../arm/imax.c
#
#ISMINKERNEL = ../arm/imin.c
#IDMINKERNEL = ../arm/imin.c
#
# ASUM kernels.
SASUMKERNEL = sasum.c
DASUMKERNEL = dasum.c
CASUMKERNEL = casum.c
ZASUMKERNEL = zasum.c
#
# AXPY kernels; CAXPY switches between power9 assembly and C on GCCVERSIONGTEQ9.
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power9.S
else
CAXPYKERNEL = caxpy.c
endif
ZAXPYKERNEL = zaxpy.c
#
# COPY kernels.
SCOPYKERNEL = scopy.c
DCOPYKERNEL = dcopy.c
CCOPYKERNEL = ccopy.c
ZCOPYKERNEL = zcopy.c
#
# DOT kernels; DSDOT is built from the same source as SDOT, and CDOT switches
# between power9 assembly and C on GCCVERSIONGTEQ9.
SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c
ifneq ($(GCCVERSIONGTEQ9),1)
CDOTKERNEL = cdot_power9.S
else
CDOTKERNEL = cdot.c
endif
ZDOTKERNEL = zdot.c
#
# NRM2 kernels: C sources from the ../arm directory.
SNRM2KERNEL = ../arm/nrm2.c
DNRM2KERNEL = ../arm/nrm2.c
CNRM2KERNEL = ../arm/znrm2.c
ZNRM2KERNEL = ../arm/znrm2.c
#
# ROT kernels.
SROTKERNEL = srot.c
DROTKERNEL = drot.c
CROTKERNEL = crot.c
ZROTKERNEL = zrot.c
#
# SCAL kernels; CSCAL and ZSCAL are both built from zscal.c.
SSCALKERNEL = sscal.c
DSCALKERNEL = dscal.c
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
#
# SWAP kernels.
SSWAPKERNEL = sswap.c
DSWAPKERNEL = dswap.c
CSWAPKERNEL = cswap.c
ZSWAPKERNEL = zswap.c
#

# GEMV kernels, non-transposed.
SGEMVNKERNEL = sgemv_n.c
DGEMVNKERNEL = dgemv_n.c
CGEMVNKERNEL = cgemv_n.c
ZGEMVNKERNEL = zgemv_n_4.c
#
# GEMV kernels, transposed.
SGEMVTKERNEL = sgemv_t.c
DGEMVTKERNEL = dgemv_t.c
CGEMVTKERNEL = cgemv_t.c
ZGEMVTKERNEL = zgemv_t_4.c


# SYMV / HEMV overrides are disabled.
#SSYMV_U_KERNEL = ../generic/symv_k.c
#SSYMV_L_KERNEL = ../generic/symv_k.c
#DSYMV_U_KERNEL = ../generic/symv_k.c
#DSYMV_L_KERNEL = ../generic/symv_k.c
#QSYMV_U_KERNEL = ../generic/symv_k.c
#QSYMV_L_KERNEL = ../generic/symv_k.c
#CSYMV_U_KERNEL = ../generic/zsymv_k.c
#CSYMV_L_KERNEL = ../generic/zsymv_k.c
#ZSYMV_U_KERNEL = ../generic/zsymv_k.c
#ZSYMV_L_KERNEL = ../generic/zsymv_k.c
#XSYMV_U_KERNEL = ../generic/zsymv_k.c
#XSYMV_L_KERNEL = ../generic/zsymv_k.c

#ZHEMV_U_KERNEL = ../generic/zhemv_k.c
#ZHEMV_L_KERNEL = ../generic/zhemv_k.c

# Helper kernels taken from the generic directory.
LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c

#Dump kernel
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

endif

+ 1
- 1
kernel/power/casum.c View File

@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "casum_microk_power8.c" #include "casum_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/ccopy.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "ccopy_microk_power8.c" #include "ccopy_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/crot.c View File

@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
static void crot_kernel_8 (long n, float *x, float *y, float c, float s) static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
{ {


+ 1
- 1
kernel/power/cswap.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "cswap_microk_power8.c" #include "cswap_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/dasum.c View File

@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "dasum_microk_power8.c" #include "dasum_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/daxpy.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "daxpy_microk_power8.c" #include "daxpy_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/dcopy.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "dcopy_microk_power8.c" #include "dcopy_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/ddot.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "ddot_microk_power8.c" #include "ddot_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/dgemv_n.c View File

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "dgemv_n_microk_power8.c" #include "dgemv_n_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/drot.c View File

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#pragma GCC optimize "O1" #pragma GCC optimize "O1"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "drot_microk_power8.c" #include "drot_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/dscal.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "dscal_microk_power8.c" #include "dscal_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/dswap.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "dswap_microk_power8.c" #include "dswap_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/sasum.c View File

@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "sasum_microk_power8.c" #include "sasum_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/scopy.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "scopy_microk_power8.c" #include "scopy_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/sdot.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "sdot_microk_power8.c" #include "sdot_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/srot.c View File

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#pragma GCC optimize "O1" #pragma GCC optimize "O1"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "srot_microk_power8.c" #include "srot_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/sscal.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "sscal_microk_power8.c" #include "sscal_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/sswap.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "sswap_microk_power8.c" #include "sswap_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/zasum.c View File

@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "zasum_microk_power8.c" #include "zasum_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/zaxpy.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "zaxpy_microk_power8.c" #include "zaxpy_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/zcopy.c View File

@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "zcopy_microk_power8.c" #include "zcopy_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/zdot.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "zdot_microk_power8.c" #include "zdot_microk_power8.c"
#endif #endif




+ 1
- 1
kernel/power/zscal.c View File

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#pragma GCC optimize "O1" #pragma GCC optimize "O1"


#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#if defined(DOUBLE) #if defined(DOUBLE)
#include "zscal_microk_power8.c" #include "zscal_microk_power8.c"
#endif #endif


+ 1
- 1
kernel/power/zswap.c View File

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"




#if defined(POWER8) || defined(POWER9)
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#include "zswap_microk_power8.c" #include "zswap_microk_power8.c"
#endif #endif




+ 1
- 1
param.h View File

@@ -2260,7 +2260,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(POWER9)
#if defined(POWER9) || defined(POWER10)


#define SNUMOPT 16 #define SNUMOPT 16
#define DNUMOPT 8 #define DNUMOPT 8


Loading…
Cancel
Save