Add runtime CPU detection for zarch (tags/v0.3.8^2)
@@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386) | |||
override ARCH=x86 | |||
else ifeq ($(ARCH), aarch64) | |||
override ARCH=arm64 | |||
else ifeq ($(ARCH), zarch) | |||
override ARCH=zarch | |||
endif | |||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | |||
@@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99 | |||
DYNAMIC_CORE += TSV110 | |||
endif | |||
ifeq ($(ARCH), zarch) | |||
DYNAMIC_CORE = Z13 | |||
DYNAMIC_CORE += Z14 | |||
endif | |||
ifeq ($(ARCH), power) | |||
DYNAMIC_CORE = POWER6 | |||
DYNAMIC_CORE += POWER8 | |||
@@ -21,9 +21,13 @@ else | |||
ifeq ($(ARCH),power) | |||
COMMONOBJS += dynamic_power.$(SUFFIX) | |||
else | |||
ifeq ($(ARCH),zarch) | |||
COMMONOBJS += dynamic_zarch.$(SUFFIX) | |||
else | |||
COMMONOBJS += dynamic.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
else | |||
COMMONOBJS += parameter.$(SUFFIX) | |||
endif | |||
@@ -85,9 +89,13 @@ else | |||
ifeq ($(ARCH),power) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) | |||
else | |||
ifeq ($(ARCH),zarch) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
endif | |||
@@ -0,0 +1,131 @@ | |||
#include "common.h" | |||
/* Per-core gotoblas_t tables; they are defined in other translation units. */
extern gotoblas_t gotoblas_Z13;
extern gotoblas_t gotoblas_Z14;
/*
 * NOTE(review): the zarch DYNAMIC_CORE list added alongside this file only
 * names Z13 and Z14 -- confirm that gotoblas_Z15 is really built, otherwise
 * this symbol stays unresolved at link time.
 */
extern gotoblas_t gotoblas_Z15;

extern void openblas_warning(int verbose, const char* msg);

/* Number of entries in corename[] below; keep the two in sync. */
#define NUM_CORETYPES 5

/*
 * Core names accepted through OPENBLAS_CORETYPE and reported by
 * gotoblas_corename(). Index 0 is the "no match" entry; indices 1..3 are
 * the cores that have a table declared above.
 */
static char* corename[] = {
    "unknown",
    "Z13",
    "Z14",
    "Z15",
    "ZARCH_GENERIC",
};
char* gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_Z13) return corename[1]; | |||
if (gotoblas == &gotoblas_Z14) return corename[2]; | |||
if (gotoblas == &gotoblas_Z15) return corename[3]; | |||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
// if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||
//#endif | |||
return corename[0]; // try generic? | |||
} | |||
// __builtin_cpu_is is not supported by zarch | |||
static gotolabs_t* get_coretype(void) { | |||
FILE* infile; | |||
char buffer[512], * p; | |||
p = (char*)NULL; | |||
infile = fopen("/proc/sysinfo", "r"); | |||
while (fgets(buffer, sizeof(buffer), infile)) { | |||
if (!strncmp("Type", buffer, 4)) { | |||
p = strchr(buffer, ':') + 2; | |||
#if 0 | |||
fprintf(stderr, "%s\n", p); | |||
#endif | |||
break; | |||
} | |||
} | |||
fclose(infile); | |||
if (strstr(p, "2964")) return &gotoblas_Z13; | |||
if (strstr(p, "2965")) return &gotoblas_Z13; | |||
if (strstr(p, "3906")) return &gotoblas_Z14; | |||
if (strstr(p, "3907")) return &gotoblas_Z14; | |||
if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14 | |||
if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14 | |||
return NULL; // should be ZARCH_GENERIC | |||
} | |||
static gotoblas_t* force_coretype(char* coretype) { | |||
int i; | |||
int found = -1; | |||
char message[128]; | |||
for (i = 0; i < NUM_CORETYPES; i++) | |||
{ | |||
if (!strncasecmp(coretype, corename[i], 20)) | |||
{ | |||
found = i; | |||
break; | |||
} | |||
} | |||
switch (found) | |||
{ | |||
case 1: return (&gotoblas_Z13); | |||
case 2: return (&gotoblas_Z14); | |||
case 3: return (&gotoblas_Z15); | |||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
// case 3: return (&gotoblas_POWER9); | |||
//#endif | |||
default: return NULL; | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
} | |||
void gotoblas_dynamic_init(void) { | |||
char coremsg[128]; | |||
char coren[22]; | |||
char* p; | |||
if (gotoblas) return; | |||
p = getenv("OPENBLAS_CORETYPE"); | |||
if (p) | |||
{ | |||
gotoblas = force_coretype(p); | |||
} | |||
else | |||
{ | |||
gotoblas = get_coretype(); | |||
} | |||
if (gotoblas == NULL) | |||
{ | |||
snprintf(coremsg, 128, "Falling back to Z14 core\n"); | |||
openblas_warning(1, coremsg); | |||
gotoblas = &gotoblas_Z14; | |||
} | |||
if (gotoblas && gotoblas->init) { | |||
strncpy(coren, gotoblas_corename(), 20); | |||
sprintf(coremsg, "Core: %s\n", coren); | |||
openblas_warning(2, coremsg); | |||
gotoblas->init(); | |||
} | |||
else { | |||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
exit(1); | |||
} | |||
} | |||
void gotoblas_dynamic_quit(void) { | |||
gotoblas = NULL; | |||
} |
@@ -739,6 +739,26 @@ static void init_parameter(void) { | |||
} | |||
#else //POWER | |||
#if defined(ARCH_ZARCH) | |||
static void init_parameter(void) { | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
} | |||
#else //ZARCH | |||
#ifdef ARCH_X86 | |||
static int get_l2_size_old(void){ | |||
int i, eax, ebx, ecx, edx, cpuid_level; | |||
@@ -1325,4 +1345,5 @@ static void init_parameter(void) { | |||
} | |||
#endif //POWER | |||
#endif //ZARCH | |||
#endif //defined(ARCH_ARM64) |
@@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ctrmm4x4V.S | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ztrmm4x4V.S | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
@@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ctrmm4x4V.S | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ztrmm4x4V.S | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
@@ -94,26 +94,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||