|
@@ -56,6 +56,40 @@ static int detect(void) |
|
|
{ |
|
|
{ |
|
|
unsigned long hwcap = get_hwcap(); |
|
|
unsigned long hwcap = get_hwcap(); |
|
|
|
|
|
|
|
|
|
|
|
// Choose the architecture level for optimized kernels based on hardware |
|
|
|
|
|
// capability bits (just like glibc chooses optimized implementations). |
|
|
|
|
|
// |
|
|
|
|
|
// The hardware capability bits that are used here indicate both |
|
|
|
|
|
// hardware support for a particular ISA extension and the presence of |
|
|
|
|
|
// software support to enable its use. For example, when HWCAP_S390_VX |
|
|
|
|
|
// is set, the CPU can execute SIMD instructions and the Linux |
|
|
|
|
|
// kernel can manage applications using the vector registers and SIMD |
|
|
|
|
|
// instructions. |
|
|
|
|
|
// |
|
|
|
|
|
// See glibc's sysdeps/s390/dl-procinfo.h for an overview (also in |
|
|
|
|
|
// sysdeps/unix/sysv/linux/s390/bits/hwcap.h) of the defined hardware |
|
|
|
|
|
// capability bits. They are derived from the information that the |
|
|
|
|
|
// "store facility list (extended)" instructions provide. |
|
|
|
|
|
// (https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/s390/dl-procinfo.h;hb=HEAD) |
|
|
|
|
|
// |
|
|
|
|
|
// Currently used: |
|
|
|
|
|
// HWCAP_S390_VX - vector facility for z/Architecture (introduced with |
|
|
|
|
|
// IBM z13), enables level CPU_Z13 (SIMD) |
|
|
|
|
|
// HWCAP_S390_VXE - vector enhancements facility 1 (introduced with IBM |
|
|
|
|
|
// z14), together with VX enables level CPU_Z14 |
|
|
|
|
|
// (single-precision SIMD instructions) |
|
|
|
|
|
// |
|
|
|
|
|
// When you add optimized kernels that make use of other ISA extensions |
|
|
|
|
|
// (e.g., for exploiting the vector-enhancements facility 2 that was introduced |
|
|
|
|
|
// with IBM z15), then add a new architecture level (e.g., CPU_Z15) and gate |
|
|
|
|
|
// it on the hwcap that represents it here (e.g., HWCAP_S390_VXRS_EXT2 |
|
|
|
|
|
// for the z15 vector enhancements). |
|
|
|
|
|
// |
|
|
|
|
|
// To learn the value of hwcaps on a given system, set the environment |
|
|
|
|
|
// variable LD_SHOW_AUXV and let ld.so dump it (e.g., by running |
|
|
|
|
|
// LD_SHOW_AUXV=1 /bin/true). |
|
|
|
|
|
// Also, the init function for dynamic arch support will print hwcaps |
|
|
|
|
|
// when OPENBLAS_VERBOSE is set to 2 or higher. |
|
|
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE)) |
|
|
if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE)) |
|
|
return CPU_Z14; |
|
|
return CPU_Z14; |
|
|
|
|
|
|
|
|