You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cpuid_zarch.h 3.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. #include <stdlib.h>
  2. #define CPU_GENERIC 0
  3. #define CPU_Z13 1
  4. #define CPU_Z14 2
  5. #define CPU_Z15 3
  // Upper-case names of the architecture levels, listed in the order of the
  // CPU_* level ids (CPU_GENERIC = 0 ... CPU_Z15 = 3).
  static char *cpuname[] = {
    [0] = "ZARCH_GENERIC", // CPU_GENERIC
    [1] = "Z13",           // CPU_Z13
    [2] = "Z14",           // CPU_Z14
    [3] = "Z15",           // CPU_Z15
  };
  // Lower-case names of the architecture levels, listed in the order of the
  // CPU_* level ids (CPU_GENERIC = 0 ... CPU_Z15 = 3).
  static char *cpuname_lower[] = {
    [0] = "zarch_generic", // CPU_GENERIC
    [1] = "z13",           // CPU_Z13
    [2] = "z14",           // CPU_Z14
    [3] = "z15",           // CPU_Z15
  };
  18. // Guard the use of getauxval() on glibc version >= 2.16
  19. #ifdef __GLIBC__
  20. #include <features.h>
  21. #if __GLIBC_PREREQ(2, 16)
  22. #include <sys/auxv.h>
  23. #define HAVE_GETAUXVAL 1
  // Returns the AT_HWCAP entry of the auxiliary vector as reported by
  // getauxval().
  //
  // When the environment variable LD_HWCAP_MASK is set, its value (parsed by
  // strtoul() with automatic base detection) is AND-ed onto the result, which
  // honors requests for not using specific CPU features.
  //
  // Note that a missing auxval is interpreted as no capabilities available,
  // which is safe.
  static unsigned long get_hwcap(void)
  {
    unsigned long caps = getauxval(AT_HWCAP);
    const char *mask_text = getenv("LD_HWCAP_MASK");

    if (mask_text == NULL) {
      return caps;
    }

    return caps & strtoul(mask_text, NULL, 0);
  }
  36. #else // __GLIBC_PREREQ(2, 16)
  #warning "Cannot detect SIMD support in Z13 or newer architectures since glibc is older than 2.16"
  // Fallback used when getauxval() is unavailable (glibc older than 2.16).
  //
  // Always reports an empty capability set: treating missing support for
  // getauxval() as "no capabilities available" is safe.
  //
  // Returns: 0.
  static unsigned long get_hwcap(void)
  {
    return 0;
  }
  43. #endif // __GLIBC_PREREQ(2, 16)
  44. #endif // __GLIBC
  45. static int detect(void)
  46. {
  47. unsigned long hwcap = get_hwcap();
  48. // Choose the architecture level for optimized kernels based on hardware
  49. // capability bits (just like glibc chooses optimized implementations).
  50. //
  51. // The hardware capability bits that are used here indicate both
  52. // hardware support for a particular ISA extension and the presence of
  53. // software support to enable its use. For example, when HWCAP_S390_VX
  54. // is set then both the CPU can execute SIMD instructions and the Linux
  55. // kernel can manage applications using the vector registers and SIMD
  56. // instructions.
  57. //
  58. // See glibc's sysdeps/s390/dl-procinfo.h for an overview (also in
  59. // sysdeps/unix/sysv/linux/s390/bits/hwcap.h) of the defined hardware
  60. // capability bits. They are derived from the information that the
  61. // "store facility list (extended)" instructions provide.
  62. // (https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/s390/dl-procinfo.h;hb=HEAD)
  63. //
  64. // currently used:
  65. // HWCAP_S390_VX - vector facility for z/Architecture (introduced with
  66. // IBM z13), enables level CPU_Z13 (SIMD)
  67. // HWCAP_S390_VXE - vector enhancements facility 1 (introduced with IBM
  68. // z14), together with VX enables level CPU_Z14
  69. // (single-precision SIMD instructions)
  70. //
  71. // When you add optimized kernels that make use of other ISA extensions
  72. // (e.g., for exploiting the vector-enhancements facility 2 that was introduced
  73. // with IBM z15), then add a new architecture level (e.g., CPU_Z15) and gate
  74. // it on the hwcap that represents it here (e.g., HWCAP_S390_VXRS_EXT2
  75. // for the z15 vector enhancements).
  76. //
  77. // To learn the value of hwcaps on a given system, set the environment
  78. // variable LD_SHOW_AUXV and let ld.so dump it (e.g., by running
  79. // LD_SHOW_AUXV=1 /bin/true).
  80. // Also, the init function for dynamic arch support will print hwcaps
  81. // when OPENBLAS_VERBOSE is set to 2 or higher.
  82. if ((hwcap & HWCAP_S390_VX) && (hwcap & HWCAP_S390_VXE))
  83. return CPU_Z14;
  84. if (hwcap & HWCAP_S390_VX)
  85. return CPU_Z13;
  86. return CPU_GENERIC;
  87. }