You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dynamic_power.c 8.6 kB

3 months ago
3 months ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. #include "common.h"
  2. extern gotoblas_t gotoblas_POWER6;
  3. extern gotoblas_t gotoblas_POWER8;
  4. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  5. extern gotoblas_t gotoblas_POWER9;
  6. #endif
  7. #ifdef HAVE_P10_SUPPORT
  8. extern gotoblas_t gotoblas_POWER10;
  9. #endif
  10. extern void openblas_warning(int verbose, const char *msg);
  /*
   * Names of the kernel sets this file can dispatch to.
   * The position of a name in this array is the id that force_coretype()
   * switches on; entry 0 is what gotoblas_corename() reports when the active
   * table is none of the known ones.
   */
  static char *corename[] = {
    "unknown",
    "POWER6",
    "POWER8",
    "POWER9",
    "POWER10"
  };

  /*
   * Number of entries in corename[].  Computed from the array itself so that
   * adding or removing a name cannot leave the count stale; cast to int
   * because it is compared against an int loop index.
   */
  #define NUM_CORETYPES ((int)(sizeof(corename) / sizeof(corename[0])))
  19. char *gotoblas_corename(void) {
  20. #ifndef C_PGI
  21. if (gotoblas == &gotoblas_POWER6) return corename[1];
  22. #endif
  23. if (gotoblas == &gotoblas_POWER8) return corename[2];
  24. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  25. if (gotoblas == &gotoblas_POWER9) return corename[3];
  26. #endif
  27. #ifdef HAVE_P10_SUPPORT
  28. if (gotoblas == &gotoblas_POWER10) return corename[4];
  29. #endif
  30. return corename[0];
  31. }
  /* CPU family ids produced by cpuid() in this file. */
  #define CPU_UNKNOWN  0
  #define CPU_POWER5   5
  #define CPU_POWER6   6
  #define CPU_POWER8   8
  #define CPU_POWER9   9
  #define CPU_POWER10  10

  /*
   * Fallback definitions of the POWER_9 / POWER_10 implementation ids, used
   * only when nothing earlier has defined them.  The AIX cpuid() compares
   * these against _system_configuration.implementation.
   */
  #if !defined(POWER_9)
  #define POWER_9 0x20000 /* 9 class CPU */
  #endif
  #if !defined(POWER_10)
  #define POWER_10 0x40000 /* 10 class CPU */
  #endif
  44. #ifdef _AIX
  45. #include <sys/systemcfg.h>
  46. static int cpuid(void)
  47. {
  48. int arch = _system_configuration.implementation;
  49. #ifdef POWER_6
  50. if (arch == POWER_6) return CPU_POWER6;
  51. #endif
  52. #ifdef POWER_7
  53. else if (arch == POWER_7) return CPU_POWER6;
  54. #endif
  55. #ifdef POWER_8
  56. else if (arch == POWER_8) return CPU_POWER8;
  57. #endif
  58. #ifdef POWER_9
  59. else if (arch == POWER_9) return CPU_POWER9;
  60. #endif
  61. #ifdef POWER_10
  62. else if (arch >= POWER_10) return CPU_POWER10;
  63. #endif
  64. #ifdef POWER_11
  65. else if (arch >= POWER_11) return CPU_POWER10;
  66. #endif
  67. return CPU_UNKNOWN;
  68. }
  69. #elif defined(C_PGI) || defined(__clang__)
  70. /*
  71. * NV HPC compilers do not yet implement __builtin_cpu_is().
  72. * Fake a version here for use in the CPU detection code below.
  73. *
  74. * Strategy here is to first check the CPU to see what it actually is,
  75. * and then test the input to see if what the CPU actually is matches
  76. * what was requested.
  77. */
  78. /*
  79. * Define POWER processor version table.
  80. *
  81. * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
  82. */
  83. static struct {
  84. uint32_t pvr_mask;
  85. uint32_t pvr_value;
  86. const char* cpu_name;
  87. uint32_t cpu_type;
  88. } pvrPOWER [] = {
  89. { /* POWER6 in P5+ mode; 2.04-compliant processor */
  90. .pvr_mask = 0xffffffff,
  91. .pvr_value = 0x0f000001,
  92. .cpu_name = "POWER5+",
  93. .cpu_type = CPU_POWER5,
  94. },
  95. { /* Power6 aka POWER6X*/
  96. .pvr_mask = 0xffff0000,
  97. .pvr_value = 0x003e0000,
  98. .cpu_name = "POWER6 (raw)",
  99. .cpu_type = CPU_POWER6,
  100. },
  101. { /* Power7 */
  102. .pvr_mask = 0xffff0000,
  103. .pvr_value = 0x003f0000,
  104. .cpu_name = "POWER7 (raw)",
  105. .cpu_type = CPU_POWER6,
  106. },
  107. { /* Power7+ */
  108. .pvr_mask = 0xffff0000,
  109. .pvr_value = 0x004A0000,
  110. .cpu_name = "POWER7+ (raw)",
  111. .cpu_type = CPU_POWER6,
  112. },
  113. { /* Power8E */
  114. .pvr_mask = 0xffff0000,
  115. .pvr_value = 0x004b0000,
  116. .cpu_name = "POWER8E (raw)",
  117. .cpu_type = CPU_POWER8,
  118. },
  119. { /* Power8NVL */
  120. .pvr_mask = 0xffff0000,
  121. .pvr_value = 0x004c0000,
  122. .cpu_name = "POWER8NVL (raw)",
  123. .cpu_type = CPU_POWER8,
  124. },
  125. { /* Power8 */
  126. .pvr_mask = 0xffff0000,
  127. .pvr_value = 0x004d0000,
  128. .cpu_name = "POWER8 (raw)",
  129. .cpu_type = CPU_POWER8,
  130. },
  131. { /* Power9 DD2.0 */
  132. .pvr_mask = 0xffffefff,
  133. .pvr_value = 0x004e0200,
  134. .cpu_name = "POWER9 (raw)",
  135. .cpu_type = CPU_POWER9,
  136. },
  137. { /* Power9 DD 2.1 */
  138. .pvr_mask = 0xffffefff,
  139. .pvr_value = 0x004e0201,
  140. .cpu_name = "POWER9 (raw)",
  141. .cpu_type = CPU_POWER9,
  142. },
  143. { /* Power9 DD2.2 or later */
  144. .pvr_mask = 0xffff0000,
  145. .pvr_value = 0x004e0000,
  146. .cpu_name = "POWER9 (raw)",
  147. .cpu_type = CPU_POWER9,
  148. },
  149. { /* Power10 */
  150. .pvr_mask = 0xffff0000,
  151. .pvr_value = 0x00800000,
  152. .cpu_name = "POWER10 (raw)",
  153. .cpu_type = CPU_POWER10,
  154. },
  155. { /* Power11 */
  156. .pvr_mask = 0xffff0000,
  157. .pvr_value = 0x00820000,
  158. .cpu_name = "POWER11 (raw)",
  159. .cpu_type = CPU_POWER10,
  160. },
  161. { /* End of table, pvr_mask and pvr_value must be zero */
  162. .pvr_mask = 0x0,
  163. .pvr_value = 0x0,
  164. .cpu_name = "Unknown",
  165. .cpu_type = CPU_UNKNOWN,
  166. },
  167. };
  168. static int cpuid(void)
  169. {
  170. int i;
  171. uint32_t pvr;
  172. uint32_t cpu_type;
  173. asm("mfpvr %0" : "=r"(pvr));
  174. for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
  175. if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
  176. break;
  177. }
  178. }
  179. #if defined(DEBUG)
  180. printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
  181. pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
  182. #endif
  183. cpu_type = pvrPOWER[i].cpu_type;
  184. return (int)(cpu_type);
  185. }
  186. #elif !defined(__BUILTIN_CPU_SUPPORTS__)
  187. static int cpuid(void)
  188. {
  189. return CPU_UNKNOWN;
  190. }
  191. #endif /* _AIX */
  192. #ifndef __BUILTIN_CPU_SUPPORTS__
  193. #include <string.h>
  194. #ifndef __has_builtin
  195. #define __has_builtin(x) 0
  196. #endif
  197. #if defined(_AIX) || !__has_builtin(__builtin_cpu_is)
  198. static int __builtin_cpu_is(const char *arg)
  199. {
  200. static int ipinfo = -1;
  201. if (ipinfo < 0) {
  202. ipinfo = cpuid();
  203. }
  204. #ifdef HAVE_P10_SUPPORT
  205. if (ipinfo == CPU_POWER10) {
  206. if (!strcmp(arg, "power10")) return 1;
  207. }
  208. #endif
  209. if (ipinfo == CPU_POWER9) {
  210. if (!strcmp(arg, "power9")) return 1;
  211. } else if (ipinfo == CPU_POWER8) {
  212. if (!strcmp(arg, "power8")) return 1;
  213. #ifndef C_PGI
  214. } else if (ipinfo == CPU_POWER6) {
  215. if (!strcmp(arg, "power6")) return 1;
  216. #endif
  217. }
  218. return 0;
  219. }
  220. #endif
  221. #if defined(_AIX) || !__has_builtin(__builtin_cpu_supports)
  /*
   * Replacement for the compiler's __builtin_cpu_supports() on toolchains that
   * do not provide it.  No feature query is implemented, so every feature is
   * reported as unsupported.
   */
  static int __builtin_cpu_supports(const char *arg)
  {
    (void)arg; /* the requested feature name is not examined */

    return 0;
  }
  226. #endif
  227. #endif
  228. static gotoblas_t *get_coretype(void) {
  229. #ifndef C_PGI
  230. if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
  231. return &gotoblas_POWER6;
  232. #endif
  233. if (__builtin_cpu_is("power8"))
  234. return &gotoblas_POWER8;
  235. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  236. if (__builtin_cpu_is("power9"))
  237. return &gotoblas_POWER9;
  238. #endif
  239. #ifdef HAVE_P10_SUPPORT
  240. #if defined(_AIX) || defined(__clang__)
  241. if (__builtin_cpu_is("power10"))
  242. #else
  243. if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
  244. #endif
  245. return &gotoblas_POWER10;
  246. #endif
  247. /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
  248. #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
  249. if (__builtin_cpu_is("power10"))
  250. return &gotoblas_POWER9;
  251. #endif
  252. return NULL;
  253. }
  254. static gotoblas_t *force_coretype(char * coretype) {
  255. int i ;
  256. int found = -1;
  257. char message[128];
  258. for ( i = 0 ; i < NUM_CORETYPES; i++)
  259. {
  260. if (!strncasecmp(coretype, corename[i], 20))
  261. {
  262. found = i;
  263. break;
  264. }
  265. }
  266. switch (found)
  267. {
  268. #ifndef C_PGI
  269. case 1: return (&gotoblas_POWER6);
  270. #endif
  271. case 2: return (&gotoblas_POWER8);
  272. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  273. case 3: return (&gotoblas_POWER9);
  274. #endif
  275. #ifdef HAVE_P10_SUPPORT
  276. case 4: return (&gotoblas_POWER10);
  277. #endif
  278. default: return NULL;
  279. }
  280. snprintf(message, 128, "Core not found: %s\n", coretype);
  281. openblas_warning(1, message);
  282. }
  283. void gotoblas_dynamic_init(void) {
  284. char coremsg[128];
  285. char coren[22];
  286. char *p;
  287. if (gotoblas) return;
  288. p = getenv("OPENBLAS_CORETYPE");
  289. if ( p )
  290. {
  291. gotoblas = force_coretype(p);
  292. }
  293. else
  294. {
  295. gotoblas = get_coretype();
  296. }
  297. if (gotoblas == NULL)
  298. {
  299. snprintf(coremsg, 128, "Falling back to POWER8 core\n");
  300. openblas_warning(1, coremsg);
  301. gotoblas = &gotoblas_POWER8;
  302. }
  303. if (gotoblas && gotoblas -> init) {
  304. strncpy(coren,gotoblas_corename(),20);
  305. sprintf(coremsg, "Core: %s\n",coren);
  306. if (getenv("GET_OPENBLAS_CORETYPE")) {
  307. fprintf(stderr, "%s", coremsg);
  308. }
  309. openblas_warning(2, coremsg);
  310. gotoblas -> init();
  311. } else {
  312. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  313. exit(1);
  314. }
  315. }
  316. void gotoblas_dynamic_quit(void) {
  317. gotoblas = NULL;
  318. }