You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dynamic_power.c 8.3 kB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. #include "common.h"
  2. extern gotoblas_t gotoblas_POWER6;
  3. extern gotoblas_t gotoblas_POWER8;
  4. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  5. extern gotoblas_t gotoblas_POWER9;
  6. #endif
  7. #ifdef HAVE_P10_SUPPORT
  8. extern gotoblas_t gotoblas_POWER10;
  9. #endif
  10. extern void openblas_warning(int verbose, const char *msg);
  /*
   * Printable names of the selectable core types.
   *
   * The position of each name is significant: gotoblas_corename() and
   * force_coretype() address this table by fixed index (0 = "unknown",
   * 1 = POWER6, 2 = POWER8, 3 = POWER9, 4 = POWER10).
   */
  static char *corename[] = {
    "unknown",
    "POWER6",
    "POWER8",
    "POWER9",
    "POWER10"
  };

  /*
   * Number of entries in corename[]. Derived from the table itself so the
   * count cannot drift when a name is added or removed; cast to int because
   * it is compared against a signed loop index.
   */
  #define NUM_CORETYPES ((int)(sizeof(corename) / sizeof(corename[0])))
  19. char *gotoblas_corename(void) {
  20. #ifndef C_PGI
  21. if (gotoblas == &gotoblas_POWER6) return corename[1];
  22. #endif
  23. if (gotoblas == &gotoblas_POWER8) return corename[2];
  24. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  25. if (gotoblas == &gotoblas_POWER9) return corename[3];
  26. #endif
  27. #ifdef HAVE_P10_SUPPORT
  28. if (gotoblas == &gotoblas_POWER10) return corename[4];
  29. #endif
  30. return corename[0];
  31. }
  /* CPU generation codes produced by the cpuid() implementations in this file. */
  #define CPU_UNKNOWN   0
  #define CPU_POWER5    5
  #define CPU_POWER6    6
  #define CPU_POWER8    8
  #define CPU_POWER9    9
  #define CPU_POWER10  10

  /*
   * Implementation identifiers compared against
   * _system_configuration.implementation by the AIX cpuid(). The values
   * below are only used when nothing has defined the macros already.
   */
  #if !defined(POWER_9)
  #define POWER_9  0x20000  /* 9 class CPU */
  #endif

  #if !defined(POWER_10)
  #define POWER_10 0x40000  /* 10 class CPU */
  #endif
  44. #ifdef _AIX
  45. #include <sys/systemcfg.h>
  46. static int cpuid(void)
  47. {
  48. int arch = _system_configuration.implementation;
  49. #ifdef POWER_6
  50. if (arch == POWER_6) return CPU_POWER6;
  51. #endif
  52. #ifdef POWER_7
  53. else if (arch == POWER_7) return CPU_POWER6;
  54. #endif
  55. #ifdef POWER_8
  56. else if (arch == POWER_8) return CPU_POWER8;
  57. #endif
  58. #ifdef POWER_9
  59. else if (arch == POWER_9) return CPU_POWER9;
  60. #endif
  61. #ifdef POWER_10
  62. else if (arch >= POWER_10) return CPU_POWER10;
  63. #endif
  64. return CPU_UNKNOWN;
  65. }
  66. #elif defined(C_PGI) || defined(__clang__)
  67. /*
  68. * NV HPC compilers do not yet implement __builtin_cpu_is().
  69. * Fake a version here for use in the CPU detection code below.
  70. *
  71. * Strategy here is to first check the CPU to see what it actually is,
  72. * and then test the input to see if what the CPU actually is matches
  73. * what was requested.
  74. */
  75. /*
  76. * Define POWER processor version table.
  77. *
  78. * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
  79. */
  80. static struct {
  81. uint32_t pvr_mask;
  82. uint32_t pvr_value;
  83. const char* cpu_name;
  84. uint32_t cpu_type;
  85. } pvrPOWER [] = {
  86. { /* POWER6 in P5+ mode; 2.04-compliant processor */
  87. .pvr_mask = 0xffffffff,
  88. .pvr_value = 0x0f000001,
  89. .cpu_name = "POWER5+",
  90. .cpu_type = CPU_POWER5,
  91. },
  92. { /* Power6 aka POWER6X*/
  93. .pvr_mask = 0xffff0000,
  94. .pvr_value = 0x003e0000,
  95. .cpu_name = "POWER6 (raw)",
  96. .cpu_type = CPU_POWER6,
  97. },
  98. { /* Power7 */
  99. .pvr_mask = 0xffff0000,
  100. .pvr_value = 0x003f0000,
  101. .cpu_name = "POWER7 (raw)",
  102. .cpu_type = CPU_POWER6,
  103. },
  104. { /* Power7+ */
  105. .pvr_mask = 0xffff0000,
  106. .pvr_value = 0x004A0000,
  107. .cpu_name = "POWER7+ (raw)",
  108. .cpu_type = CPU_POWER6,
  109. },
  110. { /* Power8E */
  111. .pvr_mask = 0xffff0000,
  112. .pvr_value = 0x004b0000,
  113. .cpu_name = "POWER8E (raw)",
  114. .cpu_type = CPU_POWER8,
  115. },
  116. { /* Power8NVL */
  117. .pvr_mask = 0xffff0000,
  118. .pvr_value = 0x004c0000,
  119. .cpu_name = "POWER8NVL (raw)",
  120. .cpu_type = CPU_POWER8,
  121. },
  122. { /* Power8 */
  123. .pvr_mask = 0xffff0000,
  124. .pvr_value = 0x004d0000,
  125. .cpu_name = "POWER8 (raw)",
  126. .cpu_type = CPU_POWER8,
  127. },
  128. { /* Power9 DD2.0 */
  129. .pvr_mask = 0xffffefff,
  130. .pvr_value = 0x004e0200,
  131. .cpu_name = "POWER9 (raw)",
  132. .cpu_type = CPU_POWER9,
  133. },
  134. { /* Power9 DD 2.1 */
  135. .pvr_mask = 0xffffefff,
  136. .pvr_value = 0x004e0201,
  137. .cpu_name = "POWER9 (raw)",
  138. .cpu_type = CPU_POWER9,
  139. },
  140. { /* Power9 DD2.2 or later */
  141. .pvr_mask = 0xffff0000,
  142. .pvr_value = 0x004e0000,
  143. .cpu_name = "POWER9 (raw)",
  144. .cpu_type = CPU_POWER9,
  145. },
  146. { /* Power10 */
  147. .pvr_mask = 0xffff0000,
  148. .pvr_value = 0x00800000,
  149. .cpu_name = "POWER10 (raw)",
  150. .cpu_type = CPU_POWER10,
  151. },
  152. { /* End of table, pvr_mask and pvr_value must be zero */
  153. .pvr_mask = 0x0,
  154. .pvr_value = 0x0,
  155. .cpu_name = "Unknown",
  156. .cpu_type = CPU_UNKNOWN,
  157. },
  158. };
  159. static int cpuid(void)
  160. {
  161. int i;
  162. uint32_t pvr;
  163. uint32_t cpu_type;
  164. asm("mfpvr %0" : "=r"(pvr));
  165. for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
  166. if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
  167. break;
  168. }
  169. }
  170. #if defined(DEBUG)
  171. printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
  172. pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
  173. #endif
  174. cpu_type = pvrPOWER[i].cpu_type;
  175. return (int)(cpu_type);
  176. }
  177. #elif !defined(__BUILTIN_CPU_SUPPORTS__)
  178. static int cpuid(void)
  179. {
  180. return CPU_UNKNOWN;
  181. }
  182. #endif /* _AIX */
  183. #ifndef __BUILTIN_CPU_SUPPORTS__
  184. #include <string.h>
  185. #ifndef __has_builtin
  186. #define __has_builtin(x) 0
  187. #endif
  188. #if defined(_AIX) || !__has_builtin(__builtin_cpu_is)
  189. static int __builtin_cpu_is(const char *arg)
  190. {
  191. static int ipinfo = -1;
  192. if (ipinfo < 0) {
  193. ipinfo = cpuid();
  194. }
  195. #ifdef HAVE_P10_SUPPORT
  196. if (ipinfo == CPU_POWER10) {
  197. if (!strcmp(arg, "power10")) return 1;
  198. }
  199. #endif
  200. if (ipinfo == CPU_POWER9) {
  201. if (!strcmp(arg, "power9")) return 1;
  202. } else if (ipinfo == CPU_POWER8) {
  203. if (!strcmp(arg, "power8")) return 1;
  204. #ifndef C_PGI
  205. } else if (ipinfo == CPU_POWER6) {
  206. if (!strcmp(arg, "power6")) return 1;
  207. #endif
  208. }
  209. return 0;
  210. }
  211. #endif
  212. #if defined(_AIX) || !__has_builtin(__builtin_cpu_supports)
  /*
   * Stand-in for the compiler builtin of the same name, compiled on AIX or
   * when the compiler does not report having the builtin. No feature query
   * is performed: every feature is reported as unsupported (0).
   */
  static int __builtin_cpu_supports(const char *arg) {
    (void)arg;  /* the requested feature name is not inspected */
    return 0;
  }
  217. #endif
  218. #endif
  219. static gotoblas_t *get_coretype(void) {
  220. #ifndef C_PGI
  221. if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
  222. return &gotoblas_POWER6;
  223. #endif
  224. if (__builtin_cpu_is("power8"))
  225. return &gotoblas_POWER8;
  226. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  227. if (__builtin_cpu_is("power9"))
  228. return &gotoblas_POWER9;
  229. #endif
  230. #ifdef HAVE_P10_SUPPORT
  231. #if defined(_AIX) || defined(__clang__)
  232. if (__builtin_cpu_is("power10"))
  233. #else
  234. if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
  235. #endif
  236. return &gotoblas_POWER10;
  237. #endif
  238. /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
  239. #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
  240. if (__builtin_cpu_is("power10"))
  241. return &gotoblas_POWER9;
  242. #endif
  243. return NULL;
  244. }
  245. static gotoblas_t *force_coretype(char * coretype) {
  246. int i ;
  247. int found = -1;
  248. char message[128];
  249. for ( i = 0 ; i < NUM_CORETYPES; i++)
  250. {
  251. if (!strncasecmp(coretype, corename[i], 20))
  252. {
  253. found = i;
  254. break;
  255. }
  256. }
  257. switch (found)
  258. {
  259. #ifndef C_PGI
  260. case 1: return (&gotoblas_POWER6);
  261. #endif
  262. case 2: return (&gotoblas_POWER8);
  263. #if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
  264. case 3: return (&gotoblas_POWER9);
  265. #endif
  266. #ifdef HAVE_P10_SUPPORT
  267. case 4: return (&gotoblas_POWER10);
  268. #endif
  269. default: return NULL;
  270. }
  271. snprintf(message, 128, "Core not found: %s\n", coretype);
  272. openblas_warning(1, message);
  273. }
  274. void gotoblas_dynamic_init(void) {
  275. char coremsg[128];
  276. char coren[22];
  277. char *p;
  278. if (gotoblas) return;
  279. p = getenv("OPENBLAS_CORETYPE");
  280. if ( p )
  281. {
  282. gotoblas = force_coretype(p);
  283. }
  284. else
  285. {
  286. gotoblas = get_coretype();
  287. }
  288. if (gotoblas == NULL)
  289. {
  290. snprintf(coremsg, 128, "Falling back to POWER8 core\n");
  291. openblas_warning(1, coremsg);
  292. gotoblas = &gotoblas_POWER8;
  293. }
  294. if (gotoblas && gotoblas -> init) {
  295. strncpy(coren,gotoblas_corename(),20);
  296. sprintf(coremsg, "Core: %s\n",coren);
  297. if (getenv("GET_OPENBLAS_CORETYPE")) {
  298. fprintf(stderr, "%s", coremsg);
  299. }
  300. openblas_warning(2, coremsg);
  301. gotoblas -> init();
  302. } else {
  303. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  304. exit(1);
  305. }
  306. }
  307. void gotoblas_dynamic_quit(void) {
  308. gotoblas = NULL;
  309. }