You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

dynamic_power.c 6.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. #include "common.h"
  2. extern gotoblas_t gotoblas_POWER6;
  3. extern gotoblas_t gotoblas_POWER8;
  4. #if (!defined __GNUC__) || ( __GNUC__ >= 6)
  5. extern gotoblas_t gotoblas_POWER9;
  6. #endif
  7. #ifdef HAVE_P10_SUPPORT
  8. extern gotoblas_t gotoblas_POWER10;
  9. #endif
  10. extern void openblas_warning(int verbose, const char *msg);
  /*
   * Printable names of the selectable cores.  The index of each name is the
   * value used by gotoblas_corename() and force_coretype() to identify a core;
   * index 0 ("unknown") stands for "no recognised core".
   */
  static char *corename[] = {
    "unknown",
    "POWER6",
    "POWER8",
    "POWER9",
    "POWER10"
  };

  /*
   * Number of entries in corename[].  Derived from the array itself so that
   * the last entry ("POWER10") is always included when the table is scanned;
   * a hand-maintained count of 4 left it out.
   */
  #define NUM_CORETYPES ((int) (sizeof corename / sizeof corename[0]))
  19. char *gotoblas_corename(void) {
  20. #ifndef C_PGI
  21. if (gotoblas == &gotoblas_POWER6) return corename[1];
  22. #endif
  23. if (gotoblas == &gotoblas_POWER8) return corename[2];
  24. #if (!defined __GNUC__) || ( __GNUC__ >= 6)
  25. if (gotoblas == &gotoblas_POWER9) return corename[3];
  26. #endif
  27. #ifdef HAVE_P10_SUPPORT
  28. if (gotoblas == &gotoblas_POWER10) return corename[4];
  29. #endif
  30. return corename[0];
  31. }
  #if defined(__clang__)
  /*
   * Replacement for __builtin_cpu_supports() in clang builds.  It ignores
   * the requested feature name and always answers 0 ("not supported").
   * NOTE(review): presumably present because the builtin is not usable with
   * clang on this target -- confirm against the supported clang versions.
   */
  static int __builtin_cpu_supports(char* arg)
  {
    (void) arg;  /* the feature name is deliberately not inspected */
    return 0;
  }
  #endif
  38. #if defined(C_PGI) || defined(__clang__)
  39. /*
  40. * NV HPC compilers do not yet implement __builtin_cpu_is().
  41. * Fake a version here for use in the CPU detection code below.
  42. *
  43. * Strategy here is to first check the CPU to see what it actually is,
  44. * and then test the input to see if what the CPU actually is matches
  45. * what was requested.
  46. */
  47. #include <string.h>
  /*
   * POWER processor version table and the CPU family codes it maps to.
   *
   * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
   */
  #define CPU_UNKNOWN 0
  #define CPU_POWER5 5
  #define CPU_POWER6 6
  #define CPU_POWER8 8
  #define CPU_POWER9 9
  #define CPU_POWER10 10

  /*
   * Each row describes one processor version register (PVR) pattern: the
   * lookup in __builtin_cpu_is() selects the first row for which
   * (pvr & pvr_mask) == pvr_value.  Rows are therefore ordered from the most
   * specific pattern to the least specific one, and the final all-zero row
   * matches every PVR.
   */
  static struct {
    uint32_t pvr_mask;
    uint32_t pvr_value;
    const char* cpu_name;
    uint32_t cpu_type;
  } pvrPOWER [] = {
    /* pvr_mask    pvr_value   cpu_name           cpu_type */
    { 0xffffffff, 0x0f000001, "POWER5+",         CPU_POWER5  },  /* POWER6 in P5+ mode; 2.04-compliant processor */
    { 0xffff0000, 0x003e0000, "POWER6 (raw)",    CPU_POWER6  },  /* Power6 aka POWER6X */
    { 0xffff0000, 0x003f0000, "POWER7 (raw)",    CPU_POWER6  },  /* Power7 */
    { 0xffff0000, 0x004A0000, "POWER7+ (raw)",   CPU_POWER6  },  /* Power7+ */
    { 0xffff0000, 0x004b0000, "POWER8E (raw)",   CPU_POWER8  },  /* Power8E */
    { 0xffff0000, 0x004c0000, "POWER8NVL (raw)", CPU_POWER8  },  /* Power8NVL */
    { 0xffff0000, 0x004d0000, "POWER8 (raw)",    CPU_POWER8  },  /* Power8 */
    { 0xffffefff, 0x004e0200, "POWER9 (raw)",    CPU_POWER9  },  /* Power9 DD2.0 */
    { 0xffffefff, 0x004e0201, "POWER9 (raw)",    CPU_POWER9  },  /* Power9 DD 2.1 */
    { 0xffff0000, 0x004e0000, "POWER9 (raw)",    CPU_POWER9  },  /* Power9 DD2.2 or later */
    { 0xffff0000, 0x00800000, "POWER10 (raw)",   CPU_POWER10 },  /* Power10 */
    { 0x0,        0x0,        "Unknown",         CPU_UNKNOWN },  /* End of table, pvr_mask and pvr_value must be zero */
  };
  138. static int __builtin_cpu_is(const char *cpu) {
  139. int i;
  140. uint32_t pvr;
  141. uint32_t cpu_type;
  142. asm("mfpvr %0" : "=r"(pvr));
  143. for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
  144. if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
  145. break;
  146. }
  147. }
  148. #if defined(DEBUG)
  149. printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
  150. pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
  151. #endif
  152. cpu_type = pvrPOWER[i].cpu_type;
  153. if (!strcmp(cpu, "power8"))
  154. return cpu_type == CPU_POWER8;
  155. if (!strcmp(cpu, "power9"))
  156. return cpu_type == CPU_POWER9;
  157. return 0;
  158. }
  159. #endif /* C_PGI */
  160. static gotoblas_t *get_coretype(void) {
  161. #ifndef C_PGI
  162. if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
  163. return &gotoblas_POWER6;
  164. #endif
  165. if (__builtin_cpu_is("power8"))
  166. return &gotoblas_POWER8;
  167. #if (!defined __GNUC__) || ( __GNUC__ >= 6)
  168. if (__builtin_cpu_is("power9"))
  169. return &gotoblas_POWER9;
  170. #endif
  171. #ifdef HAVE_P10_SUPPORT
  172. if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
  173. return &gotoblas_POWER10;
  174. #endif
  175. /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
  176. #if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
  177. if (__builtin_cpu_is("power10"))
  178. return &gotoblas_POWER9;
  179. #endif
  180. return NULL;
  181. }
  182. static gotoblas_t *force_coretype(char * coretype) {
  183. int i ;
  184. int found = -1;
  185. char message[128];
  186. for ( i = 0 ; i < NUM_CORETYPES; i++)
  187. {
  188. if (!strncasecmp(coretype, corename[i], 20))
  189. {
  190. found = i;
  191. break;
  192. }
  193. }
  194. switch (found)
  195. {
  196. #ifndef C_PGI
  197. case 1: return (&gotoblas_POWER6);
  198. #endif
  199. case 2: return (&gotoblas_POWER8);
  200. #if (!defined __GNUC__) || ( __GNUC__ >= 6)
  201. case 3: return (&gotoblas_POWER9);
  202. #endif
  203. #ifdef HAVE_P10_SUPPORT
  204. case 4: return (&gotoblas_POWER10);
  205. #endif
  206. default: return NULL;
  207. }
  208. snprintf(message, 128, "Core not found: %s\n", coretype);
  209. openblas_warning(1, message);
  210. }
  211. void gotoblas_dynamic_init(void) {
  212. char coremsg[128];
  213. char coren[22];
  214. char *p;
  215. if (gotoblas) return;
  216. p = getenv("OPENBLAS_CORETYPE");
  217. if ( p )
  218. {
  219. gotoblas = force_coretype(p);
  220. }
  221. else
  222. {
  223. gotoblas = get_coretype();
  224. }
  225. if (gotoblas == NULL)
  226. {
  227. snprintf(coremsg, 128, "Falling back to POWER8 core\n");
  228. openblas_warning(1, coremsg);
  229. gotoblas = &gotoblas_POWER8;
  230. }
  231. if (gotoblas && gotoblas -> init) {
  232. strncpy(coren,gotoblas_corename(),20);
  233. sprintf(coremsg, "Core: %s\n",coren);
  234. openblas_warning(2, coremsg);
  235. gotoblas -> init();
  236. } else {
  237. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  238. exit(1);
  239. }
  240. }
  241. void gotoblas_dynamic_quit(void) {
  242. gotoblas = NULL;
  243. }