You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dynamic_arm64.c 13 kB

4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
2 years ago
4 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023-2024 The OpenBLAS Project */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include "common.h"
  40. #if (defined OS_LINUX || defined OS_ANDROID)
  41. #include <asm/hwcap.h>
  42. #include <sys/auxv.h>
  43. #endif
  44. extern gotoblas_t gotoblas_ARMV8;
  45. #ifdef DYNAMIC_LIST
  46. #ifdef DYN_CORTEXA53
  47. extern gotoblas_t gotoblas_CORTEXA53;
  48. #else
  49. #define gotoblas_CORTEXA53 gotoblas_ARMV8
  50. #endif
  51. #ifdef DYN_CORTEXA57
  52. extern gotoblas_t gotoblas_CORTEXA57;
  53. #else
  54. #define gotoblas_CORTEXA57 gotoblas_ARMV8
  55. #endif
  56. #ifdef DYN_CORTEXA72
  57. extern gotoblas_t gotoblas_CORTEXA72;
  58. #else
  59. #define gotoblas_CORTEXA72 gotoblas_ARMV8
  60. #endif
  61. #ifdef DYN_CORTEXA73
  62. extern gotoblas_t gotoblas_CORTEXA73;
  63. #else
  64. #define gotoblas_CORTEXA73 gotoblas_ARMV8
  65. #endif
  66. #ifdef DYN_FALKOR
  67. extern gotoblas_t gotoblas_FALKOR;
  68. #else
  69. #define gotoblas_FALKOR gotoblas_ARMV8
  70. #endif
  71. #ifdef DYN_TSV110
  72. extern gotoblas_t gotoblas_TSV110;
  73. #else
  74. #define gotoblas_TSV110 gotoblas_ARMV8
  75. #endif
  76. #ifdef DYN_THUNDERX
  77. extern gotoblas_t gotoblas_THUNDERX;
  78. #else
  79. #define gotoblas_THUNDERX gotoblas_ARMV8
  80. #endif
  81. #ifdef DYN_THUNDERX2T99
  82. extern gotoblas_t gotoblas_THUNDERX2T99;
  83. #else
  84. #define gotoblas_THUNDERX2T99 gotoblas_ARMV8
  85. #endif
  86. #ifdef DYN_THUNDERX3T110
  87. extern gotoblas_t gotoblas_THUNDERX3T110;
  88. #else
  89. #define gotoblas_THUNDERX3T110 gotoblas_ARMV8
  90. #endif
  91. #ifdef DYN_EMAG8180
  92. extern gotoblas_t gotoblas_EMAG8180;
  93. #else
  94. #define gotoblas_EMAG8180 gotoblas_ARMV8
  95. #endif
  96. #ifdef DYN_NEOVERSEN1
  97. extern gotoblas_t gotoblas_NEOVERSEN1;
  98. #else
  99. #define gotoblas_NEOVERSEN1 gotoblas_ARMV8
  100. #endif
  101. #ifdef DYN_NEOVERSEV1
  102. extern gotoblas_t gotoblas_NEOVERSEV1;
  103. #else
  104. #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
  105. #endif
  106. #ifdef DYN_NEOVERSEN2
  107. extern gotoblas_t gotoblas_NEOVERSEN2;
  108. #else
  109. #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
  110. #endif
  111. #ifdef DYN_ARMV8SVE
  112. extern gotoblas_t gotoblas_ARMV8SVE;
  113. #else
  114. #define gotoblas_ARMV8SVE gotoblas_ARMV8
  115. #endif
  116. #ifdef DYN_CORTEX_A55
  117. extern gotoblas_t gotoblas_CORTEXA55;
  118. #else
  119. #define gotoblas_CORTEXA55 gotoblas_ARMV8
  120. #endif
  121. #else
  122. extern gotoblas_t gotoblas_CORTEXA53;
  123. #define gotoblas_CORTEXA55 gotoblas_CORTEXA53
  124. extern gotoblas_t gotoblas_CORTEXA57;
  125. #define gotoblas_CORTEXA72 gotoblas_CORTEXA57
  126. #define gotoblas_CORTEXA73 gotoblas_CORTEXA57
  127. #define gotoblas_FALKOR gotoblas_CORTEXA57
  128. extern gotoblas_t gotoblas_THUNDERX;
  129. extern gotoblas_t gotoblas_THUNDERX2T99;
  130. extern gotoblas_t gotoblas_TSV110;
  131. extern gotoblas_t gotoblas_EMAG8180;
  132. extern gotoblas_t gotoblas_NEOVERSEN1;
  133. #ifndef NO_SVE
  134. extern gotoblas_t gotoblas_NEOVERSEV1;
  135. extern gotoblas_t gotoblas_NEOVERSEN2;
  136. extern gotoblas_t gotoblas_ARMV8SVE;
  137. #else
  138. #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
  139. #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
  140. #define gotoblas_ARMV8SVE gotoblas_ARMV8
  141. #endif
  142. extern gotoblas_t gotoblas_THUNDERX3T110;
  143. #endif
  144. #define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEV1
  145. extern void openblas_warning(int verbose, const char * msg);
  146. #define FALLBACK_VERBOSE 1
  147. #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
  148. #define NUM_CORETYPES 17
  149. /*
  150. * In case asm/hwcap.h is outdated on the build system, make sure
  151. * that HWCAP_CPUID is defined
  152. */
  153. #ifndef HWCAP_CPUID
  154. #define HWCAP_CPUID (1 << 11)
  155. #endif
  156. #ifndef HWCAP_SVE
  157. #define HWCAP_SVE (1 << 22)
  158. #endif
  159. #define get_cpu_ftr(id, var) ({ \
  160. __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
  161. })
  162. static char *corename[] = {
  163. "armv8",
  164. "cortexa53",
  165. "cortexa57",
  166. "cortexa72",
  167. "cortexa73",
  168. "falkor",
  169. "thunderx",
  170. "thunderx2t99",
  171. "tsv110",
  172. "emag8180",
  173. "neoversen1",
  174. "neoversev1",
  175. "neoversev2",
  176. "neoversen2",
  177. "thunderx3t110",
  178. "cortexa55",
  179. "armv8sve",
  180. "unknown"
  181. };
  182. char *gotoblas_corename(void) {
  183. if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
  184. if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
  185. if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
  186. if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
  187. if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
  188. if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
  189. if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
  190. if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
  191. if (gotoblas == &gotoblas_TSV110) return corename[ 8];
  192. if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
  193. if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
  194. if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
  195. if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12];
  196. if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
  197. if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
  198. if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
  199. if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
  200. return corename[NUM_CORETYPES];
  201. }
  202. static gotoblas_t *force_coretype(char *coretype) {
  203. int i ;
  204. int found = -1;
  205. char message[128];
  206. for ( i=0 ; i < NUM_CORETYPES; i++)
  207. {
  208. if (!strncasecmp(coretype, corename[i], 20))
  209. {
  210. found = i;
  211. break;
  212. }
  213. }
  214. switch (found)
  215. {
  216. case 0: return (&gotoblas_ARMV8);
  217. case 1: return (&gotoblas_CORTEXA53);
  218. case 2: return (&gotoblas_CORTEXA57);
  219. case 3: return (&gotoblas_CORTEXA72);
  220. case 4: return (&gotoblas_CORTEXA73);
  221. case 5: return (&gotoblas_FALKOR);
  222. case 6: return (&gotoblas_THUNDERX);
  223. case 7: return (&gotoblas_THUNDERX2T99);
  224. case 8: return (&gotoblas_TSV110);
  225. case 9: return (&gotoblas_EMAG8180);
  226. case 10: return (&gotoblas_NEOVERSEN1);
  227. case 11: return (&gotoblas_NEOVERSEV1);
  228. case 12: return (&gotoblas_NEOVERSEV2);
  229. case 13: return (&gotoblas_NEOVERSEN2);
  230. case 14: return (&gotoblas_THUNDERX3T110);
  231. case 15: return (&gotoblas_CORTEXA55);
  232. case 16: return (&gotoblas_ARMV8SVE);
  233. }
  234. snprintf(message, 128, "Core not found: %s\n", coretype);
  235. openblas_warning(1, message);
  236. return NULL;
  237. }
  238. static gotoblas_t *get_coretype(void) {
  239. int implementer, variant, part, arch, revision, midr_el1;
  240. char coremsg[128];
  241. #if defined (OS_DARWIN)
  242. return &gotoblas_NEOVERSEN1;
  243. #endif
  244. #if (!defined OS_LINUX && !defined OS_ANDROID)
  245. return NULL;
  246. #else
  247. if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
  248. #ifdef __linux
  249. FILE *infile;
  250. char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
  251. p = (char *) NULL ;
  252. infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r");
  253. if (!infile) return NULL;
  254. (void)fgets(buffer, sizeof(buffer), infile);
  255. midr_el1=strtoul(buffer,NULL,16);
  256. fclose(infile);
  257. #else
  258. snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
  259. openblas_warning(1, coremsg);
  260. return NULL;
  261. #endif
  262. } else {
  263. get_cpu_ftr(MIDR_EL1, midr_el1);
  264. }
  265. /*
  266. * MIDR_EL1
  267. *
  268. * 31 24 23 20 19 16 15 4 3 0
  269. * -----------------------------------------------------------------
  270. * | Implementer | Variant | Architecture | Part Number | Revision |
  271. * -----------------------------------------------------------------
  272. */
  273. implementer = (midr_el1 >> 24) & 0xFF;
  274. part = (midr_el1 >> 4) & 0xFFF;
  275. switch(implementer)
  276. {
  277. case 0x41: // ARM
  278. switch (part)
  279. {
  280. case 0xd03: // Cortex A53
  281. return &gotoblas_CORTEXA53;
  282. case 0xd07: // Cortex A57
  283. return &gotoblas_CORTEXA57;
  284. case 0xd08: // Cortex A72
  285. return &gotoblas_CORTEXA72;
  286. case 0xd09: // Cortex A73
  287. return &gotoblas_CORTEXA73;
  288. case 0xd0c: // Neoverse N1
  289. return &gotoblas_NEOVERSEN1;
  290. #ifndef NO_SVE
  291. case 0xd49:
  292. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  293. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  294. return &gotoblas_NEOVERSEN1;
  295. } else
  296. return &gotoblas_NEOVERSEN2;
  297. case 0xd40:
  298. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  299. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  300. return &gotoblas_NEOVERSEN1;
  301. }else
  302. return &gotoblas_NEOVERSEV1;
  303. case 0xd4f:
  304. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  305. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  306. return &gotoblas_NEOVERSEN1;
  307. } else {
  308. return &gotoblas_NEOVERSEV2;
  309. }
  310. #endif
  311. case 0xd05: // Cortex A55
  312. return &gotoblas_CORTEXA55;
  313. }
  314. break;
  315. case 0x42: // Broadcom
  316. switch (part)
  317. {
  318. case 0x516: // Vulcan
  319. return &gotoblas_THUNDERX2T99;
  320. }
  321. break;
  322. case 0x43: // Cavium
  323. switch (part)
  324. {
  325. case 0x0a1: // ThunderX
  326. return &gotoblas_THUNDERX;
  327. case 0x0af: // ThunderX2
  328. return &gotoblas_THUNDERX2T99;
  329. case 0x0b8: // ThunderX3
  330. return &gotoblas_THUNDERX3T110;
  331. }
  332. break;
  333. case 0x48: // HiSilicon
  334. switch (part)
  335. {
  336. case 0xd01: // tsv110
  337. return &gotoblas_TSV110;
  338. }
  339. break;
  340. case 0x50: // Ampere
  341. switch (part)
  342. {
  343. case 0x000: // Skylark/EMAG8180
  344. return &gotoblas_EMAG8180;
  345. }
  346. break;
  347. case 0x51: // Qualcomm
  348. switch (part)
  349. {
  350. case 0xc00: // Falkor
  351. return &gotoblas_FALKOR;
  352. }
  353. break;
  354. case 0x61: // Apple
  355. return &gotoblas_NEOVERSEN1;
  356. break;
  357. default:
  358. snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
  359. openblas_warning(1, coremsg);
  360. }
  361. #ifndef NO_SVE
  362. if ((getauxval(AT_HWCAP) & HWCAP_SVE)) {
  363. return &gotoblas_ARMV8SVE;
  364. }
  365. #endif
  366. return NULL;
  367. #endif
  368. }
  369. void gotoblas_dynamic_init(void) {
  370. char coremsg[128];
  371. char coren[22];
  372. char *p;
  373. if (gotoblas) return;
  374. p = getenv("OPENBLAS_CORETYPE");
  375. if ( p )
  376. {
  377. gotoblas = force_coretype(p);
  378. }
  379. else
  380. {
  381. gotoblas = get_coretype();
  382. }
  383. if (gotoblas == NULL)
  384. {
  385. snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
  386. openblas_warning(1, coremsg);
  387. gotoblas = &gotoblas_ARMV8;
  388. }
  389. if (gotoblas && gotoblas->init) {
  390. strncpy(coren, gotoblas_corename(), 20);
  391. sprintf(coremsg, "Core: %s\n", coren);
  392. openblas_warning(2, coremsg);
  393. gotoblas -> init();
  394. } else {
  395. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  396. exit(1);
  397. }
  398. }
  399. void gotoblas_dynamic_quit(void) {
  400. gotoblas = NULL;
  401. }