You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dynamic_arm64.c 14 kB

4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
2 years ago
4 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023-2024 The OpenBLAS Project */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include "common.h"
  40. #if (defined OS_LINUX || defined OS_ANDROID)
  41. #include <asm/hwcap.h>
  42. #include <sys/auxv.h>
  43. #endif
  44. extern gotoblas_t gotoblas_ARMV8;
  45. #ifdef DYNAMIC_LIST
  46. #ifdef DYN_CORTEXA53
  47. extern gotoblas_t gotoblas_CORTEXA53;
  48. #else
  49. #define gotoblas_CORTEXA53 gotoblas_ARMV8
  50. #endif
  51. #ifdef DYN_CORTEXA57
  52. extern gotoblas_t gotoblas_CORTEXA57;
  53. #else
  54. #define gotoblas_CORTEXA57 gotoblas_ARMV8
  55. #endif
  56. #ifdef DYN_CORTEXA72
  57. extern gotoblas_t gotoblas_CORTEXA72;
  58. #else
  59. #define gotoblas_CORTEXA72 gotoblas_ARMV8
  60. #endif
  61. #ifdef DYN_CORTEXA73
  62. extern gotoblas_t gotoblas_CORTEXA73;
  63. #else
  64. #define gotoblas_CORTEXA73 gotoblas_ARMV8
  65. #endif
  66. #ifdef DYN_FALKOR
  67. extern gotoblas_t gotoblas_FALKOR;
  68. #else
  69. #define gotoblas_FALKOR gotoblas_ARMV8
  70. #endif
  71. #ifdef DYN_TSV110
  72. extern gotoblas_t gotoblas_TSV110;
  73. #else
  74. #define gotoblas_TSV110 gotoblas_ARMV8
  75. #endif
  76. #ifdef DYN_THUNDERX
  77. extern gotoblas_t gotoblas_THUNDERX;
  78. #else
  79. #define gotoblas_THUNDERX gotoblas_ARMV8
  80. #endif
  81. #ifdef DYN_THUNDERX2T99
  82. extern gotoblas_t gotoblas_THUNDERX2T99;
  83. #else
  84. #define gotoblas_THUNDERX2T99 gotoblas_ARMV8
  85. #endif
  86. #ifdef DYN_THUNDERX3T110
  87. extern gotoblas_t gotoblas_THUNDERX3T110;
  88. #else
  89. #define gotoblas_THUNDERX3T110 gotoblas_ARMV8
  90. #endif
  91. #ifdef DYN_EMAG8180
  92. extern gotoblas_t gotoblas_EMAG8180;
  93. #else
  94. #define gotoblas_EMAG8180 gotoblas_ARMV8
  95. #endif
  96. #ifdef DYN_NEOVERSEN1
  97. extern gotoblas_t gotoblas_NEOVERSEN1;
  98. #else
  99. #define gotoblas_NEOVERSEN1 gotoblas_ARMV8
  100. #endif
  101. #ifdef DYN_NEOVERSEV1
  102. extern gotoblas_t gotoblas_NEOVERSEV1;
  103. #else
  104. #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
  105. #endif
  106. #ifdef DYN_NEOVERSEN2
  107. extern gotoblas_t gotoblas_NEOVERSEN2;
  108. #else
  109. #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
  110. #endif
  111. #ifdef DYN_ARMV8SVE
  112. extern gotoblas_t gotoblas_ARMV8SVE;
  113. #else
  114. #define gotoblas_ARMV8SVE gotoblas_ARMV8
  115. #endif
  116. #ifdef DYN_CORTEX_A55
  117. extern gotoblas_t gotoblas_CORTEXA55;
  118. #else
  119. #define gotoblas_CORTEXA55 gotoblas_ARMV8
  120. #endif
  121. #ifdef DYN_A64FX
  122. extern gotoblas_t gotoblas_A64FX;
  123. #else
  124. #define gotoblas_A64FX gotoblas_ARMV8
  125. #endif
  126. #else
  127. extern gotoblas_t gotoblas_CORTEXA53;
  128. #define gotoblas_CORTEXA55 gotoblas_CORTEXA53
  129. extern gotoblas_t gotoblas_CORTEXA57;
  130. #define gotoblas_CORTEXA72 gotoblas_CORTEXA57
  131. #define gotoblas_CORTEXA73 gotoblas_CORTEXA57
  132. #define gotoblas_FALKOR gotoblas_CORTEXA57
  133. extern gotoblas_t gotoblas_THUNDERX;
  134. extern gotoblas_t gotoblas_THUNDERX2T99;
  135. extern gotoblas_t gotoblas_TSV110;
  136. extern gotoblas_t gotoblas_EMAG8180;
  137. extern gotoblas_t gotoblas_NEOVERSEN1;
  138. #ifndef NO_SVE
  139. extern gotoblas_t gotoblas_NEOVERSEV1;
  140. extern gotoblas_t gotoblas_NEOVERSEN2;
  141. extern gotoblas_t gotoblas_ARMV8SVE;
  142. extern gotoblas_t gotoblas_A64FX;
  143. #else
  144. #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
  145. #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
  146. #define gotoblas_ARMV8SVE gotoblas_ARMV8
  147. #define gotoblas_A64FX gotoblas_ARMV8
  148. #endif
  149. extern gotoblas_t gotoblas_THUNDERX3T110;
  150. #endif
  151. #define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEV1
  /* Warning sink defined elsewhere in the library. */
  extern void openblas_warning(int verbose, const char * msg);

  /* Verbosity argument and text used when an SVE core has to fall back to
   * the Neoverse N1 kernels. */
  #define FALLBACK_VERBOSE 1
  #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"

  /* Number of named core types; corename[NUM_CORETYPES] is the "unknown" entry. */
  #define NUM_CORETYPES 18

  /*
   * The build system's asm/hwcap.h may be too old to provide these bits,
   * so supply fallback definitions for HWCAP_CPUID and HWCAP_SVE.
   */
  #if !defined(HWCAP_CPUID)
  #define HWCAP_CPUID (1 << 11)
  #endif
  #if !defined(HWCAP_SVE)
  #define HWCAP_SVE (1 << 22)
  #endif

  /*
   * Read the system register named `id` with an MRS instruction and store
   * the result in `var`.
   */
  #define get_cpu_ftr(id, var) ({ \
      __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
  })
  /*
   * Printable core names, indexed by core type. Slots 0..17 are the names
   * matched by force_coretype() and reported by gotoblas_corename(); the
   * final slot (index 18) is the "unknown" placeholder.
   */
  static char *corename[] = {
    [0]  = "armv8",
    [1]  = "cortexa53",
    [2]  = "cortexa57",
    [3]  = "cortexa72",
    [4]  = "cortexa73",
    [5]  = "falkor",
    [6]  = "thunderx",
    [7]  = "thunderx2t99",
    [8]  = "tsv110",
    [9]  = "emag8180",
    [10] = "neoversen1",
    [11] = "neoversev1",
    [12] = "neoversev2",
    [13] = "neoversen2",
    [14] = "thunderx3t110",
    [15] = "cortexa55",
    [16] = "armv8sve",
    [17] = "a64fx",
    [18] = "unknown"
  };
  190. char *gotoblas_corename(void) {
  191. if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
  192. if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
  193. if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
  194. if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
  195. if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
  196. if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
  197. if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
  198. if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
  199. if (gotoblas == &gotoblas_TSV110) return corename[ 8];
  200. if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
  201. if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
  202. if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
  203. if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12];
  204. if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
  205. if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
  206. if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
  207. if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
  208. if (gotoblas == &gotoblas_A64FX) return corename[17];
  209. return corename[NUM_CORETYPES];
  210. }
  211. static gotoblas_t *force_coretype(char *coretype) {
  212. int i ;
  213. int found = -1;
  214. char message[128];
  215. for ( i=0 ; i < NUM_CORETYPES; i++)
  216. {
  217. if (!strncasecmp(coretype, corename[i], 20))
  218. {
  219. found = i;
  220. break;
  221. }
  222. }
  223. switch (found)
  224. {
  225. case 0: return (&gotoblas_ARMV8);
  226. case 1: return (&gotoblas_CORTEXA53);
  227. case 2: return (&gotoblas_CORTEXA57);
  228. case 3: return (&gotoblas_CORTEXA72);
  229. case 4: return (&gotoblas_CORTEXA73);
  230. case 5: return (&gotoblas_FALKOR);
  231. case 6: return (&gotoblas_THUNDERX);
  232. case 7: return (&gotoblas_THUNDERX2T99);
  233. case 8: return (&gotoblas_TSV110);
  234. case 9: return (&gotoblas_EMAG8180);
  235. case 10: return (&gotoblas_NEOVERSEN1);
  236. case 11: return (&gotoblas_NEOVERSEV1);
  237. case 12: return (&gotoblas_NEOVERSEV2);
  238. case 13: return (&gotoblas_NEOVERSEN2);
  239. case 14: return (&gotoblas_THUNDERX3T110);
  240. case 15: return (&gotoblas_CORTEXA55);
  241. case 16: return (&gotoblas_ARMV8SVE);
  242. case 17: return (&gotoblas_A64FX);
  243. }
  244. snprintf(message, 128, "Core not found: %s\n", coretype);
  245. openblas_warning(1, message);
  246. return NULL;
  247. }
  248. static gotoblas_t *get_coretype(void) {
  249. int implementer, variant, part, arch, revision, midr_el1;
  250. char coremsg[128];
  251. #if defined (OS_DARWIN)
  252. return &gotoblas_NEOVERSEN1;
  253. #endif
  254. #if (!defined OS_LINUX && !defined OS_ANDROID)
  255. return NULL;
  256. #else
  257. if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
  258. #ifdef __linux
  259. FILE *infile;
  260. char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
  261. p = (char *) NULL ;
  262. infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r");
  263. if (!infile) return NULL;
  264. (void)fgets(buffer, sizeof(buffer), infile);
  265. midr_el1=strtoul(buffer,NULL,16);
  266. fclose(infile);
  267. #else
  268. snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
  269. openblas_warning(1, coremsg);
  270. return NULL;
  271. #endif
  272. } else {
  273. get_cpu_ftr(MIDR_EL1, midr_el1);
  274. }
  275. /*
  276. * MIDR_EL1
  277. *
  278. * 31 24 23 20 19 16 15 4 3 0
  279. * -----------------------------------------------------------------
  280. * | Implementer | Variant | Architecture | Part Number | Revision |
  281. * -----------------------------------------------------------------
  282. */
  283. implementer = (midr_el1 >> 24) & 0xFF;
  284. part = (midr_el1 >> 4) & 0xFFF;
  285. switch(implementer)
  286. {
  287. case 0x41: // ARM
  288. switch (part)
  289. {
  290. case 0xd03: // Cortex A53
  291. return &gotoblas_CORTEXA53;
  292. case 0xd07: // Cortex A57
  293. return &gotoblas_CORTEXA57;
  294. case 0xd08: // Cortex A72
  295. return &gotoblas_CORTEXA72;
  296. case 0xd09: // Cortex A73
  297. return &gotoblas_CORTEXA73;
  298. case 0xd0c: // Neoverse N1
  299. return &gotoblas_NEOVERSEN1;
  300. #ifndef NO_SVE
  301. case 0xd49:
  302. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  303. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  304. return &gotoblas_NEOVERSEN1;
  305. } else
  306. return &gotoblas_NEOVERSEN2;
  307. case 0xd40:
  308. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  309. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  310. return &gotoblas_NEOVERSEN1;
  311. }else
  312. return &gotoblas_NEOVERSEV1;
  313. case 0xd4f:
  314. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  315. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  316. return &gotoblas_NEOVERSEN1;
  317. } else {
  318. return &gotoblas_NEOVERSEV2;
  319. }
  320. #endif
  321. case 0xd05: // Cortex A55
  322. return &gotoblas_CORTEXA55;
  323. }
  324. break;
  325. case 0x42: // Broadcom
  326. switch (part)
  327. {
  328. case 0x516: // Vulcan
  329. return &gotoblas_THUNDERX2T99;
  330. }
  331. break;
  332. case 0x43: // Cavium
  333. switch (part)
  334. {
  335. case 0x0a1: // ThunderX
  336. return &gotoblas_THUNDERX;
  337. case 0x0af: // ThunderX2
  338. return &gotoblas_THUNDERX2T99;
  339. case 0x0b8: // ThunderX3
  340. return &gotoblas_THUNDERX3T110;
  341. }
  342. break;
  343. case 0x46: // Fujitsu
  344. switch (part)
  345. {
  346. #ifndef NO_SVE
  347. case 0x001: // A64FX
  348. return &gotoblas_A64FX;
  349. #endif
  350. }
  351. break;
  352. case 0x48: // HiSilicon
  353. switch (part)
  354. {
  355. case 0xd01: // tsv110
  356. return &gotoblas_TSV110;
  357. }
  358. break;
  359. case 0x50: // Ampere
  360. switch (part)
  361. {
  362. case 0x000: // Skylark/EMAG8180
  363. return &gotoblas_EMAG8180;
  364. }
  365. break;
  366. case 0x51: // Qualcomm
  367. switch (part)
  368. {
  369. case 0xc00: // Falkor
  370. return &gotoblas_FALKOR;
  371. }
  372. break;
  373. case 0x61: // Apple
  374. return &gotoblas_NEOVERSEN1;
  375. break;
  376. default:
  377. snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
  378. openblas_warning(1, coremsg);
  379. }
  380. #ifndef NO_SVE
  381. if ((getauxval(AT_HWCAP) & HWCAP_SVE)) {
  382. return &gotoblas_ARMV8SVE;
  383. }
  384. #endif
  385. return NULL;
  386. #endif
  387. }
  388. void gotoblas_dynamic_init(void) {
  389. char coremsg[128];
  390. char coren[22];
  391. char *p;
  392. if (gotoblas) return;
  393. p = getenv("OPENBLAS_CORETYPE");
  394. if ( p )
  395. {
  396. gotoblas = force_coretype(p);
  397. }
  398. else
  399. {
  400. gotoblas = get_coretype();
  401. }
  402. if (gotoblas == NULL)
  403. {
  404. snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
  405. openblas_warning(1, coremsg);
  406. gotoblas = &gotoblas_ARMV8;
  407. }
  408. if (gotoblas && gotoblas->init) {
  409. strncpy(coren, gotoblas_corename(), 20);
  410. sprintf(coremsg, "Core: %s\n", coren);
  411. openblas_warning(2, coremsg);
  412. gotoblas -> init();
  413. } else {
  414. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  415. exit(1);
  416. }
  417. }
  418. void gotoblas_dynamic_quit(void) {
  419. gotoblas = NULL;
  420. }