You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

dynamic_arm64.c 15 kB

4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
9 months ago
2 years ago
4 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023-2024 The OpenBLAS Project */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include "common.h"
  40. #if (defined OS_LINUX || defined OS_ANDROID)
  41. #include <asm/hwcap.h>
  42. #include <sys/auxv.h>
  43. #endif
  44. extern gotoblas_t gotoblas_ARMV8;
  45. #ifdef DYNAMIC_LIST
  46. #ifdef DYN_CORTEXA53
  47. extern gotoblas_t gotoblas_CORTEXA53;
  48. #else
  49. #define gotoblas_CORTEXA53 gotoblas_ARMV8
  50. #endif
  51. #ifdef DYN_CORTEXA57
  52. extern gotoblas_t gotoblas_CORTEXA57;
  53. #else
  54. #define gotoblas_CORTEXA57 gotoblas_ARMV8
  55. #endif
  56. #ifdef DYN_CORTEXA72
  57. extern gotoblas_t gotoblas_CORTEXA72;
  58. #else
  59. #define gotoblas_CORTEXA72 gotoblas_ARMV8
  60. #endif
  61. #ifdef DYN_CORTEXA73
  62. extern gotoblas_t gotoblas_CORTEXA73;
  63. #else
  64. #define gotoblas_CORTEXA73 gotoblas_ARMV8
  65. #endif
  66. #ifdef DYN_FALKOR
  67. extern gotoblas_t gotoblas_FALKOR;
  68. #else
  69. #define gotoblas_FALKOR gotoblas_ARMV8
  70. #endif
  71. #ifdef DYN_TSV110
  72. extern gotoblas_t gotoblas_TSV110;
  73. #else
  74. #define gotoblas_TSV110 gotoblas_ARMV8
  75. #endif
  76. #ifdef DYN_THUNDERX
  77. extern gotoblas_t gotoblas_THUNDERX;
  78. #else
  79. #define gotoblas_THUNDERX gotoblas_ARMV8
  80. #endif
  81. #ifdef DYN_THUNDERX2T99
  82. extern gotoblas_t gotoblas_THUNDERX2T99;
  83. #else
  84. #define gotoblas_THUNDERX2T99 gotoblas_ARMV8
  85. #endif
  86. #ifdef DYN_THUNDERX3T110
  87. extern gotoblas_t gotoblas_THUNDERX3T110;
  88. #else
  89. #define gotoblas_THUNDERX3T110 gotoblas_ARMV8
  90. #endif
  91. #ifdef DYN_EMAG8180
  92. extern gotoblas_t gotoblas_EMAG8180;
  93. #else
  94. #define gotoblas_EMAG8180 gotoblas_ARMV8
  95. #endif
  96. #ifdef DYN_NEOVERSEN1
  97. extern gotoblas_t gotoblas_NEOVERSEN1;
  98. #else
  99. #define gotoblas_NEOVERSEN1 gotoblas_ARMV8
  100. #endif
  101. #ifdef DYN_NEOVERSEV1
  102. extern gotoblas_t gotoblas_NEOVERSEV1;
  103. #else
  104. #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
  105. #endif
  106. #ifdef DYN_NEOVERSEN2
  107. extern gotoblas_t gotoblas_NEOVERSEN2;
  108. #else
  109. #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
  110. #endif
  111. #ifdef DYN_ARMV8SVE
  112. extern gotoblas_t gotoblas_ARMV8SVE;
  113. #else
  114. #define gotoblas_ARMV8SVE gotoblas_ARMV8
  115. #endif
  116. #ifdef DYN_ARMV9SME
  117. extern gotoblas_t gotoblas_ARMV9SME;
  118. #else
  119. #define gotoblas_ARMV9SME gotoblas_ARMV8
  120. #endif
  121. #ifdef DYN_CORTEX_A55
  122. extern gotoblas_t gotoblas_CORTEXA55;
  123. #else
  124. #define gotoblas_CORTEXA55 gotoblas_ARMV8
  125. #endif
  126. #ifdef DYN_A64FX
  127. extern gotoblas_t gotoblas_A64FX;
  128. #else
  129. #define gotoblas_A64FX gotoblas_ARMV8
  130. #endif
  131. #else
  132. extern gotoblas_t gotoblas_CORTEXA53;
  133. #define gotoblas_CORTEXA55 gotoblas_CORTEXA53
  134. extern gotoblas_t gotoblas_CORTEXA57;
  135. #define gotoblas_CORTEXA72 gotoblas_CORTEXA57
  136. #define gotoblas_CORTEXA73 gotoblas_CORTEXA57
  137. #define gotoblas_FALKOR gotoblas_CORTEXA57
  138. extern gotoblas_t gotoblas_THUNDERX;
  139. extern gotoblas_t gotoblas_THUNDERX2T99;
  140. extern gotoblas_t gotoblas_TSV110;
  141. extern gotoblas_t gotoblas_EMAG8180;
  142. extern gotoblas_t gotoblas_NEOVERSEN1;
  143. #ifndef NO_SVE
  144. extern gotoblas_t gotoblas_NEOVERSEV1;
  145. extern gotoblas_t gotoblas_NEOVERSEN2;
  146. extern gotoblas_t gotoblas_ARMV8SVE;
  147. extern gotoblas_t gotoblas_A64FX;
  148. #else
  149. #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
  150. #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
  151. #define gotoblas_ARMV8SVE gotoblas_ARMV8
  152. #define gotoblas_A64FX gotoblas_ARMV8
  153. #endif
  154. #ifndef NO_SME
  155. extern gotoblas_t gotoblas_ARMV9SME;
  156. #else
  157. #define gotoblas_ARMV9SME gotoblas_ARMV8SVE
  158. #endif
  159. extern gotoblas_t gotoblas_THUNDERX3T110;
  160. #endif
  161. #define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEN2
  /* Defined elsewhere in the project; used below to emit diagnostic messages. */
  extern void openblas_warning(int verbose, const char * msg);

  /* Verbosity argument passed to openblas_warning() for the SVE fallback notice. */
  #define FALLBACK_VERBOSE 1
  #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"

  /*
   * Number of named entries in corename[]; corename[NUM_CORETYPES] is the
   * trailing "unknown" entry.
   */
  #define NUM_CORETYPES 18

  /*
   * In case asm/hwcap.h is outdated on the build system, make sure
   * that the HWCAP bits used below are defined.
   */
  #ifndef HWCAP_CPUID
  #define HWCAP_CPUID (1 << 11)
  #endif
  #ifndef HWCAP_SVE
  #define HWCAP_SVE (1 << 22)
  #endif
  #ifndef HWCAP2_SME
  /* Parenthesized so the macro expands safely inside any larger expression. */
  #define HWCAP2_SME (1 << 23)
  #endif

  /*
   * Read the system register named `id` into `var` using an `mrs` instruction.
   * Relies on the GNU statement-expression and inline-asm extensions.
   */
  #define get_cpu_ftr(id, var) ({ \
    __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
  })
  /*
   * Lower-case core names, indexed by core id.  Indices 0..17 are the named
   * cores; index 18 is the "unknown" entry returned when nothing matches.
   * gotoblas_corename() and force_coretype() depend on these exact positions.
   */
  static char *corename[] = {
    [0]  = "armv8",
    [1]  = "cortexa53",
    [2]  = "cortexa57",
    [3]  = "cortexa72",
    [4]  = "cortexa73",
    [5]  = "falkor",
    [6]  = "thunderx",
    [7]  = "thunderx2t99",
    [8]  = "tsv110",
    [9]  = "emag8180",
    [10] = "neoversen1",
    [11] = "neoversev1",
    [12] = "neoversev2",
    [13] = "neoversen2",
    [14] = "thunderx3t110",
    [15] = "cortexa55",
    [16] = "armv8sve",
    [17] = "a64fx",
    [18] = "unknown"
  };
  203. char *gotoblas_corename(void) {
  204. if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
  205. if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
  206. if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
  207. if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
  208. if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
  209. if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
  210. if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
  211. if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
  212. if (gotoblas == &gotoblas_TSV110) return corename[ 8];
  213. if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
  214. if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
  215. if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
  216. if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12];
  217. if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
  218. if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
  219. if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
  220. if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
  221. if (gotoblas == &gotoblas_A64FX) return corename[17];
  222. return corename[NUM_CORETYPES];
  223. }
  224. static gotoblas_t *force_coretype(char *coretype) {
  225. int i ;
  226. int found = -1;
  227. char message[128];
  228. for ( i=0 ; i < NUM_CORETYPES; i++)
  229. {
  230. if (!strncasecmp(coretype, corename[i], 20))
  231. {
  232. found = i;
  233. break;
  234. }
  235. }
  236. switch (found)
  237. {
  238. case 0: return (&gotoblas_ARMV8);
  239. case 1: return (&gotoblas_CORTEXA53);
  240. case 2: return (&gotoblas_CORTEXA57);
  241. case 3: return (&gotoblas_CORTEXA72);
  242. case 4: return (&gotoblas_CORTEXA73);
  243. case 5: return (&gotoblas_FALKOR);
  244. case 6: return (&gotoblas_THUNDERX);
  245. case 7: return (&gotoblas_THUNDERX2T99);
  246. case 8: return (&gotoblas_TSV110);
  247. case 9: return (&gotoblas_EMAG8180);
  248. case 10: return (&gotoblas_NEOVERSEN1);
  249. case 11: return (&gotoblas_NEOVERSEV1);
  250. case 12: return (&gotoblas_NEOVERSEV2);
  251. case 13: return (&gotoblas_NEOVERSEN2);
  252. case 14: return (&gotoblas_THUNDERX3T110);
  253. case 15: return (&gotoblas_CORTEXA55);
  254. case 16: return (&gotoblas_ARMV8SVE);
  255. case 17: return (&gotoblas_A64FX);
  256. }
  257. snprintf(message, 128, "Core not found: %s\n", coretype);
  258. openblas_warning(1, message);
  259. return NULL;
  260. }
  261. static gotoblas_t *get_coretype(void) {
  262. int implementer, variant, part, arch, revision, midr_el1;
  263. char coremsg[128];
  264. #if defined (OS_DARWIN)
  265. return &gotoblas_NEOVERSEN1;
  266. #endif
  267. #if (!defined OS_LINUX && !defined OS_ANDROID)
  268. return NULL;
  269. #else
  270. if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
  271. #ifdef __linux
  272. int i;
  273. int ncores=0;
  274. int prt,cpucap,cpulowperf=0,cpumidperf=0,cpuhiperf=0;
  275. FILE *infile;
  276. char buffer[512], *cpu_part = NULL, *cpu_implementer = NULL;
  277. infile = fopen("/sys/devices/system/cpu/possible","r");
  278. if (infile) {
  279. (void)fgets(buffer, sizeof(buffer), infile);
  280. sscanf(buffer,"0-%d",&ncores);
  281. fclose (infile);
  282. ncores++;
  283. } else {
  284. infile = fopen("/proc/cpuinfo","r");
  285. while (fgets(buffer, sizeof(buffer), infile)) {
  286. if (!strncmp("processor", buffer, 9))
  287. ncores++;
  288. }
  289. }
  290. for (i=0;i<ncores;i++) {
  291. sprintf(buffer,"/sys/devices/system/cpu/cpu%d/regs/identification/midr_el1",i);
  292. infile = fopen(buffer,"r");
  293. if (!infile) return NULL;
  294. (void)fgets(buffer, sizeof(buffer), infile);
  295. midr_el1=strtoul(buffer,NULL,16);
  296. implementer = (midr_el1 >> 24) & 0xFF;
  297. prt = (midr_el1 >> 4) & 0xFFF;
  298. fclose(infile);
  299. sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capability",i);
  300. infile = fopen(buffer,"r");
  301. if (infile) {
  302. (void)fgets(buffer, sizeof(buffer), infile);
  303. cpucap=strtoul(buffer,NULL,16);
  304. fclose(infile);
  305. if (cpucap >= 1000) cpuhiperf++;
  306. else if (cpucap >=500) cpumidperf++;
  307. else cpulowperf++;
  308. if (cpucap >=1000) part = prt;
  309. } else if (implementer == 0x41 ){
  310. if (prt >= 0xd4b) cpuhiperf++;
  311. else if (prt>= 0xd07) cpumidperf++;
  312. else cpulowperf++;
  313. } else cpulowperf++;
  314. }
  315. if (!part) part = prt;
  316. #else
  317. snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
  318. openblas_warning(1, coremsg);
  319. return NULL;
  320. #endif
  321. } else {
  322. get_cpu_ftr(MIDR_EL1, midr_el1);
  323. /*
  324. * MIDR_EL1
  325. *
  326. * 31 24 23 20 19 16 15 4 3 0
  327. * -----------------------------------------------------------------
  328. * | Implementer | Variant | Architecture | Part Number | Revision |
  329. * -----------------------------------------------------------------
  330. */
  331. implementer = (midr_el1 >> 24) & 0xFF;
  332. part = (midr_el1 >> 4) & 0xFFF;
  333. }
  334. switch(implementer)
  335. {
  336. case 0x41: // ARM
  337. switch (part)
  338. {
  339. case 0xd03: // Cortex A53
  340. return &gotoblas_CORTEXA53;
  341. case 0xd07: // Cortex A57
  342. return &gotoblas_CORTEXA57;
  343. case 0xd08: // Cortex A72
  344. return &gotoblas_CORTEXA72;
  345. case 0xd09: // Cortex A73
  346. return &gotoblas_CORTEXA73;
  347. case 0xd0c: // Neoverse N1
  348. return &gotoblas_NEOVERSEN1;
  349. #ifndef NO_SVE
  350. case 0xd49:
  351. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  352. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  353. return &gotoblas_NEOVERSEN1;
  354. } else
  355. return &gotoblas_NEOVERSEN2;
  356. case 0xd40:
  357. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  358. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  359. return &gotoblas_NEOVERSEN1;
  360. }else
  361. return &gotoblas_NEOVERSEV1;
  362. case 0xd4f:
  363. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  364. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  365. return &gotoblas_NEOVERSEN1;
  366. } else {
  367. return &gotoblas_NEOVERSEV2;
  368. }
  369. #endif
  370. case 0xd05: // Cortex A55
  371. return &gotoblas_CORTEXA55;
  372. }
  373. break;
  374. case 0x42: // Broadcom
  375. switch (part)
  376. {
  377. case 0x516: // Vulcan
  378. return &gotoblas_THUNDERX2T99;
  379. }
  380. break;
  381. case 0x43: // Cavium
  382. switch (part)
  383. {
  384. case 0x0a1: // ThunderX
  385. return &gotoblas_THUNDERX;
  386. case 0x0af: // ThunderX2
  387. return &gotoblas_THUNDERX2T99;
  388. case 0x0b8: // ThunderX3
  389. return &gotoblas_THUNDERX3T110;
  390. }
  391. break;
  392. case 0x46: // Fujitsu
  393. switch (part)
  394. {
  395. #ifndef NO_SVE
  396. case 0x001: // A64FX
  397. return &gotoblas_A64FX;
  398. #endif
  399. }
  400. break;
  401. case 0x48: // HiSilicon
  402. switch (part)
  403. {
  404. case 0xd01: // tsv110
  405. return &gotoblas_TSV110;
  406. }
  407. break;
  408. case 0x50: // Ampere
  409. switch (part)
  410. {
  411. case 0x000: // Skylark/EMAG8180
  412. return &gotoblas_EMAG8180;
  413. }
  414. break;
  415. case 0x51: // Qualcomm
  416. switch (part)
  417. {
  418. case 0xc00: // Falkor
  419. return &gotoblas_FALKOR;
  420. }
  421. break;
  422. case 0x61: // Apple
  423. return &gotoblas_NEOVERSEN1;
  424. break;
  425. default:
  426. snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
  427. openblas_warning(1, coremsg);
  428. }
  429. #if !defined(NO_SME) && defined(HWCAP2_SME)
  430. if ((getauxval(AT_HWCAP2) & HWCAP2_SME)) {
  431. return &gotoblas_ARMV9SME;
  432. }
  433. #endif
  434. #ifndef NO_SVE
  435. if ((getauxval(AT_HWCAP) & HWCAP_SVE)) {
  436. return &gotoblas_ARMV8SVE;
  437. }
  438. #endif
  439. return NULL;
  440. #endif
  441. }
  442. void gotoblas_dynamic_init(void) {
  443. char coremsg[128];
  444. char coren[22];
  445. char *p;
  446. if (gotoblas) return;
  447. p = getenv("OPENBLAS_CORETYPE");
  448. if ( p )
  449. {
  450. gotoblas = force_coretype(p);
  451. }
  452. else
  453. {
  454. gotoblas = get_coretype();
  455. }
  456. if (gotoblas == NULL)
  457. {
  458. snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
  459. openblas_warning(1, coremsg);
  460. gotoblas = &gotoblas_ARMV8;
  461. }
  462. if (gotoblas && gotoblas->init) {
  463. strncpy(coren, gotoblas_corename(), 20);
  464. sprintf(coremsg, "Core: %s\n", coren);
  465. openblas_warning(2, coremsg);
  466. gotoblas -> init();
  467. } else {
  468. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  469. exit(1);
  470. }
  471. }
  472. void gotoblas_dynamic_quit(void) {
  473. gotoblas = NULL;
  474. }
  /*
   * Report whether the OS advertises SME support.
   *
   * Returns 1 when built for Linux/Android and the HWCAP2_SME bit is set in
   * AT_HWCAP2, and 0 otherwise (including on all other platforms).
   */
  int support_sme1(void) {
  #if (defined OS_LINUX || defined OS_ANDROID)
    return (getauxval(AT_HWCAP2) & (HWCAP2_SME)) ? 1 : 0;
  #else
    return 0;
  #endif
  }