You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dynamic_arm64.c 16 kB

4 years ago
4 years ago
4 years ago
2 years ago
4 years ago
9 months ago
2 years ago
4 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023-2024 The OpenBLAS Project */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include "common.h"
  40. #if (defined OS_LINUX || defined OS_ANDROID)
  41. #include <asm/hwcap.h>
  42. #include <sys/auxv.h>
  43. #endif
  44. #ifdef __APPLE__
  45. #include <sys/sysctl.h>
  46. int32_t value;
  47. size_t length=sizeof(value);
  48. int64_t value64;
  49. size_t length64=sizeof(value64);
  50. #endif
/* Generic ARMv8 kernel table. It is always declared and is the target that
 * most unavailable cores are aliased to below. */
extern gotoblas_t gotoblas_ARMV8;
#ifdef DYNAMIC_LIST
/* DYNAMIC_LIST build: a core gets its own kernel table only when the matching
 * DYN_<core> macro is defined; otherwise its name is aliased to the generic
 * ARMv8 table so the rest of this file can refer to every core by name. */
#ifdef DYN_CORTEXA53
extern gotoblas_t gotoblas_CORTEXA53;
#else
#define gotoblas_CORTEXA53 gotoblas_ARMV8
#endif
#ifdef DYN_CORTEXA57
extern gotoblas_t gotoblas_CORTEXA57;
#else
#define gotoblas_CORTEXA57 gotoblas_ARMV8
#endif
#ifdef DYN_CORTEXA72
extern gotoblas_t gotoblas_CORTEXA72;
#else
#define gotoblas_CORTEXA72 gotoblas_ARMV8
#endif
#ifdef DYN_CORTEXA73
extern gotoblas_t gotoblas_CORTEXA73;
#else
#define gotoblas_CORTEXA73 gotoblas_ARMV8
#endif
#ifdef DYN_FALKOR
extern gotoblas_t gotoblas_FALKOR;
#else
#define gotoblas_FALKOR gotoblas_ARMV8
#endif
#ifdef DYN_TSV110
extern gotoblas_t gotoblas_TSV110;
#else
#define gotoblas_TSV110 gotoblas_ARMV8
#endif
#ifdef DYN_THUNDERX
extern gotoblas_t gotoblas_THUNDERX;
#else
#define gotoblas_THUNDERX gotoblas_ARMV8
#endif
#ifdef DYN_THUNDERX2T99
extern gotoblas_t gotoblas_THUNDERX2T99;
#else
#define gotoblas_THUNDERX2T99 gotoblas_ARMV8
#endif
#ifdef DYN_THUNDERX3T110
extern gotoblas_t gotoblas_THUNDERX3T110;
#else
#define gotoblas_THUNDERX3T110 gotoblas_ARMV8
#endif
#ifdef DYN_EMAG8180
extern gotoblas_t gotoblas_EMAG8180;
#else
#define gotoblas_EMAG8180 gotoblas_ARMV8
#endif
#ifdef DYN_NEOVERSEN1
extern gotoblas_t gotoblas_NEOVERSEN1;
#else
#define gotoblas_NEOVERSEN1 gotoblas_ARMV8
#endif
#ifdef DYN_NEOVERSEV1
extern gotoblas_t gotoblas_NEOVERSEV1;
#else
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
#endif
#ifdef DYN_NEOVERSEN2
extern gotoblas_t gotoblas_NEOVERSEN2;
#else
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
#endif
#ifdef DYN_ARMV8SVE
extern gotoblas_t gotoblas_ARMV8SVE;
#else
#define gotoblas_ARMV8SVE gotoblas_ARMV8
#endif
#ifdef DYN_ARMV9SME
extern gotoblas_t gotoblas_ARMV9SME;
#else
#define gotoblas_ARMV9SME gotoblas_ARMV8
#endif
#ifdef DYN_CORTEXA55
extern gotoblas_t gotoblas_CORTEXA55;
#else
#define gotoblas_CORTEXA55 gotoblas_ARMV8
#endif
#ifdef DYN_A64FX
extern gotoblas_t gotoblas_A64FX;
#else
#define gotoblas_A64FX gotoblas_ARMV8
#endif
#else
/* Build without DYNAMIC_LIST: a fixed set of kernel tables is declared.
 * Cortex-A55 shares the Cortex-A53 table; Cortex-A72, Cortex-A73 and Falkor
 * share the Cortex-A57 table. */
extern gotoblas_t gotoblas_CORTEXA53;
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53
extern gotoblas_t gotoblas_CORTEXA57;
#define gotoblas_CORTEXA72 gotoblas_CORTEXA57
#define gotoblas_CORTEXA73 gotoblas_CORTEXA57
#define gotoblas_FALKOR gotoblas_CORTEXA57
extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180;
extern gotoblas_t gotoblas_NEOVERSEN1;
#ifndef NO_SVE
/* SVE kernel tables are declared unless NO_SVE is defined. */
extern gotoblas_t gotoblas_NEOVERSEV1;
extern gotoblas_t gotoblas_NEOVERSEN2;
extern gotoblas_t gotoblas_ARMV8SVE;
extern gotoblas_t gotoblas_A64FX;
#ifndef NO_SME
extern gotoblas_t gotoblas_ARMV9SME;
#else
/* SVE but no SME: the SME name resolves to the generic SVE table. */
#define gotoblas_ARMV9SME gotoblas_ARMV8SVE
#endif
#else
/* NO_SVE: every SVE/SME core name resolves to the generic ARMv8 table. */
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
#define gotoblas_ARMV8SVE gotoblas_ARMV8
#define gotoblas_A64FX gotoblas_ARMV8
#define gotoblas_ARMV9SME gotoblas_ARMV8
#endif
extern gotoblas_t gotoblas_THUNDERX3T110;
#endif
/* Neoverse V2 has no table of its own; it always uses the Neoverse N2 one. */
#define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEN2
extern void openblas_warning(int verbose, const char * msg);
/* Verbosity level passed to openblas_warning() for the SVE fallback notice. */
#define FALLBACK_VERBOSE 1
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
/* Number of real entries in corename[]; corename[NUM_CORETYPES] is the
 * trailing "unknown" entry. Must be kept in sync with that array. */
#define NUM_CORETYPES 19
  174. /*
  175. * In case asm/hwcap.h is outdated on the build system, make sure
  176. * that HWCAP_CPUID is defined
  177. */
  178. #ifndef HWCAP_CPUID
  179. #define HWCAP_CPUID (1 << 11)
  180. #endif
  181. #ifndef HWCAP_SVE
  182. #define HWCAP_SVE (1 << 22)
  183. #endif
  184. #ifndef HWCAP2_SME
  185. #define HWCAP2_SME 1<<23
  186. #endif
/*
 * Read the system register named by `id` with an `mrs` instruction and store
 * the value in `var`. `id` is stringified into the instruction text, so it
 * must be a bare register name (for example MIDR_EL1).
 */
#define get_cpu_ftr(id, var) ({ \
__asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
})
  190. static char *corename[] = {
  191. "armv8",
  192. "cortexa53",
  193. "cortexa57",
  194. "cortexa72",
  195. "cortexa73",
  196. "falkor",
  197. "thunderx",
  198. "thunderx2t99",
  199. "tsv110",
  200. "emag8180",
  201. "neoversen1",
  202. "neoversev1",
  203. "neoversev2",
  204. "neoversen2",
  205. "thunderx3t110",
  206. "cortexa55",
  207. "armv8sve",
  208. "a64fx",
  209. "armv9sme",
  210. "unknown"
  211. };
  212. char *gotoblas_corename(void) {
  213. if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
  214. if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
  215. if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
  216. if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
  217. if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
  218. if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
  219. if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
  220. if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
  221. if (gotoblas == &gotoblas_TSV110) return corename[ 8];
  222. if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
  223. if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
  224. if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
  225. if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12];
  226. if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
  227. if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
  228. if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
  229. if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
  230. if (gotoblas == &gotoblas_A64FX) return corename[17];
  231. if (gotoblas == &gotoblas_ARMV9SME) return corename[18];
  232. return corename[NUM_CORETYPES];
  233. }
  234. static gotoblas_t *force_coretype(char *coretype) {
  235. int i ;
  236. int found = -1;
  237. char message[128];
  238. for ( i=0 ; i < NUM_CORETYPES; i++)
  239. {
  240. if (!strncasecmp(coretype, corename[i], 20))
  241. {
  242. found = i;
  243. break;
  244. }
  245. }
  246. switch (found)
  247. {
  248. case 0: return (&gotoblas_ARMV8);
  249. case 1: return (&gotoblas_CORTEXA53);
  250. case 2: return (&gotoblas_CORTEXA57);
  251. case 3: return (&gotoblas_CORTEXA72);
  252. case 4: return (&gotoblas_CORTEXA73);
  253. case 5: return (&gotoblas_FALKOR);
  254. case 6: return (&gotoblas_THUNDERX);
  255. case 7: return (&gotoblas_THUNDERX2T99);
  256. case 8: return (&gotoblas_TSV110);
  257. case 9: return (&gotoblas_EMAG8180);
  258. case 10: return (&gotoblas_NEOVERSEN1);
  259. case 11: return (&gotoblas_NEOVERSEV1);
  260. case 12: return (&gotoblas_NEOVERSEV2);
  261. case 13: return (&gotoblas_NEOVERSEN2);
  262. case 14: return (&gotoblas_THUNDERX3T110);
  263. case 15: return (&gotoblas_CORTEXA55);
  264. case 16: return (&gotoblas_ARMV8SVE);
  265. case 17: return (&gotoblas_A64FX);
  266. case 18: return (&gotoblas_ARMV9SME);
  267. }
  268. snprintf(message, 128, "Core not found: %s\n", coretype);
  269. openblas_warning(1, message);
  270. return NULL;
  271. }
  272. static gotoblas_t *get_coretype(void) {
  273. int implementer, variant, part, arch, revision, midr_el1;
  274. char coremsg[128];
  275. #if defined (OS_DARWIN)
  276. //future #if !defined(NO_SME)
  277. // if (support_sme1()) {
  278. // return &gotoblas_ARMV9SME;
  279. // }
  280. // #endif
  281. return &gotoblas_NEOVERSEN1;
  282. #endif
  283. #if (!defined OS_LINUX && !defined OS_ANDROID)
  284. return NULL;
  285. #else
  286. if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
  287. #ifdef __linux
  288. int i;
  289. int ncores=0;
  290. int prt,cpucap,cpulowperf=0,cpumidperf=0,cpuhiperf=0;
  291. FILE *infile;
  292. char buffer[512], *cpu_part = NULL, *cpu_implementer = NULL;
  293. infile = fopen("/sys/devices/system/cpu/possible","r");
  294. if (infile) {
  295. (void)fgets(buffer, sizeof(buffer), infile);
  296. sscanf(buffer,"0-%d",&ncores);
  297. fclose (infile);
  298. ncores++;
  299. } else {
  300. infile = fopen("/proc/cpuinfo","r");
  301. while (fgets(buffer, sizeof(buffer), infile)) {
  302. if (!strncmp("processor", buffer, 9))
  303. ncores++;
  304. }
  305. }
  306. for (i=0;i<ncores;i++) {
  307. sprintf(buffer,"/sys/devices/system/cpu/cpu%d/regs/identification/midr_el1",i);
  308. infile = fopen(buffer,"r");
  309. if (!infile) return NULL;
  310. (void)fgets(buffer, sizeof(buffer), infile);
  311. midr_el1=strtoul(buffer,NULL,16);
  312. implementer = (midr_el1 >> 24) & 0xFF;
  313. prt = (midr_el1 >> 4) & 0xFFF;
  314. fclose(infile);
  315. sprintf(buffer,"/sys/devices/system/cpu/cpu%d/cpu_capability",i);
  316. infile = fopen(buffer,"r");
  317. if (infile) {
  318. (void)fgets(buffer, sizeof(buffer), infile);
  319. cpucap=strtoul(buffer,NULL,16);
  320. fclose(infile);
  321. if (cpucap >= 1000) cpuhiperf++;
  322. else if (cpucap >=500) cpumidperf++;
  323. else cpulowperf++;
  324. if (cpucap >=1000) part = prt;
  325. } else if (implementer == 0x41 ){
  326. if (prt >= 0xd4b) cpuhiperf++;
  327. else if (prt>= 0xd07) cpumidperf++;
  328. else cpulowperf++;
  329. } else cpulowperf++;
  330. }
  331. if (!part) part = prt;
  332. #else
  333. snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
  334. openblas_warning(1, coremsg);
  335. return NULL;
  336. #endif
  337. } else {
  338. get_cpu_ftr(MIDR_EL1, midr_el1);
  339. /*
  340. * MIDR_EL1
  341. *
  342. * 31 24 23 20 19 16 15 4 3 0
  343. * -----------------------------------------------------------------
  344. * | Implementer | Variant | Architecture | Part Number | Revision |
  345. * -----------------------------------------------------------------
  346. */
  347. implementer = (midr_el1 >> 24) & 0xFF;
  348. part = (midr_el1 >> 4) & 0xFFF;
  349. }
  350. switch(implementer)
  351. {
  352. case 0x41: // ARM
  353. switch (part)
  354. {
  355. case 0xd03: // Cortex A53
  356. return &gotoblas_CORTEXA53;
  357. case 0xd07: // Cortex A57
  358. return &gotoblas_CORTEXA57;
  359. case 0xd08: // Cortex A72
  360. return &gotoblas_CORTEXA72;
  361. case 0xd09: // Cortex A73
  362. return &gotoblas_CORTEXA73;
  363. case 0xd0c: // Neoverse N1
  364. return &gotoblas_NEOVERSEN1;
  365. #ifndef NO_SVE
  366. case 0xd49:
  367. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  368. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  369. return &gotoblas_NEOVERSEN1;
  370. } else
  371. return &gotoblas_NEOVERSEN2;
  372. case 0xd40:
  373. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  374. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  375. return &gotoblas_NEOVERSEN1;
  376. }else
  377. return &gotoblas_NEOVERSEV1;
  378. case 0xd4f:
  379. if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
  380. openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
  381. return &gotoblas_NEOVERSEN1;
  382. } else {
  383. return &gotoblas_NEOVERSEV2;
  384. }
  385. #endif
  386. case 0xd05: // Cortex A55
  387. return &gotoblas_CORTEXA55;
  388. }
  389. break;
  390. case 0x42: // Broadcom
  391. switch (part)
  392. {
  393. case 0x516: // Vulcan
  394. return &gotoblas_THUNDERX2T99;
  395. }
  396. break;
  397. case 0x43: // Cavium
  398. switch (part)
  399. {
  400. case 0x0a1: // ThunderX
  401. return &gotoblas_THUNDERX;
  402. case 0x0af: // ThunderX2
  403. return &gotoblas_THUNDERX2T99;
  404. case 0x0b8: // ThunderX3
  405. return &gotoblas_THUNDERX3T110;
  406. }
  407. break;
  408. case 0x46: // Fujitsu
  409. switch (part)
  410. {
  411. #ifndef NO_SVE
  412. case 0x001: // A64FX
  413. return &gotoblas_A64FX;
  414. #endif
  415. }
  416. break;
  417. case 0x48: // HiSilicon
  418. switch (part)
  419. {
  420. case 0xd01: // tsv110
  421. return &gotoblas_TSV110;
  422. }
  423. break;
  424. case 0x50: // Ampere/AppliedMicro
  425. switch (part)
  426. {
  427. case 0x000: // Skylark/EMAG8180
  428. return &gotoblas_EMAG8180;
  429. }
  430. break;
  431. case 0xc0: // Ampere
  432. switch(part)
  433. {
  434. case 0xac3:
  435. case 0xac4:
  436. return &gotoblas_NEOVERSEN1;
  437. }
  438. break;
  439. case 0x51: // Qualcomm
  440. switch (part)
  441. {
  442. case 0xc00: // Falkor
  443. return &gotoblas_FALKOR;
  444. }
  445. break;
  446. case 0x61: // Apple
  447. //future if (support_sme1()) return &gotoblas_ARMV9SME;
  448. return &gotoblas_NEOVERSEN1;
  449. break;
  450. default:
  451. snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
  452. openblas_warning(1, coremsg);
  453. }
  454. #if !defined(NO_SME)
  455. if (support_sme1()) {
  456. return &gotoblas_ARMV9SME;
  457. }
  458. #endif
  459. #ifndef NO_SVE
  460. if ((getauxval(AT_HWCAP) & HWCAP_SVE)) {
  461. return &gotoblas_ARMV8SVE;
  462. }
  463. #endif
  464. return NULL;
  465. #endif
  466. }
  467. void gotoblas_dynamic_init(void) {
  468. char coremsg[128];
  469. char coren[22];
  470. char *p;
  471. if (gotoblas) return;
  472. p = getenv("OPENBLAS_CORETYPE");
  473. if ( p )
  474. {
  475. gotoblas = force_coretype(p);
  476. }
  477. else
  478. {
  479. gotoblas = get_coretype();
  480. }
  481. if (gotoblas == NULL)
  482. {
  483. snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
  484. openblas_warning(1, coremsg);
  485. gotoblas = &gotoblas_ARMV8;
  486. }
  487. if (gotoblas && gotoblas->init) {
  488. strncpy(coren, gotoblas_corename(), 20);
  489. sprintf(coremsg, "Core: %s\n", coren);
  490. openblas_warning(2, coremsg);
  491. gotoblas -> init();
  492. } else {
  493. openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
  494. exit(1);
  495. }
  496. }
  497. void gotoblas_dynamic_quit(void) {
  498. gotoblas = NULL;
  499. }
  500. int support_sme1(void) {
  501. int ret = 0;
  502. #if (defined OS_LINUX || defined OS_ANDROID)
  503. ret = getauxval(AT_HWCAP2) & HWCAP2_SME;
  504. if(getauxval(AT_HWCAP2) & HWCAP2_SME){
  505. ret = 1;
  506. }
  507. #endif
  508. #if defined(__APPLE__)
  509. sysctlbyname("hw.optional.arm.FEAT_SME",&value64,&length64,NULL,0);
  510. ret = value64;
  511. #endif
  512. return ret;
  513. }