You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cpuid_loongarch64.c 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*****************************************************************************
  2. Copyright (c) 2011-2024, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written
  16. permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. **********************************************************************************/
  28. #include <stdint.h>
  29. #include <stdio.h>
  30. #include <math.h>
  31. #include <string.h>
  32. #include <sys/auxv.h>
  /* CPU microarchitecture identifiers; get_cputype() returns one of these and
   * the value is used as an index into cpuname[]. */
  #define CPU_LA64_GENERIC 0
  #define CPU_LA264 1
  #define CPU_LA364 2
  #define CPU_LA464 3
  #define CPU_LA664 4

  /* Optimization-target identifiers; get_coretype() returns one of these and
   * the value is used as an index into corename[] / corename_lower[]. */
  #define CORE_LA64_GENERIC 0
  #define CORE_LA264 1
  #define CORE_LA464 2

  /* Bits tested against getauxval(AT_HWCAP) by os_support_lsx()/os_support_lasx(). */
  #define LA_HWCAP_LSX (1U << 4)
  #define LA_HWCAP_LASX (1U << 5)

  /* Configuration word numbers passed to the cpucfg instruction. */
  #define LOONGARCH_CFG0 0x00
  #define LOONGARCH_CFG2 0x02
  #define LOONGARCH_CFG10 0x10
  #define LOONGARCH_CFG11 0x11
  #define LOONGARCH_CFG12 0x12
  #define LOONGARCH_CFG13 0x13
  #define LOONGARCH_CFG14 0x14

  /* Vector-extension bit masks. Parenthesized so the shift cannot re-associate
   * with neighbouring operators at the point of use (e.g. `LASX_MASK * 2`).
   * NOTE(review): not referenced in the visible part of this file; presumably
   * feature bits of cpucfg word LOONGARCH_CFG2 - confirm before relying on them. */
  #define LASX_MASK (1 << 7)
  #define LSX_MASK (1 << 6)

  /* Series field of the word returned by get_prid(), and its known values. */
  #define PRID_SERIES_MASK 0xf000
  #define PRID_SERIES_LA264 0xa000
  #define PRID_SERIES_LA364 0xb000
  #define PRID_SERIES_LA464 0xc000
  #define PRID_SERIES_LA664 0xd000

  /* Cache selectors accepted by get_cacheinfo(). */
  #define CACHE_INFO_L1_IU 0
  #define CACHE_INFO_L1_D 1
  #define CACHE_INFO_L2_IU 2
  #define CACHE_INFO_L2_D 3
  #define CACHE_INFO_L3_IU 4
  #define CACHE_INFO_L3_D 5

  /* Presence / unified flags tested in cpucfg word LOONGARCH_CFG10. */
  #define L1_IU_PRESENT_MASK 0x0001
  #define L1_IU_UNITY_MASK 0x0002
  #define L1_D_PRESENT_MASK 0x0004
  #define L2_IU_PRESENT_MASK 0x0008
  #define L2_IU_UNITY_MASK 0x0010
  #define L2_D_PRESENT_MASK 0x0080
  #define L3_IU_PRESENT_MASK 0x0400
  #define L3_IU_UNITY_MASK 0x0800
  #define L3_D_PRESENT_MASK 0x4000

  /* Fields of the per-cache words (LOONGARCH_CFG11..LOONGARCH_CFG14):
   * (ways - 1) in bits 15:0, log2(index count) in bits 23:16,
   * log2(line size) in bits 30:24. */
  #define CACHE_WAY_MINUS_1_MASK 0x0000ffff
  #define CACHE_INDEX_LOG2_MASK 0x00ff0000
  #define CACHE_LINESIZE_LOG2_MASK 0x7f000000
  /* Description of one cache, as produced by get_cacheinfo(). All fields are
   * zero when the requested cache is not reported as present. */
  struct cache_info_s {
      int size;        /* associative * linesize * number of index entries */
      int associative; /* number of ways */
      int linesize;    /* line size decoded from its log2 field */
      int unify;       /* non-zero when the "unified" flag bit is set */
      int present;     /* non-zero (the raw flag bit) when the cache exists */
  };
  typedef struct cache_info_s cache_info_t;
  /* Microarchitecture names, indexed by the CPU_* value that get_cputype() returns. */
  static char *cpuname[] = {
      [0] = "LA64_GENERIC",
      [1] = "LA264", /* Loongson 64bit, 2-issue, e.g. 2K1000LA */
      [2] = "LA364", /* Loongson 64bit, 3-issue, e.g. 2K2000 */
      [3] = "LA464", /* Loongson 64bit, 4-issue, e.g. 3A5000, 3C5000L, 3C5000 and 3D5000 */
      [4] = "LA664"  /* Loongson 64bit, 6-issue, e.g. 3A6000, 3C6000 and 3D6000 */
  };

  /* Lower-case spellings of the same names, in the same order. */
  static char *cpuname_lower[] = {
      [0] = "la64_generic",
      [1] = "la264",
      [2] = "la364",
      [3] = "la464",
      [4] = "la664"
  };
  /* Optimization-target names, indexed by the CORE_* value that get_coretype() returns. */
  static char *corename[] = {
      [0] = "LA64_GENERIC", /* scalar instructions only */
      [1] = "LA264",        /* LSX instructions */
      [2] = "LA464",        /* LASX instructions */
  };

  /* Lower-case spellings of the same names, in the same order. */
  static char *corename_lower[] = {
      [0] = "la64_generic",
      [1] = "la264",
      [2] = "la464",
  };
  107. /*
  108. * Obtain cache and processor identification
  109. * through the cpucfg command.
  110. */
  111. static void get_cacheinfo(int type, cache_info_t *cacheinfo) {
  112. cache_info_t cache_info;
  113. memset(&cache_info, 0, sizeof(cache_info));
  114. uint32_t reg_10 = 0;
  115. __asm__ volatile (
  116. "cpucfg %0, %1 \n\t"
  117. : "+&r"(reg_10)
  118. : "r"(LOONGARCH_CFG10)
  119. );
  120. switch (type) {
  121. case CACHE_INFO_L1_IU:
  122. if (reg_10 & L1_IU_PRESENT_MASK) {
  123. uint32_t reg_11 = 0;
  124. cache_info.present = reg_10 & L1_IU_PRESENT_MASK;
  125. cache_info.unify = reg_10 & L1_IU_UNITY_MASK;
  126. __asm__ volatile (
  127. "cpucfg %0, %1 \n\t"
  128. : "+&r"(reg_11)
  129. : "r"(LOONGARCH_CFG11)
  130. );
  131. cache_info.associative = (reg_11 & CACHE_WAY_MINUS_1_MASK) + 1;
  132. cache_info.linesize = 1 << ((reg_11 & CACHE_LINESIZE_LOG2_MASK) >> 24);
  133. cache_info.size = cache_info.associative * cache_info.linesize *
  134. (1 << ((reg_11 & CACHE_INDEX_LOG2_MASK) >> 16));
  135. }
  136. break;
  137. case CACHE_INFO_L1_D:
  138. if (reg_10 & L1_D_PRESENT_MASK) {
  139. uint32_t reg_12 = 0;
  140. cache_info.present = reg_10 & L1_D_PRESENT_MASK;
  141. __asm__ volatile (
  142. "cpucfg %0, %1 \n\t"
  143. : "+&r"(reg_12)
  144. : "r"(LOONGARCH_CFG12)
  145. );
  146. cache_info.associative = (reg_12 & CACHE_WAY_MINUS_1_MASK) + 1;
  147. cache_info.linesize = 1 << ((reg_12 & CACHE_LINESIZE_LOG2_MASK) >> 24);
  148. cache_info.size = cache_info.associative * cache_info.linesize *
  149. (1 << ((reg_12 & CACHE_INDEX_LOG2_MASK) >> 16));
  150. }
  151. break;
  152. case CACHE_INFO_L2_IU:
  153. if (reg_10 & L2_IU_PRESENT_MASK) {
  154. uint32_t reg_13 = 0;
  155. cache_info.present = reg_10 & L2_IU_PRESENT_MASK;
  156. cache_info.unify = reg_10 & L2_IU_UNITY_MASK;
  157. __asm__ volatile (
  158. "cpucfg %0, %1 \n\t"
  159. : "+&r"(reg_13)
  160. : "r"(LOONGARCH_CFG13)
  161. );
  162. cache_info.associative = (reg_13 & CACHE_WAY_MINUS_1_MASK) + 1;
  163. cache_info.linesize = 1 << ((reg_13 & CACHE_LINESIZE_LOG2_MASK) >> 24);
  164. cache_info.size = cache_info.associative * cache_info.linesize *
  165. (1 << ((reg_13 & CACHE_INDEX_LOG2_MASK) >> 16));
  166. }
  167. break;
  168. case CACHE_INFO_L2_D:
  169. if (reg_10 & L2_D_PRESENT_MASK) {
  170. cache_info.present = reg_10 & L2_D_PRESENT_MASK;
  171. // No date fetch
  172. }
  173. break;
  174. case CACHE_INFO_L3_IU:
  175. if (reg_10 & L3_IU_PRESENT_MASK) {
  176. uint32_t reg_14 = 0;
  177. cache_info.present = reg_10 & L3_IU_PRESENT_MASK;
  178. cache_info.unify = reg_10 & L3_IU_UNITY_MASK;
  179. __asm__ volatile (
  180. "cpucfg %0, %1 \n\t"
  181. : "+&r"(reg_14)
  182. : "r"(LOONGARCH_CFG14)
  183. );
  184. cache_info.associative = (reg_14 & CACHE_WAY_MINUS_1_MASK) + 1;
  185. cache_info.linesize = 1 << ((reg_14 & CACHE_LINESIZE_LOG2_MASK) >> 24);
  186. cache_info.size = cache_info.associative * cache_info.linesize *
  187. (1 << ((reg_14 & CACHE_INDEX_LOG2_MASK) >> 16));
  188. }
  189. break;
  190. case CACHE_INFO_L3_D:
  191. if (reg_10 & L3_D_PRESENT_MASK) {
  192. cache_info.present = reg_10 & L3_D_PRESENT_MASK;
  193. // No data fetch
  194. }
  195. break;
  196. default:
  197. break;
  198. }
  199. *cacheinfo = cache_info;
  200. }
  201. static uint32_t get_prid() {
  202. uint32_t reg = 0;
  203. __asm__ volatile (
  204. "cpucfg %0, %1 \n\t"
  205. : "+&r"(reg)
  206. : "r"(LOONGARCH_CFG0)
  207. );
  208. return reg;
  209. }
  /*
   * Count the lines of /proc/cpuinfo that begin with "processor" and store
   * the total in *count. If the file cannot be opened, *count is left
   * untouched.
   */
  static void get_cpucount(uint32_t *count) {
      static const char key[] = "processor";
      char line[200];
      uint32_t seen = 0;
      FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

      if (cpuinfo == NULL) {
          return;
      }

      while (fgets(line, sizeof(line), cpuinfo) != NULL) {
          /* Compare only the key's own length (9 characters). */
          if (strncmp(key, line, sizeof(key) - 1) == 0) {
              seen++;
          }
      }

      fclose(cpuinfo);
      *count = seen;
  }
  223. /* Detect whether the OS supports the LASX instruction set */
  224. static int os_support_lasx() {
  225. int hwcap = (int)getauxval(AT_HWCAP);
  226. if (hwcap & LA_HWCAP_LASX)
  227. return 1;
  228. else
  229. return 0;
  230. }
  231. /* Detect whether the OS supports the LSX instruction set */
  232. static int os_support_lsx() {
  233. int hwcap = (int)getauxval(AT_HWCAP);
  234. if (hwcap & LA_HWCAP_LSX)
  235. return 1;
  236. else
  237. return 0;
  238. }
  239. int get_coretype(void) {
  240. uint32_t prid = get_prid();
  241. switch (prid & PRID_SERIES_MASK) {
  242. case (PRID_SERIES_LA464):
  243. case (PRID_SERIES_LA664):
  244. if (os_support_lasx())
  245. return CORE_LA464;
  246. else if (os_support_lsx())
  247. return CORE_LA264;
  248. else
  249. return CORE_LA64_GENERIC;
  250. break;
  251. case (PRID_SERIES_LA264):
  252. case (PRID_SERIES_LA364):
  253. if (os_support_lsx())
  254. return CORE_LA264;
  255. else
  256. return CORE_LA64_GENERIC;
  257. break;
  258. default:
  259. return CORE_LA64_GENERIC;
  260. break;
  261. }
  262. }
  263. int get_cputype(void) {
  264. uint32_t prid = get_prid();
  265. switch (prid & PRID_SERIES_MASK) {
  266. case (PRID_SERIES_LA264):
  267. return CPU_LA264;
  268. break;
  269. case (PRID_SERIES_LA364):
  270. return CPU_LA364;
  271. break;
  272. case (PRID_SERIES_LA464):
  273. return CPU_LA464;
  274. break;
  275. case (PRID_SERIES_LA664):
  276. return CPU_LA664;
  277. break;
  278. default:
  279. return CPU_LA64_GENERIC;
  280. break;
  281. }
  282. }
  283. char *get_corename(void) {
  284. return corename[get_coretype()];
  285. }
  286. void get_libname(void){
  287. printf("%s", corename_lower[get_coretype()]);
  288. }
  /* Print the architecture name to stdout, without a trailing newline. */
  void get_architecture(void) {
      fputs("LOONGARCH64", stdout);
  }
  292. void get_subarchitecture(void) {
  293. printf("%s", cpuname[get_cputype()]);
  294. }
  /* Print the architecture sub-directory name to stdout, without a trailing
   * newline. */
  void get_subdirname(void) {
      fputs("loongarch64", stdout);
  }
  298. void get_cpuconfig(void) {
  299. cache_info_t info;
  300. uint32_t num_cores = 0;
  301. printf("#define %s\n", corename[get_coretype()]); // Core name
  302. printf("#define CPU_NAME %s\n", cpuname[get_cputype()]); // Cpu microarchitecture name
  303. get_cacheinfo(CACHE_INFO_L1_IU, &info);
  304. if (info.present) {
  305. if (info.unify) { // Unified cache, without distinguishing between instructions and data
  306. printf("#define L1_SIZE %d\n", info.size);
  307. printf("#define L1_ASSOCIATIVE %d\n", info.associative);
  308. printf("#define L1_LINESIZE %d\n", info.linesize);
  309. } else {
  310. printf("#define L1_CODE_SIZE %d\n", info.size);
  311. printf("#define L1_CODE_ASSOCIATIVE %d\n", info.associative);
  312. printf("#define L1_CODE_LINESIZE %d\n", info.linesize);
  313. }
  314. }
  315. if (!info.unify) {
  316. get_cacheinfo(CACHE_INFO_L1_D, &info);
  317. if (info.present) {
  318. printf("#define L1_DATA_SIZE %d\n", info.size);
  319. printf("#define L1_DATA_ASSOCIATIVE %d\n", info.associative);
  320. printf("#define L1_DATA_LINESIZE %d\n", info.linesize);
  321. }
  322. }
  323. get_cacheinfo(CACHE_INFO_L2_IU, &info);
  324. if (info.present > 0) {
  325. if (info.unify) {
  326. printf("#define L2_SIZE %d\n", info.size);
  327. printf("#define L2_ASSOCIATIVE %d\n", info.associative);
  328. printf("#define L2_LINESIZE %d\n", info.linesize);
  329. } else {
  330. printf("#define L2_CODE_SIZE %d\n", info.size);
  331. printf("#define L2_CODE_ASSOCIATIVE %d\n", info.associative);
  332. printf("#define L2_CODE_LINESIZE %d\n", info.linesize);
  333. }
  334. }
  335. get_cacheinfo(CACHE_INFO_L3_IU, &info);
  336. if (info.present > 0) {
  337. if (info.unify) {
  338. printf("#define L3_SIZE %d\n", info.size);
  339. printf("#define L3_ASSOCIATIVE %d\n", info.associative);
  340. printf("#define L3_LINESIZE %d\n", info.linesize);
  341. } else {
  342. printf("#define L3_CODE_SIZE %d\n", info.size);
  343. printf("#define L3_CODE_ASSOCIATIVE %d\n", info.associative);
  344. printf("#define L3_CODE_LINESIZE %d\n", info.linesize);
  345. }
  346. }
  347. if(os_support_lsx) printf("#define HAVE_LSX\n");
  348. if(os_support_lasx) printf("#define HAVE_LASX\n");
  349. get_cpucount(&num_cores);
  350. if (num_cores)
  351. printf("#define NUM_CORES %d\n", num_cores);
  352. //TODO: It’s unclear what this entry represents, but it is indeed necessary.
  353. //It has been set based on reference to other platforms.
  354. printf("#define DTB_DEFAULT_ENTRIES 64\n");
  355. }