You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

prebuild.cmake 48 kB

5 years ago
5 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527
  1. ##
  2. ## Author: Hank Anderson <hank@statease.com>
  3. ## Description: Ported from OpenBLAS/Makefile.prebuild
  4. ## This is triggered by system.cmake and runs before any of the code is built.
  5. ## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
  6. ## Next it runs f_check and appends some fortran information to the files.
  7. ## Then it runs getarch and getarch_2nd for even more environment information.
  8. ## Finally it builds gen_config_h for use at build time to generate config.h.
  9. # CMake vars set by this file:
  10. # CORE
  11. # LIBCORE
  12. # NUM_CORES
  13. # HAVE_MMX
  14. # HAVE_SSE
  15. # HAVE_SSE2
  16. # HAVE_SSE3
  17. # MAKE
  18. # SBGEMM_UNROLL_M
  19. # SBGEMM_UNROLL_N
  20. # SGEMM_UNROLL_M
  21. # SGEMM_UNROLL_N
  22. # DGEMM_UNROLL_M
  23. # DGEMM_UNROLL_M
  24. # QGEMM_UNROLL_N
  25. # QGEMM_UNROLL_N
  26. # CGEMM_UNROLL_M
  27. # CGEMM_UNROLL_M
  28. # ZGEMM_UNROLL_N
  29. # ZGEMM_UNROLL_N
  30. # XGEMM_UNROLL_M
  31. # XGEMM_UNROLL_N
  32. # CGEMM3M_UNROLL_M
  33. # CGEMM3M_UNROLL_N
  34. # ZGEMM3M_UNROLL_M
  35. # ZGEMM3M_UNROLL_M
  36. # XGEMM3M_UNROLL_N
  37. # XGEMM3M_UNROLL_N
  38. # CPUIDEMU = ../../cpuid/table.o
  39. if (DEFINED CPUIDEMU)
  40. set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
  41. endif ()
  42. if (BUILD_KERNEL)
  43. # set the C flags for just this file
  44. set(GETARCH2_FLAGS "-DBUILD_KERNEL")
  45. set(TARGET_CONF "config_kernel.h")
  46. set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE})
  47. else()
  48. set(TARGET_CONF "config.h")
  49. set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR})
  50. endif ()
  51. set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")
  52. # c_check
  53. set(FU "")
  54. if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
  55. set(FU "_")
  56. endif()
  57. if(MINGW AND NOT MINGW64)
  58. set(FU "_")
  59. endif()
  60. set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
  61. if (${COMPILER_ID} STREQUAL "GNU")
  62. set(COMPILER_ID "GCC")
  63. endif ()
  64. string(TOUPPER ${ARCH} UC_ARCH)
  65. file(WRITE ${TARGET_CONF_TEMP}
  66. "#define OS_${HOST_OS}\t1\n"
  67. "#define ARCH_${UC_ARCH}\t1\n"
  68. "#define C_${COMPILER_ID}\t1\n"
  69. "#define __${BINARY}BIT__\t1\n"
  70. "#define FUNDERSCORE\t${FU}\n")
  71. if (${HOST_OS} STREQUAL "WINDOWSSTORE")
  72. file(APPEND ${TARGET_CONF_TEMP}
  73. "#define OS_WINNT\t1\n")
  74. endif ()
  75. # f_check
  76. if (NOT NOFORTRAN)
  77. include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
  78. else ()
  79. file(APPEND ${TARGET_CONF_TEMP}
  80. "#define BUNDERSCORE _\n"
  81. "#define NEEDBUNDERSCORE 1\n")
  82. set(BU "_")
  83. endif ()
  84. # Cannot run getarch on target if we are cross-compiling
  85. if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
  86. # Write to config as getarch would
  87. if (DEFINED TARGET_CORE)
  88. set(TCORE ${TARGET_CORE})
  89. else()
  90. set(TCORE ${CORE})
  91. endif()
  92. # TODO: Set up defines that getarch sets up based on every other target
  93. # Perhaps this should be inside a different file as it grows larger
  94. file(APPEND ${TARGET_CONF_TEMP}
  95. "#define ${TCORE}\n"
  96. "#define CORE_${TCORE}\n"
  97. "#define CHAR_CORENAME \"${TCORE}\"\n")
  98. if ("${TCORE}" STREQUAL "CORE2")
  99. file(APPEND ${TARGET_CONF_TEMP}
  100. "#define L1_DATA_SIZE\t32768\n"
  101. "#define L1_DATA_LINESIZE\t64\n"
  102. "#define L2_SIZE\t1048576\n"
  103. "#define L2_LINESIZE\t64\n"
  104. "#define DTB_DEFAULT_ENTRIES\t256\n"
  105. "#define DTB_SIZE\t4096\n"
  106. "#define HAVE_CMOV\n"
  107. "#define HAVE_MMX\n"
  108. "#define HAVE_SSE\n"
  109. "#define HAVE_SSE2\n"
  110. "#define HAVE_SSE3\n"
  111. "#define HAVE_SSSE3\n"
  112. "#define SLOCAL_BUFFER_SIZE\t16384\n"
  113. "#define DLOCAL_BUFFER_SIZE\t16384\n"
  114. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  115. "#define ZLOCAL_BUFFER_SIZE\t16384\n")
  116. set(HAVE_SSE 1)
  117. set(HAVE_SSE2 1)
  118. set(HAVE_SSE3 1)
  119. set(HAVE_SSSE3 1)
  120. set(SBGEMM_UNROLL_M 8)
  121. set(SBGEMM_UNROLL_N 4)
  122. set(SGEMM_UNROLL_M 8)
  123. set(SGEMM_UNROLL_N 4)
  124. set(DGEMM_UNROLL_M 4)
  125. set(DGEMM_UNROLL_N 4)
  126. set(CGEMM_UNROLL_M 4)
  127. set(CGEMM_UNROLL_N 2)
  128. set(ZGEMM_UNROLL_M 2)
  129. set(ZGEMM_UNROLL_N 2)
  130. set(CGEMM3M_UNROLL_M 8)
  131. set(CGEMM3M_UNROLL_N 4)
  132. set(ZGEMM3M_UNROLL_M 4)
  133. set(ZGEMM3M_UNROLL_N 4)
  134. elseif ("${TCORE}" STREQUAL "ATOM")
  135. file(APPEND ${TARGET_CONF_TEMP}
  136. "#define L1_DATA_SIZE\t24576\n"
  137. "#define L1_DATA_LINESIZE\t64\n"
  138. "#define L2_SIZE\t524288\n"
  139. "#define L2_LINESIZE\t64\n"
  140. "#define DTB_DEFAULT_ENTRIES\t64\n"
  141. "#define DTB_SIZE\t4096\n"
  142. "#define HAVE_CMOV\n"
  143. "#define HAVE_MMX\n"
  144. "#define HAVE_SSE\n"
  145. "#define HAVE_SSE2\n"
  146. "#define HAVE_SSE3\n"
  147. "#define HAVE_SSSE3\n"
  148. "#define SLOCAL_BUFFER_SIZE\t16384\n"
  149. "#define DLOCAL_BUFFER_SIZE\t8192\n"
  150. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  151. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  152. set(HAVE_SSE 1)
  153. set(HAVE_SSE2 1)
  154. set(HAVE_SSE3 1)
  155. set(HAVE_SSSE3 1)
  156. set(SBGEMM_UNROLL_M 8)
  157. set(SBGEMM_UNROLL_N 4)
  158. set(SGEMM_UNROLL_M 8)
  159. set(SGEMM_UNROLL_N 4)
  160. set(DGEMM_UNROLL_M 4)
  161. set(DGEMM_UNROLL_N 2)
  162. set(CGEMM_UNROLL_M 4)
  163. set(CGEMM_UNROLL_N 2)
  164. set(ZGEMM_UNROLL_M 2)
  165. set(ZGEMM_UNROLL_N 1)
  166. set(CGEMM3M_UNROLL_M 8)
  167. set(CGEMM3M_UNROLL_N 4)
  168. set(ZGEMM3M_UNROLL_M 4)
  169. set(ZGEMM3M_UNROLL_N 4)
  170. elseif ("${TCORE}" STREQUAL "PRESCOTT")
  171. file(APPEND ${TARGET_CONF_TEMP}
  172. "#define L1_DATA_SIZE\t16384\n"
  173. "#define L1_DATA_LINESIZE\t64\n"
  174. "#define L2_SIZE\t1048576\n"
  175. "#define L2_LINESIZE\t64\n"
  176. "#define DTB_DEFAULT_ENTRIES\t64\n"
  177. "#define DTB_SIZE\t4096\n"
  178. "#define HAVE_CMOV\n"
  179. "#define HAVE_MMX\n"
  180. "#define HAVE_SSE\n"
  181. "#define HAVE_SSE2\n"
  182. "#define HAVE_SSE3\n"
  183. "#define SLOCAL_BUFFER_SIZE\t8192\n"
  184. "#define DLOCAL_BUFFER_SIZE\t8192\n"
  185. "#define CLOCAL_BUFFER_SIZE\t8192\n"
  186. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  187. set(HAVE_SSE 1)
  188. set(HAVE_SSE2 1)
  189. set(HAVE_SSE3 1)
  190. set(SBGEMM_UNROLL_M 8)
  191. set(SBGEMM_UNROLL_N 4)
  192. set(SGEMM_UNROLL_M 8)
  193. set(SGEMM_UNROLL_N 4)
  194. set(DGEMM_UNROLL_M 4)
  195. set(DGEMM_UNROLL_N 4)
  196. set(CGEMM_UNROLL_M 4)
  197. set(CGEMM_UNROLL_N 2)
  198. set(ZGEMM_UNROLL_M 2)
  199. set(ZGEMM_UNROLL_N 2)
  200. set(CGEMM3M_UNROLL_M 8)
  201. set(CGEMM3M_UNROLL_N 4)
  202. set(ZGEMM3M_UNROLL_M 4)
  203. set(ZGEMM3M_UNROLL_N 4)
  204. elseif ("${TCORE}" STREQUAL "NEHALEM")
  205. file(APPEND ${TARGET_CONF_TEMP}
  206. "#define L1_DATA_SIZE\t32768\n"
  207. "#define L1_DATA_LINESIZE\t64\n"
  208. "#define L2_SIZE\t262144\n"
  209. "#define L2_LINESIZE\t64\n"
  210. "#define DTB_DEFAULT_ENTRIES\t64\n"
  211. "#define DTB_SIZE\t4096\n"
  212. "#define HAVE_CMOV\n"
  213. "#define HAVE_MMX\n"
  214. "#define HAVE_SSE\n"
  215. "#define HAVE_SSE2\n"
  216. "#define HAVE_SSE3\n"
  217. "#define HAVE_SSSE3\n"
  218. "#define HAVE_SSE4_1\n"
  219. "#define HAVE_SSE4_2\n"
  220. "#define SLOCAL_BUFFER_SIZE\t65535\n"
  221. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  222. "#define CLOCAL_BUFFER_SIZE\t65536\n"
  223. "#define ZLOCAL_BUFFER_SIZE\t32768\n")
  224. set(HAVE_SSE 1)
  225. set(HAVE_SSE2 1)
  226. set(HAVE_SSE3 1)
  227. set(HAVE_SSSE3 1)
  228. set(HAVE_SSE4_1 1)
  229. set(HAVE_SSE4_2 1)
  230. set(SBGEMM_UNROLL_M 8)
  231. set(SBGEMM_UNROLL_N 4)
  232. set(SGEMM_UNROLL_M 4)
  233. set(SGEMM_UNROLL_N 8)
  234. set(DGEMM_UNROLL_M 2)
  235. set(DGEMM_UNROLL_N 8)
  236. set(CGEMM_UNROLL_M 2)
  237. set(CGEMM_UNROLL_N 4)
  238. set(ZGEMM_UNROLL_M 1)
  239. set(ZGEMM_UNROLL_N 4)
  240. set(CGEMM3M_UNROLL_M 4)
  241. set(CGEMM3M_UNROLL_N 8)
  242. set(ZGEMM3M_UNROLL_M 2)
  243. set(ZGEMM3M_UNROLL_N 8)
  244. elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
  245. file(APPEND ${TARGET_CONF_TEMP}
  246. "#define L1_DATA_SIZE\t32768\n"
  247. "#define L1_DATA_LINESIZE\t64\n"
  248. "#define L2_SIZE\t262144\n"
  249. "#define L2_LINESIZE\t64\n"
  250. "#define DTB_DEFAULT_ENTRIES\t64\n"
  251. "#define DTB_SIZE\t4096\n"
  252. "#define HAVE_CMOV\n"
  253. "#define HAVE_MMX\n"
  254. "#define HAVE_SSE\n"
  255. "#define HAVE_SSE2\n"
  256. "#define HAVE_SSE3\n"
  257. "#define HAVE_SSSE3\n"
  258. "#define HAVE_SSE4_1\n"
  259. "#define HAVE_SSE4_2\n"
  260. "#define HAVE_AVX\n"
  261. "#define SLOCAL_BUFFER_SIZE\t24576\n"
  262. "#define DLOCAL_BUFFER_SIZE\t16384\n"
  263. "#define CLOCAL_BUFFER_SIZE\t32768\n"
  264. "#define ZLOCAL_BUFFER_SIZE\t24576\n")
  265. set(HAVE_SSE 1)
  266. set(HAVE_SSE2 1)
  267. set(HAVE_SSE3 1)
  268. set(HAVE_SSSE3 1)
  269. set(HAVE_SSE4_1 1)
  270. set(HAVE_SSE4_2 1)
  271. set(HAVE_AVX 1)
  272. set(SBGEMM_UNROLL_M 8)
  273. set(SBGEMM_UNROLL_N 4)
  274. set(SGEMM_UNROLL_M 16)
  275. set(SGEMM_UNROLL_N 4)
  276. set(DGEMM_UNROLL_M 8)
  277. set(DGEMM_UNROLL_N 4)
  278. set(CGEMM_UNROLL_M 8)
  279. set(CGEMM_UNROLL_N 2)
  280. set(ZGEMM_UNROLL_M 1)
  281. set(ZGEMM_UNROLL_N 4)
  282. set(CGEMM3M_UNROLL_M 4)
  283. set(CGEMM3M_UNROLL_N 8)
  284. set(ZGEMM3M_UNROLL_M 2)
  285. set(ZGEMM3M_UNROLL_N 8)
  286. elseif ("${TCORE}" STREQUAL "HASWELL")
  287. file(APPEND ${TARGET_CONF_TEMP}
  288. "#define L1_DATA_SIZE\t32768\n"
  289. "#define L1_DATA_LINESIZE\t64\n"
  290. "#define L2_SIZE\t262144\n"
  291. "#define L2_LINESIZE\t64\n"
  292. "#define DTB_DEFAULT_ENTRIES\t64\n"
  293. "#define DTB_SIZE\t4096\n"
  294. "#define HAVE_CMOV\n"
  295. "#define HAVE_MMX\n"
  296. "#define HAVE_SSE\n"
  297. "#define HAVE_SSE2\n"
  298. "#define HAVE_SSE3\n"
  299. "#define HAVE_SSSE3\n"
  300. "#define HAVE_SSE4_1\n"
  301. "#define HAVE_SSE4_2\n"
  302. "#define HAVE_AVX\n"
  303. "#define HAVE_AVX2\n"
  304. "#define HAVE_FMA3\n"
  305. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  306. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  307. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  308. "#define ZLOCAL_BUFFER_SIZE\t12288\n")
  309. set(HAVE_SSE 1)
  310. set(HAVE_SSE2 1)
  311. set(HAVE_SSE3 1)
  312. set(HAVE_SSSE3 1)
  313. set(HAVE_SSE4_1 1)
  314. set(HAVE_SSE4_2 1)
  315. set(HAVE_AVX 1)
  316. set(HAVE_AVX2 1)
  317. set(HAVE_FMA3 1)
  318. set(SBGEMM_UNROLL_M 8)
  319. set(SBGEMM_UNROLL_N 4)
  320. set(SGEMM_UNROLL_M 8)
  321. set(SGEMM_UNROLL_N 4)
  322. set(DGEMM_UNROLL_M 4)
  323. set(DGEMM_UNROLL_N 8)
  324. set(CGEMM_UNROLL_M 8)
  325. set(CGEMM_UNROLL_N 2)
  326. set(ZGEMM_UNROLL_M 4)
  327. set(ZGEMM_UNROLL_N 2)
  328. set(CGEMM3M_UNROLL_M 8)
  329. set(CGEMM3M_UNROLL_N 4)
  330. set(ZGEMM3M_UNROLL_M 4)
  331. set(ZGEMM3M_UNROLL_N 4)
  332. elseif ("${TCORE}" STREQUAL "SKYLAKEX")
  333. file(APPEND ${TARGET_CONF_TEMP}
  334. "#define L1_DATA_SIZE\t32768\n"
  335. "#define L1_DATA_LINESIZE\t64\n"
  336. "#define L2_SIZE\t262144\n"
  337. "#define L2_LINESIZE\t64\n"
  338. "#define DTB_DEFAULT_ENTRIES\t64\n"
  339. "#define DTB_SIZE\t4096\n"
  340. "#define HAVE_CMOV\n"
  341. "#define HAVE_MMX\n"
  342. "#define HAVE_SSE\n"
  343. "#define HAVE_SSE2\n"
  344. "#define HAVE_SSE3\n"
  345. "#define HAVE_SSSE3\n"
  346. "#define HAVE_SSE4_1\n"
  347. "#define HAVE_SSE4_2\n"
  348. "#define HAVE_AVX\n"
  349. "#define HAVE_AVX2\n"
  350. "#define HAVE_FMA3\n"
  351. "#define HAVE_AVX512VL\n"
  352. "#define SLOCAL_BUFFER_SIZE\t28672\n"
  353. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  354. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  355. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  356. set(HAVE_CMOV 1)
  357. set(HAVE_MMX 1)
  358. set(HAVE_SSE 1)
  359. set(HAVE_SSE2 1)
  360. set(HAVE_SSE3 1)
  361. set(HAVE_SSSE3 1)
  362. set(HAVE_SSE4_1 1)
  363. set(HAVE_SSE4_2 1)
  364. set(HAVE_AVX 1)
  365. set(HAVE_AVX2 1)
  366. set(HAVE_FMA3 1)
  367. set(HAVE_AVX512VL 1)
  368. set(SBGEMM_UNROLL_M 8)
  369. set(SBGEMM_UNROLL_N 4)
  370. set(SGEMM_UNROLL_M 16)
  371. set(SGEMM_UNROLL_N 4)
  372. set(DGEMM_UNROLL_M 16)
  373. set(DGEMM_UNROLL_N 2)
  374. set(CGEMM_UNROLL_M 8)
  375. set(CGEMM_UNROLL_N 2)
  376. set(ZGEMM_UNROLL_M 4)
  377. set(ZGEMM_UNROLL_N 2)
  378. set(CGEMM3M_UNROLL_M 8)
  379. set(CGEMM3M_UNROLL_N 4)
  380. set(ZGEMM3M_UNROLL_M 4)
  381. set(ZGEMM3M_UNROLL_N 4)
  382. elseif ("${TCORE}" STREQUAL "COOPERLAKE")
  383. file(APPEND ${TARGET_CONF_TEMP}
  384. "#define L1_DATA_SIZE\t32768\n"
  385. "#define L1_DATA_LINESIZE\t64\n"
  386. "#define L2_SIZE\t262144\n"
  387. "#define L2_LINESIZE\t64\n"
  388. "#define DTB_DEFAULT_ENTRIES\t64\n"
  389. "#define DTB_SIZE\t4096\n"
  390. "#define HAVE_CMOV\n"
  391. "#define HAVE_MMX\n"
  392. "#define HAVE_SSE\n"
  393. "#define HAVE_SSE2\n"
  394. "#define HAVE_SSE3\n"
  395. "#define HAVE_SSSE3\n"
  396. "#define HAVE_SSE4_1\n"
  397. "#define HAVE_SSE4_2\n"
  398. "#define HAVE_AVX\n"
  399. "#define HAVE_AVX2\n"
  400. "#define HAVE_FMA3\n"
  401. "#define HAVE_AVX512VL\n"
  402. "#define HAVE_AVX512BF16\n"
  403. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  404. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  405. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  406. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  407. set(HAVE_CMOV 1)
  408. set(HAVE_MMX 1)
  409. set(HAVE_SSE 1)
  410. set(HAVE_SSE2 1)
  411. set(HAVE_SSE3 1)
  412. set(HAVE_SSSE3 1)
  413. set(HAVE_SSE4_1 1)
  414. set(HAVE_SSE4_2 1)
  415. set(HAVE_AVX 1)
  416. set(HAVE_AVX2 1)
  417. set(HAVE_FMA3 1)
  418. set(HAVE_AVX512VL 1)
  419. set(HAVE_AVX512BF16 1)
  420. set(SBGEMM_UNROLL_M 16)
  421. set(SBGEMM_UNROLL_N 4)
  422. set(SGEMM_UNROLL_M 16)
  423. set(SGEMM_UNROLL_N 4)
  424. set(DGEMM_UNROLL_M 16)
  425. set(DGEMM_UNROLL_N 2)
  426. set(CGEMM_UNROLL_M 8)
  427. set(CGEMM_UNROLL_N 2)
  428. set(ZGEMM_UNROLL_M 4)
  429. set(ZGEMM_UNROLL_N 2)
  430. set(CGEMM3M_UNROLL_M 8)
  431. set(CGEMM3M_UNROLL_N 4)
  432. set(ZGEMM3M_UNROLL_M 4)
  433. set(ZGEMM3M_UNROLL_N 4)
  434. elseif ("${TCORE}" STREQUAL "SAPPHIRERAPIDS")
  435. file(APPEND ${TARGET_CONF_TEMP}
  436. "#define L1_DATA_SIZE\t32768\n"
  437. "#define L1_DATA_LINESIZE\t64\n"
  438. "#define L2_SIZE\t262144\n"
  439. "#define L2_LINESIZE\t64\n"
  440. "#define DTB_DEFAULT_ENTRIES\t64\n"
  441. "#define DTB_SIZE\t4096\n"
  442. "#define HAVE_CMOV\n"
  443. "#define HAVE_MMX\n"
  444. "#define HAVE_SSE\n"
  445. "#define HAVE_SSE2\n"
  446. "#define HAVE_SSE3\n"
  447. "#define HAVE_SSSE3\n"
  448. "#define HAVE_SSE4_1\n"
  449. "#define HAVE_SSE4_2\n"
  450. "#define HAVE_AVX\n"
  451. "#define HAVE_AVX2\n"
  452. "#define HAVE_FMA3\n"
  453. "#define HAVE_AVX512VL\n"
  454. "#define HAVE_AVX512BF16\n"
  455. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  456. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  457. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  458. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  459. set(HAVE_CMOV 1)
  460. set(HAVE_MMX 1)
  461. set(HAVE_SSE 1)
  462. set(HAVE_SSE2 1)
  463. set(HAVE_SSE3 1)
  464. set(HAVE_SSSE3 1)
  465. set(HAVE_SSE4_1 1)
  466. set(HAVE_SSE4_2 1)
  467. set(HAVE_AVX 1)
  468. set(HAVE_AVX2 1)
  469. set(HAVE_FMA3 1)
  470. set(HAVE_AVX512VL 1)
  471. set(HAVE_AVX512BF16 1)
  472. set(SBGEMM_UNROLL_M 32)
  473. set(SBGEMM_UNROLL_N 16)
  474. set(SGEMM_UNROLL_M 16)
  475. set(SGEMM_UNROLL_N 4)
  476. set(DGEMM_UNROLL_M 16)
  477. set(DGEMM_UNROLL_N 2)
  478. set(CGEMM_UNROLL_M 8)
  479. set(CGEMM_UNROLL_N 2)
  480. set(ZGEMM_UNROLL_M 4)
  481. set(ZGEMM_UNROLL_N 2)
  482. set(CGEMM3M_UNROLL_M 8)
  483. set(CGEMM3M_UNROLL_N 4)
  484. set(ZGEMM3M_UNROLL_M 4)
  485. set(ZGEMM3M_UNROLL_N 4)
  486. elseif ("${TCORE}" STREQUAL "OPTERON")
  487. file(APPEND ${TARGET_CONF_TEMP}
  488. "#define L1_DATA_SIZE\t65536\n"
  489. "#define L1_DATA_LINESIZE\t64\n"
  490. "#define L2_SIZE\t1048576\n"
  491. "#define L2_LINESIZE\t64\n"
  492. "#define DTB_DEFAULT_ENTRIES\t32\n"
  493. "#define DTB_SIZE\t4096\n"
  494. "#define HAVE_3DNOW\n"
  495. "#define HAVE_3DNOWEX\n"
  496. "#define HAVE_MMX\n"
  497. "#define HAVE_SSE\n"
  498. "#define HAVE_SSE2\n"
  499. "#define SLOCAL_BUFFER_SIZE\t15360\n"
  500. "#define DLOCAL_BUFFER_SIZE\t15360\n"
  501. "#define CLOCAL_BUFFER_SIZE\t15360\n"
  502. "#define ZLOCAL_BUFFER_SIZE\t15360\n")
  503. set(HAVE_3DNOW 1)
  504. set(HAVE_3DNOWEX 1)
  505. set(HAVE_MMX 1)
  506. set(HAVE_SSE 1)
  507. set(HAVE_SSE2 1)
  508. set(SBGEMM_UNROLL_M 8)
  509. set(SBGEMM_UNROLL_N 4)
  510. set(SGEMM_UNROLL_M 8)
  511. set(SGEMM_UNROLL_N 4)
  512. set(DGEMM_UNROLL_M 4)
  513. set(DGEMM_UNROLL_N 4)
  514. set(CGEMM_UNROLL_M 4)
  515. set(CGEMM_UNROLL_N 2)
  516. set(ZGEMM_UNROLL_M 2)
  517. set(ZGEMM_UNROLL_N 2)
  518. set(CGEMM3M_UNROLL_M 8)
  519. set(CGEMM3M_UNROLL_N 4)
  520. set(ZGEMM3M_UNROLL_M 4)
  521. set(ZGEMM3M_UNROLL_N 4)
  522. elseif ("${TCORE}" STREQUAL "BARCELONA")
  523. file(APPEND ${TARGET_CONF_TEMP}
  524. "#define L1_DATA_SIZE\t32768\n"
  525. "#define L1_DATA_LINESIZE\t64\n"
  526. "#define L2_SIZE\t524288\n"
  527. "#define L2_LINESIZE\t64\n"
  528. "#define DTB_DEFAULT_ENTRIES\t64\n"
  529. "#define DTB_SIZE\t4096\n"
  530. "#define HAVE_MMX\n"
  531. "#define HAVE_SSE\n"
  532. "#define HAVE_SSE2\n"
  533. "#define HAVE_SSE3\n"
  534. "#define HAVE_SSE4A\n"
  535. "#define HAVE_MISALIGNSSE\n"
  536. "#define HAVE_128BITFPU\n"
  537. "#define HAVE_FASTMOVU\n"
  538. "#define SLOCAL_BUFFER_SIZE\t14336\n"
  539. "#define DLOCAL_BUFFER_SIZE\t14336\n"
  540. "#define CLOCAL_BUFFER_SIZE\t14336\n"
  541. "#define ZLOCAL_BUFFER_SIZE\t14336\n")
  542. set(HAVE_SSE 1)
  543. set(HAVE_SSE2 1)
  544. set(HAVE_SSE3 1)
  545. set(HAVE_SSE4A 1)
  546. set(HAVE_MISALIGNSSE 1)
  547. set(HAVE_128BITFPU 1)
  548. set(HAVE_FASTMOVU 1)
  549. set(SBGEMM_UNROLL_M 8)
  550. set(SBGEMM_UNROLL_N 4)
  551. set(SGEMM_UNROLL_M 8)
  552. set(SGEMM_UNROLL_N 4)
  553. set(DGEMM_UNROLL_M 4)
  554. set(DGEMM_UNROLL_N 4)
  555. set(CGEMM_UNROLL_M 4)
  556. set(CGEMM_UNROLL_N 2)
  557. set(ZGEMM_UNROLL_M 2)
  558. set(ZGEMM_UNROLL_N 2)
  559. set(CGEMM3M_UNROLL_M 8)
  560. set(CGEMM3M_UNROLL_N 4)
  561. set(ZGEMM3M_UNROLL_M 4)
  562. set(ZGEMM3M_UNROLL_N 4)
  563. elseif ("${TCORE}" STREQUAL "BULLDOZER")
  564. file(APPEND ${TARGET_CONF_TEMP}
  565. "#define L1_DATA_SIZE\t49152\n"
  566. "#define L1_DATA_LINESIZE\t64\n"
  567. "#define L2_SIZE\t1024000\n"
  568. "#define L2_LINESIZE\t64\n"
  569. "#define DTB_DEFAULT_ENTRIES\t32\n"
  570. "#define DTB_SIZE\t4096\n"
  571. "#define HAVE_MMX\n"
  572. "#define HAVE_SSE\n"
  573. "#define HAVE_SSE2\n"
  574. "#define HAVE_SSE3\n"
  575. "#define HAVE_SSE4A\n"
  576. "#define HAVE_AVX\n"
  577. "#define HAVE_MISALIGNSSE\n"
  578. "#define HAVE_128BITFPU\n"
  579. "#define HAVE_FASTMOVU\n"
  580. "#define SLOCAL_BUFFER_SIZE\t5376\n"
  581. "#define DLOCAL_BUFFER_SIZE\t5376\n"
  582. "#define CLOCAL_BUFFER_SIZE\t14336\n"
  583. "#define ZLOCAL_BUFFER_SIZE\t14336\n")
  584. set(HAVE_SSE 1)
  585. set(HAVE_SSE2 1)
  586. set(HAVE_SSE3 1)
  587. set(HAVE_SSE4A 1)
  588. set(HAVE_AVX 1)
  589. set(HAVE_MISALIGNSSE 1)
  590. set(HAVE_128BITFPU 1)
  591. set(HAVE_FASTMOVU 1)
  592. set(SBGEMM_UNROLL_M 8)
  593. set(SBGEMM_UNROLL_N 4)
  594. set(SGEMM_UNROLL_M 16)
  595. set(SGEMM_UNROLL_N 2)
  596. set(DGEMM_UNROLL_M 8)
  597. set(DGEMM_UNROLL_N 2)
  598. set(CGEMM_UNROLL_M 2)
  599. set(CGEMM_UNROLL_N 2)
  600. set(ZGEMM_UNROLL_M 2)
  601. set(ZGEMM_UNROLL_N 2)
  602. set(CGEMM3M_UNROLL_M 8)
  603. set(CGEMM3M_UNROLL_N 4)
  604. set(ZGEMM3M_UNROLL_M 4)
  605. set(ZGEMM3M_UNROLL_N 4)
  606. elseif ("${TCORE}" STREQUAL "PILEDRIVER")
  607. file(APPEND ${TARGET_CONF_TEMP}
  608. "#define L1_DATA_SIZE\t16384\n"
  609. "#define L1_DATA_LINESIZE\t64\n"
  610. "#define L2_SIZE\t2097152\n"
  611. "#define L2_LINESIZE\t64\n"
  612. "#define DTB_DEFAULT_ENTRIES\t64\n"
  613. "#define DTB_SIZE\t4096\n"
  614. "#define HAVE_MMX\n"
  615. "#define HAVE_SSE\n"
  616. "#define HAVE_SSE2\n"
  617. "#define HAVE_SSE3\n"
  618. "#define HAVE_SSE4_1\n"
  619. "#define HAVE_SSE4_2\n"
  620. "#define HAVE_SSE4A\n"
  621. "#define HAVE_AVX\n"
  622. "#define HAVE_MISALIGNSSE\n"
  623. "#define HAVE_128BITFPU\n"
  624. "#define HAVE_FASTMOVU\n"
  625. "#define HAVE_CFLUSH\n"
  626. "#define HAVE_FMA3\n"
  627. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  628. "#define DLOCAL_BUFFER_SIZE\t5376\n"
  629. "#define CLOCAL_BUFFER_SIZE\t10752\n"
  630. "#define ZLOCAL_BUFFER_SIZE\t10752\n")
  631. set(HAVE_SSE 1)
  632. set(HAVE_SSE2 1)
  633. set(HAVE_SSE3 1)
  634. set(HAVE_SSE4_1 1)
  635. set(HAVE_SSE4_2 1)
  636. set(HAVE_SSE4A 1)
  637. set(HAVE_AVX 1)
  638. set(HAVE_FMA3 1)
  639. set(HAVE_MISALIGNSSE 1)
  640. set(HAVE_128BITFPU 1)
  641. set(HAVE_FASTMOVU 1)
  642. set(HAVE_CFLUSH 1)
  643. set(SBGEMM_UNROLL_M 8)
  644. set(SBGEMM_UNROLL_N 4)
  645. set(SGEMM_UNROLL_M 16)
  646. set(SGEMM_UNROLL_N 2)
  647. set(DGEMM_UNROLL_M 8)
  648. set(DGEMM_UNROLL_N 2)
  649. set(CGEMM_UNROLL_M 4)
  650. set(CGEMM_UNROLL_N 2)
  651. set(ZGEMM_UNROLL_M 2)
  652. set(ZGEMM_UNROLL_N 2)
  653. set(CGEMM3M_UNROLL_M 8)
  654. set(CGEMM3M_UNROLL_N 4)
  655. set(ZGEMM3M_UNROLL_M 4)
  656. set(ZGEMM3M_UNROLL_N 4)
  657. elseif ("${TCORE}" STREQUAL "STEAMROLLER")
  658. file(APPEND ${TARGET_CONF_TEMP}
  659. "#define L1_DATA_SIZE\t16384\n"
  660. "#define L1_DATA_LINESIZE\t64\n"
  661. "#define L2_SIZE\t2097152\n"
  662. "#define L2_LINESIZE\t64\n"
  663. "#define DTB_DEFAULT_ENTRIES\t64\n"
  664. "#define DTB_SIZE\t4096\n"
  665. "#define HAVE_MMX\n"
  666. "#define HAVE_SSE\n"
  667. "#define HAVE_SSE2\n"
  668. "#define HAVE_SSE3\n"
  669. "#define HAVE_SSE4_1\n"
  670. "#define HAVE_SSE4_2\n"
  671. "#define HAVE_SSE4A\n"
  672. "#define HAVE_AVX\n"
  673. "#define HAVE_MISALIGNSSE\n"
  674. "#define HAVE_128BITFPU\n"
  675. "#define HAVE_FASTMOVU\n"
  676. "#define HAVE_CFLUSH\n"
  677. "#define HAVE_FMA3\n"
  678. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  679. "#define DLOCAL_BUFFER_SIZE\t5120\n"
  680. "#define CLOCAL_BUFFER_SIZE\t10240\n"
  681. "#define ZLOCAL_BUFFER_SIZE\t10240\n")
  682. set(HAVE_SSE 1)
  683. set(HAVE_SSE2 1)
  684. set(HAVE_SSE3 1)
  685. set(HAVE_SSE4_1 1)
  686. set(HAVE_SSE4_2 1)
  687. set(HAVE_SSE4A 1)
  688. set(HAVE_AVX 1)
  689. set(HAVE_FMA3 1)
  690. set(HAVE_MISALIGNSSE 1)
  691. set(HAVE_128BITFPU 1)
  692. set(HAVE_FASTMOVU 1)
  693. set(HAVE_CFLUSH 1)
  694. set(SBGEMM_UNROLL_M 8)
  695. set(SBGEMM_UNROLL_N 4)
  696. set(SGEMM_UNROLL_M 16)
  697. set(SGEMM_UNROLL_N 2)
  698. set(DGEMM_UNROLL_M 8)
  699. set(DGEMM_UNROLL_N 2)
  700. set(CGEMM_UNROLL_M 4)
  701. set(CGEMM_UNROLL_N 2)
  702. set(ZGEMM_UNROLL_M 2)
  703. set(ZGEMM_UNROLL_N 2)
  704. set(CGEMM3M_UNROLL_M 8)
  705. set(CGEMM3M_UNROLL_N 4)
  706. set(ZGEMM3M_UNROLL_M 4)
  707. set(ZGEMM3M_UNROLL_N 4)
  708. elseif ("${TCORE}" STREQUAL "EXCAVATOR")
  709. file(APPEND ${TARGET_CONF_TEMP}
  710. "#define L1_DATA_SIZE\t16384\n"
  711. "#define L1_DATA_LINESIZE\t64\n"
  712. "#define L2_SIZE\t2097152\n"
  713. "#define L2_LINESIZE\t64\n"
  714. "#define DTB_DEFAULT_ENTRIES\t64\n"
  715. "#define DTB_SIZE\t4096\n"
  716. "#define HAVE_MMX\n"
  717. "#define HAVE_SSE\n"
  718. "#define HAVE_SSE2\n"
  719. "#define HAVE_SSE3\n"
  720. "#define HAVE_SSE4_1\n"
  721. "#define HAVE_SSE4_2\n"
  722. "#define HAVE_SSE4A\n"
  723. "#define HAVE_AVX\n"
  724. "#define HAVE_MISALIGNSSE\n"
  725. "#define HAVE_128BITFPU\n"
  726. "#define HAVE_FASTMOVU\n"
  727. "#define HAVE_CFLUSH\n"
  728. "#define HAVE_FMA3\n"
  729. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  730. "#define DLOCAL_BUFFER_SIZE\t5120\n"
  731. "#define CLOCAL_BUFFER_SIZE\t10240\n"
  732. "#define ZLOCAL_BUFFER_SIZE\t10240\n")
  733. set(HAVE_SSE 1)
  734. set(HAVE_SSE2 1)
  735. set(HAVE_SSE3 1)
  736. set(HAVE_SSE4_1 1)
  737. set(HAVE_SSE4_2 1)
  738. set(HAVE_SSE4A 1)
  739. set(HAVE_AVX 1)
  740. set(HAVE_FMA3 1)
  741. set(HAVE_MISALIGNSSE 1)
  742. set(HAVE_128BITFPU 1)
  743. set(HAVE_FASTMOVU 1)
  744. set(HAVE_CFLUSH 1)
  745. set(SBGEMM_UNROLL_M 8)
  746. set(SBGEMM_UNROLL_N 4)
  747. set(SGEMM_UNROLL_M 16)
  748. set(SGEMM_UNROLL_N 2)
  749. set(DGEMM_UNROLL_M 8)
  750. set(DGEMM_UNROLL_N 2)
  751. set(CGEMM_UNROLL_M 4)
  752. set(CGEMM_UNROLL_N 2)
  753. set(ZGEMM_UNROLL_M 2)
  754. set(ZGEMM_UNROLL_N 2)
  755. set(CGEMM3M_UNROLL_M 8)
  756. set(CGEMM3M_UNROLL_N 4)
  757. set(ZGEMM3M_UNROLL_M 4)
  758. set(ZGEMM3M_UNROLL_N 4)
  759. elseif ("${TCORE}" STREQUAL "ZEN")
  760. file(APPEND ${TARGET_CONF_TEMP}
  761. "#define L1_DATA_SIZE\t32768\n"
  762. "#define L1_DATA_LINESIZE\t64\n"
  763. "#define L2_SIZE\t524288\n"
  764. "#define L2_LINESIZE\t64\n"
  765. "#define DTB_DEFAULT_ENTRIES\t64\n"
  766. "#define DTB_SIZE\t4096\n"
  767. "#define HAVE_MMX\n"
  768. "#define HAVE_SSE\n"
  769. "#define HAVE_SSE2\n"
  770. "#define HAVE_SSE3\n"
  771. "#define HAVE_SSE4_1\n"
  772. "#define HAVE_SSE4_2\n"
  773. "#define HAVE_SSE4A\n"
  774. "#define HAVE_MISALIGNSSE\n"
  775. "#define HAVE_128BITFPU\n"
  776. "#define HAVE_FASTMOVU\n"
  777. "#define HAVE_CFLUSH\n"
  778. "#define HAVE_AVX\n"
  779. "#define HAVE_AVX2\n"
  780. "#define HAVE_FMA3\n"
  781. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  782. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  783. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  784. "#define ZLOCAL_BUFFER_SIZE\t12288\n")
  785. set(HAVE_SSE 1)
  786. set(HAVE_SSE2 1)
  787. set(HAVE_SSE3 1)
  788. set(HAVE_SSE4_1 1)
  789. set(HAVE_SSE4_2 1)
  790. set(HAVE_AVX 1)
  791. set(HAVE_AVX2 1)
  792. set(HAVE_FMA3 1)
  793. set(HAVE_SSE4A 1)
  794. set(HAVE_MISALIGNSSE 1)
  795. set(HAVE_128BITFPU 1)
  796. set(HAVE_FASTMOVU 1)
  797. set(HAVE_CFLUSH 1)
  798. set(SBGEMM_UNROLL_M 8)
  799. set(SBGEMM_UNROLL_N 4)
  800. set(SGEMM_UNROLL_M 8)
  801. set(SGEMM_UNROLL_N 4)
  802. set(DGEMM_UNROLL_M 4)
  803. set(DGEMM_UNROLL_N 8)
  804. set(CGEMM_UNROLL_M 8)
  805. set(CGEMM_UNROLL_N 2)
  806. set(ZGEMM_UNROLL_M 4)
  807. set(ZGEMM_UNROLL_N 2)
  808. set(CGEMM3M_UNROLL_M 8)
  809. set(CGEMM3M_UNROLL_N 4)
  810. set(ZGEMM3M_UNROLL_M 4)
  811. set(ZGEMM3M_UNROLL_N 4)
  812. elseif ("${TCORE}" STREQUAL "ARMV5")
  813. file(APPEND ${TARGET_CONF_TEMP}
  814. "#define L1_DATA_SIZE\t65536\n"
  815. "#define L1_DATA_LINESIZE\t32\n"
  816. "#define L2_SIZE\t512488\n"
  817. "#define L2_LINESIZE\t32\n"
  818. "#define DTB_DEFAULT_ENTRIES\t64\n"
  819. "#define DTB_SIZE\t4096\n"
  820. "#define L2_ASSOCIATIVE\t4\n")
  821. set(SGEMM_UNROLL_M 2)
  822. set(SGEMM_UNROLL_N 2)
  823. set(DGEMM_UNROLL_M 2)
  824. set(DGEMM_UNROLL_N 2)
  825. set(CGEMM_UNROLL_M 2)
  826. set(CGEMM_UNROLL_N 2)
  827. set(ZGEMM_UNROLL_M 2)
  828. set(ZGEMM_UNROLL_N 2)
  829. elseif ("${TCORE}" STREQUAL "ARMV6")
  830. file(APPEND ${TARGET_CONF_TEMP}
  831. "#define L1_DATA_SIZE\t65536\n"
  832. "#define L1_DATA_LINESIZE\t32\n"
  833. "#define L2_SIZE\t512488\n"
  834. "#define L2_LINESIZE\t32\n"
  835. "#define DTB_DEFAULT_ENTRIES\t64\n"
  836. "#define DTB_SIZE\t4096\n"
  837. "#define L2_ASSOCIATIVE\t4\n"
  838. "#define HAVE_VFP\n")
  839. set(SGEMM_UNROLL_M 4)
  840. set(SGEMM_UNROLL_N 2)
  841. set(DGEMM_UNROLL_M 4)
  842. set(DGEMM_UNROLL_N 2)
  843. set(CGEMM_UNROLL_M 2)
  844. set(CGEMM_UNROLL_N 2)
  845. set(ZGEMM_UNROLL_M 2)
  846. set(ZGEMM_UNROLL_N 2)
  847. elseif ("${TCORE}" STREQUAL "ARMV7")
  848. file(APPEND ${TARGET_CONF_TEMP}
  849. "#define L1_DATA_SIZE\t65536\n"
  850. "#define L1_DATA_LINESIZE\t32\n"
  851. "#define L2_SIZE\t512488\n"
  852. "#define L2_LINESIZE\t32\n"
  853. "#define DTB_DEFAULT_ENTRIES\t64\n"
  854. "#define DTB_SIZE\t4096\n"
  855. "#define L2_ASSOCIATIVE\t4\n"
  856. "#define HAVE_VFPV3\n"
  857. "#define HAVE_VFP\n")
  858. set(SGEMM_UNROLL_M 4)
  859. set(SGEMM_UNROLL_N 4)
  860. set(DGEMM_UNROLL_M 4)
  861. set(DGEMM_UNROLL_N 4)
  862. set(CGEMM_UNROLL_M 2)
  863. set(CGEMM_UNROLL_N 2)
  864. set(ZGEMM_UNROLL_M 2)
  865. set(ZGEMM_UNROLL_N 2)
  866. elseif ("${TCORE}" STREQUAL "ARMV8")
  867. file(APPEND ${TARGET_CONF_TEMP}
  868. "#define L1_DATA_SIZE\t32768\n"
  869. "#define L1_DATA_LINESIZE\t64\n"
  870. "#define L2_SIZE\t262144\n"
  871. "#define L2_LINESIZE\t64\n"
  872. "#define DTB_DEFAULT_ENTRIES\t64\n"
  873. "#define DTB_SIZE\t4096\n"
  874. "#define L2_ASSOCIATIVE\t32\n"
  875. "#define ARMV8\n")
  876. set(SGEMM_UNROLL_M 16)
  877. set(SGEMM_UNROLL_N 4)
  878. set(DGEMM_UNROLL_M 8)
  879. set(DGEMM_UNROLL_N 4)
  880. set(CGEMM_UNROLL_M 8)
  881. set(CGEMM_UNROLL_N 4)
  882. set(ZGEMM_UNROLL_M 4)
  883. set(ZGEMM_UNROLL_N 4)
  884. set(SYMV_P 16)
  885. elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53" OR "${TCORE}" STREQUAL "CORTEXA55")
  886. file(APPEND ${TARGET_CONF_TEMP}
  887. "#define L1_CODE_SIZE\t32768\n"
  888. "#define L1_CODE_LINESIZE\t64\n"
  889. "#define L1_CODE_ASSOCIATIVE\t3\n"
  890. "#define L1_DATA_SIZE\t32768\n"
  891. "#define L1_DATA_LINESIZE\t64\n"
  892. "#define L1_DATA_ASSOCIATIVE\t2\n"
  893. "#define L2_SIZE\t262144\n"
  894. "#define L2_LINESIZE\t64\n"
  895. "#define L2_ASSOCIATIVE\t16\n"
  896. "#define DTB_DEFAULT_ENTRIES\t64\n"
  897. "#define DTB_SIZE\t4096\n"
  898. "#define HAVE_VFPV4\n"
  899. "#define HAVE_VFPV3\n"
  900. "#define HAVE_VFP\n"
  901. "#define HAVE_NEON\n"
  902. "#define ARMV8\n")
  903. if ("${TCORE}" STREQUAL "CORTEXA57")
  904. set(SGEMM_UNROLL_M 16)
  905. set(SGEMM_UNROLL_N 4)
  906. else ()
  907. set(SGEMM_UNROLL_M 8)
  908. set(SGEMM_UNROLL_N 8)
  909. endif ()
  910. if ("${TCORE}" STREQUAL "CORTEXA53")
  911. set(DGEMM_UNROLL_M 4)
  912. else ()
  913. set(DGEMM_UNROLL_M 8)
  914. endif ()
  915. set(DGEMM_UNROLL_N 4)
  916. set(CGEMM_UNROLL_M 8)
  917. set(CGEMM_UNROLL_N 4)
  918. set(ZGEMM_UNROLL_M 4)
  919. set(ZGEMM_UNROLL_N 4)
  920. set(SYMV_P 16)
  921. elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73" OR "${TCORE}" STREQUAL "CORTEXA76")
  922. file(APPEND ${TARGET_CONF_TEMP}
  923. "#define L1_CODE_SIZE\t49152\n"
  924. "#define L1_CODE_LINESIZE\t64\n"
  925. "#define L1_CODE_ASSOCIATIVE\t3\n"
  926. "#define L1_DATA_SIZE\t32768\n"
  927. "#define L1_DATA_LINESIZE\t64\n"
  928. "#define L1_DATA_ASSOCIATIVE\t2\n"
  929. "#define L2_SIZE\t524288\n"
  930. "#define L2_LINESIZE\t64\n"
  931. "#define L2_ASSOCIATIVE\t16\n"
  932. "#define DTB_DEFAULT_ENTRIES\t64\n"
  933. "#define DTB_SIZE\t4096\n"
  934. "#define HAVE_VFPV4\n"
  935. "#define HAVE_VFPV3\n"
  936. "#define HAVE_VFP\n"
  937. "#define HAVE_NEON\n"
  938. "#define ARMV8\n")
  939. set(SGEMM_UNROLL_M 16)
  940. set(SGEMM_UNROLL_N 4)
  941. set(DGEMM_UNROLL_M 8)
  942. set(DGEMM_UNROLL_N 4)
  943. set(CGEMM_UNROLL_M 8)
  944. set(CGEMM_UNROLL_N 4)
  945. set(ZGEMM_UNROLL_M 4)
  946. set(ZGEMM_UNROLL_N 4)
  947. set(SYMV_P 16)
  948. elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
  949. file(APPEND ${TARGET_CONF_TEMP}
  950. "#define L1_CODE_SIZE\t65536\n"
  951. "#define L1_CODE_LINESIZE\t64\n"
  952. "#define L1_CODE_ASSOCIATIVE\t4\n"
  953. "#define L1_DATA_SIZE\t65536\n"
  954. "#define L1_DATA_LINESIZE\t64\n"
  955. "#define L1_DATA_ASSOCIATIVE\t4\n"
  956. "#define L2_SIZE\t1048576\n\n"
  957. "#define L2_LINESIZE\t64\n"
  958. "#define L2_ASSOCIATIVE\t8\n"
  959. "#define DTB_DEFAULT_ENTRIES\t48\n"
  960. "#define DTB_SIZE\t4096\n"
  961. "#define HAVE_VFPV4\n"
  962. "#define HAVE_VFPV3\n"
  963. "#define HAVE_VFP\n"
  964. "#define HAVE_NEON\n"
  965. "#define ARMV8\n")
  966. set(SGEMM_UNROLL_M 16)
  967. set(SGEMM_UNROLL_N 4)
  968. set(DGEMM_UNROLL_M 8)
  969. set(DGEMM_UNROLL_N 4)
  970. set(CGEMM_UNROLL_M 8)
  971. set(CGEMM_UNROLL_N 4)
  972. set(ZGEMM_UNROLL_M 4)
  973. set(ZGEMM_UNROLL_N 4)
  974. set(SYMV_P 16)
  975. elseif ("${TCORE}" STREQUAL "NEOVERSEV1")
  976. file(APPEND ${TARGET_CONF_TEMP}
  977. "#define L1_CODE_SIZE\t65536\n"
  978. "#define L1_CODE_LINESIZE\t64\n"
  979. "#define L1_CODE_ASSOCIATIVE\t4\n"
  980. "#define L1_DATA_SIZE\t65536\n"
  981. "#define L1_DATA_LINESIZE\t64\n"
  982. "#define L1_DATA_ASSOCIATIVE\t4\n"
  983. "#define L2_SIZE\t1048576\n\n"
  984. "#define L2_LINESIZE\t64\n"
  985. "#define L2_ASSOCIATIVE\t8\n"
  986. "#define DTB_DEFAULT_ENTRIES\t48\n"
  987. "#define DTB_SIZE\t4096\n"
  988. "#define HAVE_VFPV4\n"
  989. "#define HAVE_VFPV3\n"
  990. "#define HAVE_VFP\n"
  991. "#define HAVE_NEON\n"
  992. "#define HAVE_SVE\n"
  993. "#define ARMV8\n")
  994. set(SGEMM_UNROLL_M 16)
  995. set(SGEMM_UNROLL_N 4)
  996. set(DGEMM_UNROLL_M 8)
  997. set(DGEMM_UNROLL_N 4)
  998. set(CGEMM_UNROLL_M 8)
  999. set(CGEMM_UNROLL_N 4)
  1000. set(ZGEMM_UNROLL_M 4)
  1001. set(ZGEMM_UNROLL_N 4)
  1002. set(SYMV_P 16)
  1003. elseif ("${TCORE}" STREQUAL "NEOVERSEN2")
  1004. file(APPEND ${TARGET_CONF_TEMP}
  1005. "#define L1_CODE_SIZE\t65536\n"
  1006. "#define L1_CODE_LINESIZE\t64\n"
  1007. "#define L1_CODE_ASSOCIATIVE\t4\n"
  1008. "#define L1_DATA_SIZE\t65536\n"
  1009. "#define L1_DATA_LINESIZE\t64\n"
  1010. "#define L1_DATA_ASSOCIATIVE\t2\n"
  1011. "#define L2_SIZE\t1048576\n\n"
  1012. "#define L2_LINESIZE\t64\n"
  1013. "#define L2_ASSOCIATIVE\t8\n"
  1014. "#define DTB_DEFAULT_ENTRIES\t48\n"
  1015. "#define DTB_SIZE\t4096\n"
  1016. "#define HAVE_VFPV4\n"
  1017. "#define HAVE_VFPV3\n"
  1018. "#define HAVE_VFP\n"
  1019. "#define HAVE_NEON\n"
  1020. "#define HAVE_SVE\n"
  1021. "#define ARMV8\n")
  1022. set(SGEMM_UNROLL_M 16)
  1023. set(SGEMM_UNROLL_N 4)
  1024. set(DGEMM_UNROLL_M 8)
  1025. set(DGEMM_UNROLL_N 4)
  1026. set(CGEMM_UNROLL_M 8)
  1027. set(CGEMM_UNROLL_N 4)
  1028. set(ZGEMM_UNROLL_M 4)
  1029. set(ZGEMM_UNROLL_N 4)
  1030. set(SYMV_P 16)
  1031. elseif ("${TCORE}" STREQUAL "FALKOR")
  1032. file(APPEND ${TARGET_CONF_TEMP}
  1033. "#define L1_CODE_SIZE\t65536\n"
  1034. "#define L1_CODE_LINESIZE\t64\n"
  1035. "#define L1_CODE_ASSOCIATIVE\t3\n"
  1036. "#define L1_DATA_SIZE\t32768\n"
  1037. "#define L1_DATA_LINESIZE\t128\n"
  1038. "#define L1_DATA_ASSOCIATIVE\t2\n"
  1039. "#define L2_SIZE\t524288\n"
  1040. "#define L2_LINESIZE\t64\n"
  1041. "#define L2_ASSOCIATIVE\t16\n"
  1042. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1043. "#define DTB_SIZE\t4096\n"
  1044. "#define HAVE_VFPV4\n"
  1045. "#define HAVE_VFPV3\n"
  1046. "#define HAVE_VFP\n"
  1047. "#define HAVE_NEON\n"
  1048. "#define ARMV8\n")
  1049. set(SGEMM_UNROLL_M 16)
  1050. set(SGEMM_UNROLL_N 4)
  1051. set(DGEMM_UNROLL_M 8)
  1052. set(DGEMM_UNROLL_N 4)
  1053. set(CGEMM_UNROLL_M 8)
  1054. set(CGEMM_UNROLL_N 4)
  1055. set(ZGEMM_UNROLL_M 4)
  1056. set(ZGEMM_UNROLL_N 4)
  1057. set(SYMV_P 16)
  1058. elseif ("${TCORE}" STREQUAL "THUNDERX")
  1059. file(APPEND ${TARGET_CONF_TEMP}
  1060. "#define L1_CODE_SIZE\t32768\n"
  1061. "#define L1_CODE_LINESIZE\t64\n"
  1062. "#define L1_CODE_ASSOCIATIVE\t3\n"
  1063. "#define L1_DATA_SIZE\t32768\n"
  1064. "#define L1_DATA_LINESIZE\t128\n"
  1065. "#define L1_DATA_ASSOCIATIVE\t2\n"
  1066. "#define L2_SIZE\t167772164\n"
  1067. "#define L2_LINESIZE\t128\n"
  1068. "#define L2_ASSOCIATIVE\t16\n"
  1069. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1070. "#define DTB_SIZE\t4096\n"
  1071. "#define HAVE_VFPV4\n"
  1072. "#define HAVE_VFPV3\n"
  1073. "#define HAVE_VFP\n"
  1074. "#define HAVE_NEON\n"
  1075. "#define ARMV8\n")
  1076. set(SGEMM_UNROLL_M 4)
  1077. set(SGEMM_UNROLL_N 4)
  1078. set(DGEMM_UNROLL_M 2)
  1079. set(DGEMM_UNROLL_N 2)
  1080. set(CGEMM_UNROLL_M 2)
  1081. set(CGEMM_UNROLL_N 2)
  1082. set(ZGEMM_UNROLL_M 2)
  1083. set(ZGEMM_UNROLL_N 2)
  1084. set(SYMV_P 16)
  1085. elseif ("${TCORE}" STREQUAL "THUNDERX2T99")
  1086. file(APPEND ${TARGET_CONF_TEMP}
  1087. "#define L1_CODE_SIZE\t32768\n"
  1088. "#define L1_CODE_LINESIZE\t64\n"
  1089. "#define L1_CODE_ASSOCIATIVE\t8\n"
  1090. "#define L1_DATA_SIZE\t32768\n"
  1091. "#define L1_DATA_LINESIZE\t64\n"
  1092. "#define L1_DATA_ASSOCIATIVE\t8\n"
  1093. "#define L2_SIZE\t262144\n"
  1094. "#define L2_LINESIZE\t64\n"
  1095. "#define L2_ASSOCIATIVE\t8\n"
  1096. "#define L3_SIZE\t33554432\n"
  1097. "#define L3_LINESIZE\t64\n"
  1098. "#define L3_ASSOCIATIVE\t32\n"
  1099. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1100. "#define DTB_SIZE\t4096\n"
  1101. "#define ARMV8\n")
  1102. set(SGEMM_UNROLL_M 16)
  1103. set(SGEMM_UNROLL_N 4)
  1104. set(DGEMM_UNROLL_M 8)
  1105. set(DGEMM_UNROLL_N 4)
  1106. set(CGEMM_UNROLL_M 8)
  1107. set(CGEMM_UNROLL_N 4)
  1108. set(ZGEMM_UNROLL_M 4)
  1109. set(ZGEMM_UNROLL_N 4)
  1110. set(SYMV_P 16)
  1111. elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
  1112. file(APPEND ${TARGET_CONF_TEMP}
  1113. "#define THUNDERX3T110\n"
  1114. "#define L1_CODE_SIZE\t65536\n"
  1115. "#define L1_CODE_LINESIZE\t64\n"
  1116. "#define L1_CODE_ASSOCIATIVE\t8\n"
  1117. "#define L1_DATA_SIZE\t65536\n"
  1118. "#define L1_DATA_LINESIZE\t64\n"
  1119. "#define L1_DATA_ASSOCIATIVE\t8\n"
  1120. "#define L2_SIZE\t524288\n"
  1121. "#define L2_LINESIZE\t64\n"
  1122. "#define L2_ASSOCIATIVE\t8\n"
  1123. "#define L3_SIZE\t94371840\n"
  1124. "#define L3_LINESIZE\t64\n"
  1125. "#define L3_ASSOCIATIVE\t32\n"
  1126. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1127. "#define DTB_SIZE\t4096\n"
  1128. "#define ARMV8\n")
  1129. set(SGEMM_UNROLL_M 16)
  1130. set(SGEMM_UNROLL_N 4)
  1131. set(DGEMM_UNROLL_M 8)
  1132. set(DGEMM_UNROLL_N 4)
  1133. set(CGEMM_UNROLL_M 8)
  1134. set(CGEMM_UNROLL_N 4)
  1135. set(ZGEMM_UNROLL_M 4)
  1136. set(ZGEMM_UNROLL_N 4)
  1137. set(SYMV_P 16)
  1138. elseif ("${TCORE}" STREQUAL "TSV110")
  1139. file(APPEND ${TARGET_CONF_TEMP}
  1140. "#define ARMV8\n"
  1141. "#define L1_CODE_SIZE\t65536\n"
  1142. "#define L1_CODE_LINESIZE\t64\n"
  1143. "#define L1_CODE_ASSOCIATIVE\t4\n"
  1144. "#define L1_DATA_SIZE\t65536\n"
  1145. "#define L1_DATA_LINESIZE\t64\n"
  1146. "#define L1_DATA_ASSOCIATIVE\t4\n"
  1147. "#define L2_SIZE\t524288\n"
  1148. "#define L2_LINESIZE\t64\n"
  1149. "#define L2_ASSOCIATIVE\t8\n"
  1150. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1151. "#define DTB_SIZE\t4096\n")
  1152. set(SGEMM_UNROLL_M 16)
  1153. set(SGEMM_UNROLL_N 4)
  1154. set(DGEMM_UNROLL_M 8)
  1155. set(DGEMM_UNROLL_N 4)
  1156. set(CGEMM_UNROLL_M 8)
  1157. set(CGEMM_UNROLL_N 4)
  1158. set(ZGEMM_UNROLL_M 4)
  1159. set(ZGEMM_UNROLL_N 4)
  1160. set(SYMV_P 16)
  1161. elseif ("${TCORE}" STREQUAL "EMAG8180")
  1162. file(APPEND ${TARGET_CONF_TEMP}
  1163. "#define ARMV8\n"
  1164. "#define L1_CODE_SIZE\t32768\n"
  1165. "#define L1_CODE_LINESIZE\t64\n"
  1166. "#define L1_CODE_ASSOCIATIVE\t4\n"
  1167. "#define L1_DATA_SIZE\t32768\n"
  1168. "#define L1_DATA_LINESIZE\t64\n"
  1169. "#define L1_DATA_ASSOCIATIVE\t4\n"
  1170. "#define L2_SIZE\t5262144\n"
  1171. "#define L2_LINESIZE\t64\n"
  1172. "#define L2_ASSOCIATIVE\t8\n"
  1173. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1174. "#define DTB_SIZE\t4096\n")
  1175. set(SGEMM_UNROLL_M 16)
  1176. set(SGEMM_UNROLL_N 4)
  1177. set(DGEMM_UNROLL_M 8)
  1178. set(DGEMM_UNROLL_N 4)
  1179. set(CGEMM_UNROLL_M 8)
  1180. set(CGEMM_UNROLL_N 4)
  1181. set(ZGEMM_UNROLL_M 4)
  1182. set(ZGEMM_UNROLL_N 4)
  1183. set(SYMV_P 16)
  1184. elseif ("${TCORE}" STREQUAL "VORTEX")
  1185. file(APPEND ${TARGET_CONF_TEMP}
  1186. "#define ARMV8\n"
  1187. "#define L1_CODE_SIZE\t32768\n"
  1188. "#define L1_CODE_LINESIZE\t64\n"
  1189. "#define L1_CODE_ASSOCIATIVE\t4\n"
  1190. "#define L1_DATA_SIZE\t32768\n"
  1191. "#define L1_DATA_LINESIZE\t64\n"
  1192. "#define L1_DATA_ASSOCIATIVE\t4\n"
  1193. "#define L2_SIZE\t5262144\n"
  1194. "#define L2_LINESIZE\t64\n"
  1195. "#define L2_ASSOCIATIVE\t8\n"
  1196. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1197. "#define DTB_SIZE\t4096\n")
  1198. set(SGEMM_UNROLL_M 16)
  1199. set(SGEMM_UNROLL_N 4)
  1200. set(DGEMM_UNROLL_M 8)
  1201. set(DGEMM_UNROLL_N 4)
  1202. set(CGEMM_UNROLL_M 8)
  1203. set(CGEMM_UNROLL_N 4)
  1204. set(ZGEMM_UNROLL_M 4)
  1205. set(ZGEMM_UNROLL_N 4)
  1206. set(SYMV_P 16)
  1207. elseif ("${TCORE}" STREQUAL "A64FX")
  1208. file(APPEND ${TARGET_CONF_TEMP}
  1209. "#define L1_CODE_SIZE\t65536\n"
  1210. "#define L1_CODE_LINESIZE\t256\n"
  1211. "#define L1_CODE_ASSOCIATIVE\t8\n"
  1212. "#define L1_DATA_SIZE\t32768\n"
  1213. "#define L1_DATA_LINESIZE\t256\n"
  1214. "#define L1_DATA_ASSOCIATIVE\t8\n"
  1215. "#define L2_SIZE\t8388608\n\n"
  1216. "#define L2_LINESIZE\t256\n"
  1217. "#define L2_ASSOCIATIVE\t8\n"
  1218. "#define L3_SIZE\t0\n\n"
  1219. "#define L3_LINESIZE\t0\n\n"
  1220. "#define L3_ASSOCIATIVE\t0\n\n"
  1221. "#define DTB_DEFAULT_ENTRIES\t64\n"
  1222. "#define DTB_SIZE\t4096\n"
  1223. "#define HAVE_VFPV4\n"
  1224. "#define HAVE_VFPV3\n"
  1225. "#define HAVE_VFP\n"
  1226. "#define HAVE_NEON\n"
  1227. "#define HAVE_SVE\n"
  1228. "#define ARMV8\n")
  1229. set(SGEMM_UNROLL_M 4)
  1230. set(SGEMM_UNROLL_N 8)
  1231. set(DGEMM_UNROLL_M 2)
  1232. set(DGEMM_UNROLL_N 8)
  1233. set(CGEMM_UNROLL_M 2)
  1234. set(CGEMM_UNROLL_N 4)
  1235. set(ZGEMM_UNROLL_M 2)
  1236. set(ZGEMM_UNROLL_N 4)
  1237. set(SYMV_P 16)
  1238. elseif ("${TCORE}" STREQUAL "P5600")
  1239. file(APPEND ${TARGET_CONF_TEMP}
  1240. "#define L2_SIZE 1048576\n"
  1241. "#define DTB_SIZE 4096\n"
  1242. "#define DTB_DEFAULT_ENTRIES 64\n")
  1243. set(SGEMM_UNROLL_M 2)
  1244. set(SGEMM_UNROLL_N 2)
  1245. set(DGEMM_UNROLL_M 2)
  1246. set(DGEMM_UNROLL_N 2)
  1247. set(CGEMM_UNROLL_M 2)
  1248. set(CGEMM_UNROLL_N 2)
  1249. set(ZGEMM_UNROLL_M 2)
  1250. set(ZGEMM_UNROLL_N 2)
  1251. set(SYMV_P 16)
  1252. elseif ("${TCORE}" MATCHES "MIPS")
  1253. file(APPEND ${TARGET_CONF_TEMP}
  1254. "#define L2_SIZE 262144\n"
  1255. "#define DTB_SIZE 4096\n"
  1256. "#define DTB_DEFAULT_ENTRIES 64\n")
  1257. set(SGEMM_UNROLL_M 2)
  1258. set(SGEMM_UNROLL_N 2)
  1259. set(DGEMM_UNROLL_M 2)
  1260. set(DGEMM_UNROLL_N 2)
  1261. set(CGEMM_UNROLL_M 2)
  1262. set(CGEMM_UNROLL_N 2)
  1263. set(ZGEMM_UNROLL_M 2)
  1264. set(ZGEMM_UNROLL_N 2)
  1265. set(SYMV_P 16)
  1266. elseif ("${TCORE}" STREQUAL "POWER6")
  1267. file(APPEND ${TARGET_CONF_TEMP}
  1268. "#define L1_DATA_SIZE 32768\n"
  1269. "#define L1_DATA_LINESIZE 128\n"
  1270. "#define L2_SIZE 524288\n"
  1271. "#define L2_LINESIZE 128 \n"
  1272. "#define DTB_DEFAULT_ENTRIES 128\n"
  1273. "#define DTB_SIZE 4096\n"
  1274. "#define L2_ASSOCIATIVE 8\n")
  1275. set(SGEMM_UNROLL_M 4)
  1276. set(SGEMM_UNROLL_N 4)
  1277. set(DGEMM_UNROLL_M 4)
  1278. set(DGEMM_UNROLL_N 4)
  1279. set(CGEMM_UNROLL_M 2)
  1280. set(CGEMM_UNROLL_N 4)
  1281. set(ZGEMM_UNROLL_M 2)
  1282. set(ZGEMM_UNROLL_N 4)
  1283. set(SYMV_P 8)
  1284. elseif ("${TCORE}" STREQUAL "POWER8")
  1285. file(APPEND ${TARGET_CONF_TEMP}
  1286. "#define L1_DATA_SIZE 32768\n"
  1287. "#define L1_DATA_LINESIZE 128\n"
  1288. "#define L2_SIZE 524288\n"
  1289. "#define L2_LINESIZE 128 \n"
  1290. "#define DTB_DEFAULT_ENTRIES 128\n"
  1291. "#define DTB_SIZE 4096\n"
  1292. "#define L2_ASSOCIATIVE 8\n")
  1293. set(SGEMM_UNROLL_M 16)
  1294. set(SGEMM_UNROLL_N 8)
  1295. set(DGEMM_UNROLL_M 16)
  1296. set(DGEMM_UNROLL_N 4)
  1297. set(CGEMM_UNROLL_M 8)
  1298. set(CGEMM_UNROLL_N 4)
  1299. set(ZGEMM_UNROLL_M 8)
  1300. set(ZGEMM_UNROLL_N 2)
  1301. set(SYMV_P 8)
  1302. elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
  1303. file(APPEND ${TARGET_CONF_TEMP}
  1304. "#define L1_DATA_SIZE 32768\n"
  1305. "#define L1_DATA_LINESIZE 128\n"
  1306. "#define L2_SIZE 524288\n"
  1307. "#define L2_LINESIZE 128 \n"
  1308. "#define DTB_DEFAULT_ENTRIES 128\n"
  1309. "#define DTB_SIZE 4096\n"
  1310. "#define L2_ASSOCIATIVE 8\n")
  1311. set(SGEMM_UNROLL_M 16)
  1312. set(SGEMM_UNROLL_N 8)
  1313. set(DGEMM_UNROLL_M 16)
  1314. set(DGEMM_UNROLL_N 4)
  1315. set(CGEMM_UNROLL_M 8)
  1316. set(CGEMM_UNROLL_N 4)
  1317. set(ZGEMM_UNROLL_M 8)
  1318. set(ZGEMM_UNROLL_N 2)
  1319. set(SYMV_P 8)
  1320. elseif ("${TCORE}" STREQUAL "GENERIC")
  1321. file(APPEND ${TARGET_CONF_TEMP}
  1322. "#define L1_DATA_SIZE 32768\n"
  1323. "#define L1_DATA_LINESIZE 128\n"
  1324. "#define L2_SIZE 524288\n"
  1325. "#define L2_LINESIZE 128 \n"
  1326. "#define DTB_DEFAULT_ENTRIES 128\n"
  1327. "#define DTB_SIZE 4096\n"
  1328. "#define L2_ASSOCIATIVE 8\n")
  1329. elseif ("${TCORE}" STREQUAL "RISCV64_GENERIC")
  1330. file(APPEND ${TARGET_CONF_TEMP}
  1331. "#define L1_DATA_SIZE 32768\n"
  1332. "#define L1_DATA_LINESIZE 32\n"
  1333. "#define L2_SIZE 1048576\n"
  1334. "#define L2_LINESIZE 32 \n"
  1335. "#define DTB_DEFAULT_ENTRIES 128\n"
  1336. "#define DTB_SIZE 4096\n"
  1337. "#define L2_ASSOCIATIVE 4\n")
  1338. elseif ("${TCORE}" STREQUAL "LOONGSONGENERIC")
  1339. file(APPEND ${TARGET_CONF_TEMP}
  1340. "#define DTB_DEFAULT_ENTRIES 64\n")
  1341. set(SGEMM_UNROLL_M 2)
  1342. set(SGEMM_UNROLL_N 8)
  1343. set(DGEMM_UNROLL_M 2)
  1344. set(DGEMM_UNROLL_N 8)
  1345. set(CGEMM_UNROLL_M 1)
  1346. set(CGEMM_UNROLL_N 4)
  1347. set(ZGEMM_UNROLL_M 1)
  1348. set(ZGEMM_UNROLL_N 4)
  1349. set(CGEMM3M_UNROLL_M 2)
  1350. set(CGEMM3M_UNROLL_N 8)
  1351. set(ZGEMM3M_UNROLL_M 2)
  1352. set(ZGEMM3M_UNROLL_N 8)
  1353. elseif ("${TCORE}" STREQUAL "LOONGSON2K1000")
  1354. file(APPEND ${TARGET_CONF_TEMP}
  1355. "#define DTB_DEFAULT_ENTRIES 64\n")
  1356. set(HAVE_LSX 1)
  1357. set(SGEMM_UNROLL_M 2)
  1358. set(SGEMM_UNROLL_N 8)
  1359. set(DGEMM_UNROLL_M 8)
  1360. set(DGEMM_UNROLL_N 4)
  1361. set(CGEMM_UNROLL_M 8)
  1362. set(CGEMM_UNROLL_N 4)
  1363. set(ZGEMM_UNROLL_M 4)
  1364. set(ZGEMM_UNROLL_N 4)
  1365. set(CGEMM3M_UNROLL_M 2)
  1366. set(CGEMM3M_UNROLL_N 8)
  1367. set(ZGEMM3M_UNROLL_M 8)
  1368. set(ZGEMM3M_UNROLL_N 4)
  1369. elseif ("${TCORE}" STREQUAL "LOONGSON3R5")
  1370. file(APPEND ${TARGET_CONF_TEMP}
  1371. "#define DTB_DEFAULT_ENTRIES 64\n")
  1372. set(HAVE_LASX 1)
  1373. set(HAVE_LSX 1)
  1374. set(SGEMM_UNROLL_M 16)
  1375. set(SGEMM_UNROLL_N 8)
  1376. set(DGEMM_UNROLL_M 16)
  1377. set(DGEMM_UNROLL_N 6)
  1378. set(CGEMM_UNROLL_M 16)
  1379. set(CGEMM_UNROLL_N 4)
  1380. set(ZGEMM_UNROLL_M 8)
  1381. set(ZGEMM_UNROLL_N 4)
  1382. set(CGEMM3M_UNROLL_M 16)
  1383. set(CGEMM3M_UNROLL_N 8)
  1384. set(ZGEMM3M_UNROLL_M 16)
  1385. set(ZGEMM3M_UNROLL_N 6)
  1386. endif()
  1387. set(SBGEMM_UNROLL_M 8)
  1388. set(SBGEMM_UNROLL_N 4)
  1389. # Or should this actually be NUM_CORES?
  1390. if (${NUM_THREADS} GREATER 0)
  1391. file(APPEND ${TARGET_CONF_TEMP} "#define NUM_CORES\t${NUM_THREADS}\n")
  1392. endif()
  1393. # GetArch_2nd
  1394. foreach(float_char S;D;Q;C;Z;X)
  1395. if (NOT DEFINED ${float_char}GEMM_UNROLL_M)
  1396. set(${float_char}GEMM_UNROLL_M 2)
  1397. endif()
  1398. if (NOT DEFINED ${float_char}GEMM_UNROLL_N)
  1399. set(${float_char}GEMM_UNROLL_N 2)
  1400. endif()
  1401. endforeach()
  1402. file(APPEND ${TARGET_CONF_TEMP}
  1403. "#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n")
  1404. # Move to where gen_config_h would place it
  1405. file(MAKE_DIRECTORY ${TARGET_CONF_DIR})
  1406. file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}")
  1407. else(NOT CMAKE_CROSSCOMPILING)
  1408. # compile getarch
  1409. set(GETARCH_SRC
  1410. ${PROJECT_SOURCE_DIR}/getarch.c
  1411. ${CPUIDEMU}
  1412. )
  1413. if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
  1414. #Use generic for MSVC now
  1415. message(STATUS "MSVC")
  1416. set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
  1417. else()
  1418. list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
  1419. if (DEFINED TARGET_CORE)
  1420. set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE})
  1421. endif ()
  1422. endif ()
  1423. if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
  1424. # disable WindowsStore strict CRT checks
  1425. set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
  1426. endif ()
  1427. set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
  1428. set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
  1429. file(MAKE_DIRECTORY "${GETARCH_DIR}")
  1430. configure_file("${TARGET_CONF_TEMP}" "${GETARCH_DIR}/${TARGET_CONF}" COPYONLY)
  1431. if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
  1432. if (CMAKE_ASM_COMPILER_ID STREQUAL "")
  1433. try_compile(GETARCH_RESULT "${GETARCH_DIR}"
  1434. SOURCES ${GETARCH_SRC}
  1435. CMAKE_FLAGS "-DCMAKE_ASM_COMPILER=${CMAKE_C_COMPILER}"
  1436. COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
  1437. OUTPUT_VARIABLE GETARCH_LOG
  1438. COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
  1439. )
  1440. else()
  1441. try_compile(GETARCH_RESULT "${GETARCH_DIR}"
  1442. SOURCES ${GETARCH_SRC}
  1443. COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
  1444. OUTPUT_VARIABLE GETARCH_LOG
  1445. COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
  1446. )
  1447. endif()
  1448. if (NOT ${GETARCH_RESULT})
  1449. MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
  1450. endif ()
  1451. endif ()
  1452. unset (HAVE_AVX2)
  1453. unset (HAVE_AVX)
  1454. unset (HAVE_FMA3)
  1455. unset (HAVE_MMX)
  1456. unset (HAVE_SSE)
  1457. unset (HAVE_SSE2)
  1458. unset (HAVE_SSE3)
  1459. unset (HAVE_SSSE3)
  1460. unset (HAVE_SSE4A)
  1461. unset (HAVE_SSE4_1)
  1462. unset (HAVE_SSE4_2)
  1463. unset (HAVE_NEON)
  1464. unset (HAVE_VFP)
  1465. unset (HAVE_VFPV3)
  1466. unset (HAVE_VFPV4)
  1467. message(STATUS "Running getarch")
  1468. # use the cmake binary w/ the -E param to run a shell command in a cross-platform way
  1469. execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
  1470. execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
  1471. message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
  1472. # append config data from getarch to the TARGET file and read in CMake vars
  1473. file(APPEND "${TARGET_CONF_TEMP}" ${GETARCH_CONF_OUT})
  1474. ParseGetArchVars(${GETARCH_MAKE_OUT})
  1475. set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
  1476. set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
  1477. file(MAKE_DIRECTORY "${GETARCH2_DIR}")
  1478. configure_file("${TARGET_CONF_TEMP}" "${GETARCH2_DIR}/${TARGET_CONF}" COPYONLY)
  1479. if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
  1480. try_compile(GETARCH2_RESULT "${GETARCH2_DIR}"
  1481. SOURCES "${PROJECT_SOURCE_DIR}/getarch_2nd.c"
  1482. COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
  1483. OUTPUT_VARIABLE GETARCH2_LOG
  1484. COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}"
  1485. )
  1486. if (NOT ${GETARCH2_RESULT})
  1487. MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
  1488. endif ()
  1489. endif ()
  1490. # use the cmake binary w/ the -E param to run a shell command in a cross-platform way
  1491. execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
  1492. execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
  1493. # append config data from getarch_2nd to the TARGET file and read in CMake vars
  1494. file(APPEND "${TARGET_CONF_TEMP}" ${GETARCH2_CONF_OUT})
  1495. configure_file("${TARGET_CONF_TEMP}" "${TARGET_CONF_DIR}/${TARGET_CONF}" COPYONLY)
  1496. ParseGetArchVars(${GETARCH2_MAKE_OUT})
  1497. endif()