You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

prebuild.cmake 45 kB

5 years ago
5 years ago
2 years ago
2 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439
  1. ##
  2. ## Author: Hank Anderson <hank@statease.com>
  3. ## Description: Ported from OpenBLAS/Makefile.prebuild
  4. ## This is triggered by system.cmake and runs before any of the code is built.
  5. ## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
  6. ## Next it runs f_check and appends some fortran information to the files.
  7. ## Then it runs getarch and getarch_2nd for even more environment information.
  8. ## Finally it builds gen_config_h for use at build time to generate config.h.
  9. # CMake vars set by this file:
  10. # CORE
  11. # LIBCORE
  12. # NUM_CORES
  13. # HAVE_MMX
  14. # HAVE_SSE
  15. # HAVE_SSE2
  16. # HAVE_SSE3
  17. # MAKE
  18. # SBGEMM_UNROLL_M
  19. # SBGEMM_UNROLL_N
  20. # SGEMM_UNROLL_M
  21. # SGEMM_UNROLL_N
  22. # DGEMM_UNROLL_M
  23. # DGEMM_UNROLL_N
  24. # QGEMM_UNROLL_M
  25. # QGEMM_UNROLL_N
  26. # CGEMM_UNROLL_M
  27. # CGEMM_UNROLL_N
  28. # ZGEMM_UNROLL_M
  29. # ZGEMM_UNROLL_N
  30. # XGEMM_UNROLL_M
  31. # XGEMM_UNROLL_N
  32. # CGEMM3M_UNROLL_M
  33. # CGEMM3M_UNROLL_N
  34. # ZGEMM3M_UNROLL_M
  35. # ZGEMM3M_UNROLL_N
  36. # XGEMM3M_UNROLL_M
  37. # XGEMM3M_UNROLL_N
  38. # CPUIDEMU = ../../cpuid/table.o
  # CPU-ID emulation: whenever CPUIDEMU is defined (its value is not inspected),
  # record the emulation defines in EXFLAGS.
  if(DEFINED CPUIDEMU)
    set(EXFLAGS
      "-DCPUIDEMU -DVENDOR=99")
  endif()
  # Choose the name and output directory of the generated configuration header.
  # The defaults below describe a regular build; a kernel build (BUILD_KERNEL)
  # overrides both and additionally sets GETARCH2_FLAGS.
  set(TARGET_CONF "config.h")
  set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR})
  if (BUILD_KERNEL)
    # set the C flags for just this file
    set(GETARCH2_FLAGS "-DBUILD_KERNEL")
    set(TARGET_CONF "config_kernel.h")
    set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE})
  endif ()

  # Temporary file the rest of this script writes the configuration into;
  # it always lives directly in PROJECT_BINARY_DIR.
  set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")
  # c_check
  # Starts the temporary configuration header (TARGET_CONF_TEMP) with defines
  # for the host OS, the upper-cased target architecture, the C compiler, the
  # word size (BINARY) and the symbol-underscore prefix (FUNDERSCORE).
  #
  # All variable expansions used in if()/string()/file() are quoted: unquoted,
  # an empty value turns the command into a syntax error and a value that
  # happens to name another variable is silently dereferenced a second time.

  # FU becomes "_" on Apple platforms, with MSVC unless the compiler ID matches
  # "Clang", and for MINGW when MINGW64 is not set; otherwise it stays empty.
  set(FU "")
  if (APPLE OR (MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang"))
    set(FU "_")
  endif ()
  if (MINGW AND NOT MINGW64)
    set(FU "_")
  endif ()

  # The generated header uses "GCC" where CMake reports the compiler ID "GNU".
  set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
  if ("${COMPILER_ID}" STREQUAL "GNU")
    set(COMPILER_ID "GCC")
  endif ()

  # Fail with a readable message instead of string()'s argument-count error
  # (which is what an empty, unquoted ${ARCH} used to trigger).
  if ("${ARCH}" STREQUAL "")
    message(FATAL_ERROR "ARCH must be set before the configuration header can be generated")
  endif ()
  string(TOUPPER "${ARCH}" UC_ARCH)

  # file(WRITE) truncates any previous temporary header; later sections append.
  file(WRITE "${TARGET_CONF_TEMP}"
    "#define OS_${HOST_OS}\t1\n"
    "#define ARCH_${UC_ARCH}\t1\n"
    "#define C_${COMPILER_ID}\t1\n"
    "#define __${BINARY}BIT__\t1\n"
    "#define FUNDERSCORE\t${FU}\n")

  # WINDOWSSTORE hosts additionally get the OS_WINNT define.
  if ("${HOST_OS}" STREQUAL "WINDOWSSTORE")
    file(APPEND "${TARGET_CONF_TEMP}"
      "#define OS_WINNT\t1\n")
  endif ()
  # f_check
  # With Fortran disabled (NOFORTRAN) the underscore settings are written
  # directly and BU is set to "_"; otherwise the project's f_check.cmake
  # module is included.
  if (NOFORTRAN)
    set(BU "_")
    file(APPEND ${TARGET_CONF_TEMP}
      "#define BUNDERSCORE _\n"
      "#define NEEDBUNDERSCORE 1\n")
  else ()
    include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
  endif ()
  84. # Cannot run getarch on target if we are cross-compiling
  85. if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
  86. # Write to config as getarch would
  87. if (DEFINED TARGET_CORE)
  88. set(TCORE ${TARGET_CORE})
  89. else()
  90. set(TCORE ${CORE})
  91. endif()
  92. # TODO: Set up defines that getarch sets up based on every other target
  93. # Perhaps this should be inside a different file as it grows larger
  94. file(APPEND ${TARGET_CONF_TEMP}
  95. "#define ${TCORE}\n"
  96. "#define CORE_${TCORE}\n"
  97. "#define CHAR_CORENAME \"${TCORE}\"\n")
  98. if ("${TCORE}" STREQUAL "CORE2")
  99. file(APPEND ${TARGET_CONF_TEMP}
  100. "#define L1_DATA_SIZE\t32768\n"
  101. "#define L1_DATA_LINESIZE\t64\n"
  102. "#define L2_SIZE\t1048576\n"
  103. "#define L2_LINESIZE\t64\n"
  104. "#define DTB_DEFAULT_ENTRIES\t256\n"
  105. "#define DTB_SIZE\t4096\n"
  106. "#define HAVE_CMOV\n"
  107. "#define HAVE_MMX\n"
  108. "#define HAVE_SSE\n"
  109. "#define HAVE_SSE2\n"
  110. "#define HAVE_SSE3\n"
  111. "#define HAVE_SSSE3\n"
  112. "#define SLOCAL_BUFFER_SIZE\t16384\n"
  113. "#define DLOCAL_BUFFER_SIZE\t16384\n"
  114. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  115. "#define ZLOCAL_BUFFER_SIZE\t16384\n")
  116. set(HAVE_SSE 1)
  117. set(HAVE_SSE2 1)
  118. set(HAVE_SSE3 1)
  119. set(HAVE_SSSE3 1)
  120. set(SBGEMM_UNROLL_M 8)
  121. set(SBGEMM_UNROLL_N 4)
  122. set(SGEMM_UNROLL_M 8)
  123. set(SGEMM_UNROLL_N 4)
  124. set(DGEMM_UNROLL_M 4)
  125. set(DGEMM_UNROLL_N 4)
  126. set(CGEMM_UNROLL_M 4)
  127. set(CGEMM_UNROLL_N 2)
  128. set(ZGEMM_UNROLL_M 2)
  129. set(ZGEMM_UNROLL_N 2)
  130. set(CGEMM3M_UNROLL_M 8)
  131. set(CGEMM3M_UNROLL_N 4)
  132. set(ZGEMM3M_UNROLL_M 4)
  133. set(ZGEMM3M_UNROLL_N 4)
  134. elseif ("${TCORE}" STREQUAL "ATOM")
  135. file(APPEND ${TARGET_CONF_TEMP}
  136. "#define L1_DATA_SIZE\t24576\n"
  137. "#define L1_DATA_LINESIZE\t64\n"
  138. "#define L2_SIZE\t524288\n"
  139. "#define L2_LINESIZE\t64\n"
  140. "#define DTB_DEFAULT_ENTRIES\t64\n"
  141. "#define DTB_SIZE\t4096\n"
  142. "#define HAVE_CMOV\n"
  143. "#define HAVE_MMX\n"
  144. "#define HAVE_SSE\n"
  145. "#define HAVE_SSE2\n"
  146. "#define HAVE_SSE3\n"
  147. "#define HAVE_SSSE3\n"
  148. "#define SLOCAL_BUFFER_SIZE\t16384\n"
  149. "#define DLOCAL_BUFFER_SIZE\t8192\n"
  150. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  151. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  152. set(HAVE_SSE 1)
  153. set(HAVE_SSE2 1)
  154. set(HAVE_SSE3 1)
  155. set(HAVE_SSSE3 1)
  156. set(SBGEMM_UNROLL_M 8)
  157. set(SBGEMM_UNROLL_N 4)
  158. set(SGEMM_UNROLL_M 8)
  159. set(SGEMM_UNROLL_N 4)
  160. set(DGEMM_UNROLL_M 4)
  161. set(DGEMM_UNROLL_N 2)
  162. set(CGEMM_UNROLL_M 4)
  163. set(CGEMM_UNROLL_N 2)
  164. set(ZGEMM_UNROLL_M 2)
  165. set(ZGEMM_UNROLL_N 1)
  166. set(CGEMM3M_UNROLL_M 8)
  167. set(CGEMM3M_UNROLL_N 4)
  168. set(ZGEMM3M_UNROLL_M 4)
  169. set(ZGEMM3M_UNROLL_N 4)
  170. elseif ("${TCORE}" STREQUAL "PRESCOTT")
  171. file(APPEND ${TARGET_CONF_TEMP}
  172. "#define L1_DATA_SIZE\t16384\n"
  173. "#define L1_DATA_LINESIZE\t64\n"
  174. "#define L2_SIZE\t1048576\n"
  175. "#define L2_LINESIZE\t64\n"
  176. "#define DTB_DEFAULT_ENTRIES\t64\n"
  177. "#define DTB_SIZE\t4096\n"
  178. "#define HAVE_CMOV\n"
  179. "#define HAVE_MMX\n"
  180. "#define HAVE_SSE\n"
  181. "#define HAVE_SSE2\n"
  182. "#define HAVE_SSE3\n"
  183. "#define SLOCAL_BUFFER_SIZE\t8192\n"
  184. "#define DLOCAL_BUFFER_SIZE\t8192\n"
  185. "#define CLOCAL_BUFFER_SIZE\t8192\n"
  186. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  187. set(HAVE_SSE 1)
  188. set(HAVE_SSE2 1)
  189. set(HAVE_SSE3 1)
  190. set(SBGEMM_UNROLL_M 8)
  191. set(SBGEMM_UNROLL_N 4)
  192. set(SGEMM_UNROLL_M 8)
  193. set(SGEMM_UNROLL_N 4)
  194. set(DGEMM_UNROLL_M 4)
  195. set(DGEMM_UNROLL_N 4)
  196. set(CGEMM_UNROLL_M 4)
  197. set(CGEMM_UNROLL_N 2)
  198. set(ZGEMM_UNROLL_M 2)
  199. set(ZGEMM_UNROLL_N 2)
  200. set(CGEMM3M_UNROLL_M 8)
  201. set(CGEMM3M_UNROLL_N 4)
  202. set(ZGEMM3M_UNROLL_M 4)
  203. set(ZGEMM3M_UNROLL_N 4)
  204. elseif ("${TCORE}" STREQUAL "NEHALEM")
  205. file(APPEND ${TARGET_CONF_TEMP}
  206. "#define L1_DATA_SIZE\t32768\n"
  207. "#define L1_DATA_LINESIZE\t64\n"
  208. "#define L2_SIZE\t262144\n"
  209. "#define L2_LINESIZE\t64\n"
  210. "#define DTB_DEFAULT_ENTRIES\t64\n"
  211. "#define DTB_SIZE\t4096\n"
  212. "#define HAVE_CMOV\n"
  213. "#define HAVE_MMX\n"
  214. "#define HAVE_SSE\n"
  215. "#define HAVE_SSE2\n"
  216. "#define HAVE_SSE3\n"
  217. "#define HAVE_SSSE3\n"
  218. "#define HAVE_SSE4_1\n"
  219. "#define HAVE_SSE4_2\n"
  220. "#define SLOCAL_BUFFER_SIZE\t65535\n"
  221. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  222. "#define CLOCAL_BUFFER_SIZE\t65536\n"
  223. "#define ZLOCAL_BUFFER_SIZE\t32768\n")
  224. set(HAVE_SSE 1)
  225. set(HAVE_SSE2 1)
  226. set(HAVE_SSE3 1)
  227. set(HAVE_SSSE3 1)
  228. set(HAVE_SSE4_1 1)
  229. set(HAVE_SSE4_2 1)
  230. set(SBGEMM_UNROLL_M 8)
  231. set(SBGEMM_UNROLL_N 4)
  232. set(SGEMM_UNROLL_M 4)
  233. set(SGEMM_UNROLL_N 8)
  234. set(DGEMM_UNROLL_M 2)
  235. set(DGEMM_UNROLL_N 8)
  236. set(CGEMM_UNROLL_M 2)
  237. set(CGEMM_UNROLL_N 4)
  238. set(ZGEMM_UNROLL_M 1)
  239. set(ZGEMM_UNROLL_N 4)
  240. set(CGEMM3M_UNROLL_M 4)
  241. set(CGEMM3M_UNROLL_N 8)
  242. set(ZGEMM3M_UNROLL_M 2)
  243. set(ZGEMM3M_UNROLL_N 8)
  244. elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
  245. file(APPEND ${TARGET_CONF_TEMP}
  246. "#define L1_DATA_SIZE\t32768\n"
  247. "#define L1_DATA_LINESIZE\t64\n"
  248. "#define L2_SIZE\t262144\n"
  249. "#define L2_LINESIZE\t64\n"
  250. "#define DTB_DEFAULT_ENTRIES\t64\n"
  251. "#define DTB_SIZE\t4096\n"
  252. "#define HAVE_CMOV\n"
  253. "#define HAVE_MMX\n"
  254. "#define HAVE_SSE\n"
  255. "#define HAVE_SSE2\n"
  256. "#define HAVE_SSE3\n"
  257. "#define HAVE_SSSE3\n"
  258. "#define HAVE_SSE4_1\n"
  259. "#define HAVE_SSE4_2\n"
  260. "#define HAVE_AVX\n"
  261. "#define SLOCAL_BUFFER_SIZE\t24576\n"
  262. "#define DLOCAL_BUFFER_SIZE\t16384\n"
  263. "#define CLOCAL_BUFFER_SIZE\t32768\n"
  264. "#define ZLOCAL_BUFFER_SIZE\t24576\n")
  265. set(HAVE_SSE 1)
  266. set(HAVE_SSE2 1)
  267. set(HAVE_SSE3 1)
  268. set(HAVE_SSSE3 1)
  269. set(HAVE_SSE4_1 1)
  270. set(HAVE_SSE4_2 1)
  271. set(HAVE_AVX 1)
  272. set(SBGEMM_UNROLL_M 8)
  273. set(SBGEMM_UNROLL_N 4)
  274. set(SGEMM_UNROLL_M 16)
  275. set(SGEMM_UNROLL_N 4)
  276. set(DGEMM_UNROLL_M 8)
  277. set(DGEMM_UNROLL_N 4)
  278. set(CGEMM_UNROLL_M 8)
  279. set(CGEMM_UNROLL_N 2)
  280. set(ZGEMM_UNROLL_M 1)
  281. set(ZGEMM_UNROLL_N 4)
  282. set(CGEMM3M_UNROLL_M 4)
  283. set(CGEMM3M_UNROLL_N 8)
  284. set(ZGEMM3M_UNROLL_M 2)
  285. set(ZGEMM3M_UNROLL_N 8)
  286. elseif ("${TCORE}" STREQUAL "HASWELL")
  287. file(APPEND ${TARGET_CONF_TEMP}
  288. "#define L1_DATA_SIZE\t32768\n"
  289. "#define L1_DATA_LINESIZE\t64\n"
  290. "#define L2_SIZE\t262144\n"
  291. "#define L2_LINESIZE\t64\n"
  292. "#define DTB_DEFAULT_ENTRIES\t64\n"
  293. "#define DTB_SIZE\t4096\n"
  294. "#define HAVE_CMOV\n"
  295. "#define HAVE_MMX\n"
  296. "#define HAVE_SSE\n"
  297. "#define HAVE_SSE2\n"
  298. "#define HAVE_SSE3\n"
  299. "#define HAVE_SSSE3\n"
  300. "#define HAVE_SSE4_1\n"
  301. "#define HAVE_SSE4_2\n"
  302. "#define HAVE_AVX\n"
  303. "#define HAVE_AVX2\n"
  304. "#define HAVE_FMA3\n"
  305. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  306. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  307. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  308. "#define ZLOCAL_BUFFER_SIZE\t12288\n")
  309. set(HAVE_SSE 1)
  310. set(HAVE_SSE2 1)
  311. set(HAVE_SSE3 1)
  312. set(HAVE_SSSE3 1)
  313. set(HAVE_SSE4_1 1)
  314. set(HAVE_SSE4_2 1)
  315. set(HAVE_AVX 1)
  316. set(HAVE_AVX2 1)
  317. set(HAVE_FMA3 1)
  318. set(SBGEMM_UNROLL_M 8)
  319. set(SBGEMM_UNROLL_N 4)
  320. set(SGEMM_UNROLL_M 8)
  321. set(SGEMM_UNROLL_N 4)
  322. set(DGEMM_UNROLL_M 4)
  323. set(DGEMM_UNROLL_N 8)
  324. set(CGEMM_UNROLL_M 8)
  325. set(CGEMM_UNROLL_N 2)
  326. set(ZGEMM_UNROLL_M 4)
  327. set(ZGEMM_UNROLL_N 2)
  328. set(CGEMM3M_UNROLL_M 8)
  329. set(CGEMM3M_UNROLL_N 4)
  330. set(ZGEMM3M_UNROLL_M 4)
  331. set(ZGEMM3M_UNROLL_N 4)
  332. elseif ("${TCORE}" STREQUAL "SKYLAKEX")
  333. file(APPEND ${TARGET_CONF_TEMP}
  334. "#define L1_DATA_SIZE\t32768\n"
  335. "#define L1_DATA_LINESIZE\t64\n"
  336. "#define L2_SIZE\t262144\n"
  337. "#define L2_LINESIZE\t64\n"
  338. "#define DTB_DEFAULT_ENTRIES\t64\n"
  339. "#define DTB_SIZE\t4096\n"
  340. "#define HAVE_CMOV\n"
  341. "#define HAVE_MMX\n"
  342. "#define HAVE_SSE\n"
  343. "#define HAVE_SSE2\n"
  344. "#define HAVE_SSE3\n"
  345. "#define HAVE_SSSE3\n"
  346. "#define HAVE_SSE4_1\n"
  347. "#define HAVE_SSE4_2\n"
  348. "#define HAVE_AVX\n"
  349. "#define HAVE_AVX2\n"
  350. "#define HAVE_FMA3\n"
  351. "#define HAVE_AVX512VL\n"
  352. "#define SLOCAL_BUFFER_SIZE\t28672\n"
  353. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  354. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  355. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  356. set(HAVE_CMOV 1)
  357. set(HAVE_MMX 1)
  358. set(HAVE_SSE 1)
  359. set(HAVE_SSE2 1)
  360. set(HAVE_SSE3 1)
  361. set(HAVE_SSSE3 1)
  362. set(HAVE_SSE4_1 1)
  363. set(HAVE_SSE4_2 1)
  364. set(HAVE_AVX 1)
  365. set(HAVE_AVX2 1)
  366. set(HAVE_FMA3 1)
  367. set(HAVE_AVX512VL 1)
  368. set(SBGEMM_UNROLL_M 8)
  369. set(SBGEMM_UNROLL_N 4)
  370. set(SGEMM_UNROLL_M 16)
  371. set(SGEMM_UNROLL_N 4)
  372. set(DGEMM_UNROLL_M 16)
  373. set(DGEMM_UNROLL_N 2)
  374. set(CGEMM_UNROLL_M 8)
  375. set(CGEMM_UNROLL_N 2)
  376. set(ZGEMM_UNROLL_M 4)
  377. set(ZGEMM_UNROLL_N 2)
  378. set(CGEMM3M_UNROLL_M 8)
  379. set(CGEMM3M_UNROLL_N 4)
  380. set(ZGEMM3M_UNROLL_M 4)
  381. set(ZGEMM3M_UNROLL_N 4)
  382. elseif ("${TCORE}" STREQUAL "COOPERLAKE")
  383. file(APPEND ${TARGET_CONF_TEMP}
  384. "#define L1_DATA_SIZE\t32768\n"
  385. "#define L1_DATA_LINESIZE\t64\n"
  386. "#define L2_SIZE\t262144\n"
  387. "#define L2_LINESIZE\t64\n"
  388. "#define DTB_DEFAULT_ENTRIES\t64\n"
  389. "#define DTB_SIZE\t4096\n"
  390. "#define HAVE_CMOV\n"
  391. "#define HAVE_MMX\n"
  392. "#define HAVE_SSE\n"
  393. "#define HAVE_SSE2\n"
  394. "#define HAVE_SSE3\n"
  395. "#define HAVE_SSSE3\n"
  396. "#define HAVE_SSE4_1\n"
  397. "#define HAVE_SSE4_2\n"
  398. "#define HAVE_AVX\n"
  399. "#define HAVE_AVX2\n"
  400. "#define HAVE_FMA3\n"
  401. "#define HAVE_AVX512VL\n"
  402. "#define HAVE_AVX512BF16\n"
  403. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  404. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  405. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  406. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  407. set(HAVE_CMOV 1)
  408. set(HAVE_MMX 1)
  409. set(HAVE_SSE 1)
  410. set(HAVE_SSE2 1)
  411. set(HAVE_SSE3 1)
  412. set(HAVE_SSSE3 1)
  413. set(HAVE_SSE4_1 1)
  414. set(HAVE_SSE4_2 1)
  415. set(HAVE_AVX 1)
  416. set(HAVE_AVX2 1)
  417. set(HAVE_FMA3 1)
  418. set(HAVE_AVX512VL 1)
  419. set(HAVE_AVX512BF16 1)
  420. set(SBGEMM_UNROLL_M 16)
  421. set(SBGEMM_UNROLL_N 4)
  422. set(SGEMM_UNROLL_M 16)
  423. set(SGEMM_UNROLL_N 4)
  424. set(DGEMM_UNROLL_M 16)
  425. set(DGEMM_UNROLL_N 2)
  426. set(CGEMM_UNROLL_M 8)
  427. set(CGEMM_UNROLL_N 2)
  428. set(ZGEMM_UNROLL_M 4)
  429. set(ZGEMM_UNROLL_N 2)
  430. set(CGEMM3M_UNROLL_M 8)
  431. set(CGEMM3M_UNROLL_N 4)
  432. set(ZGEMM3M_UNROLL_M 4)
  433. set(ZGEMM3M_UNROLL_N 4)
  434. elseif ("${TCORE}" STREQUAL "SAPPHIRERAPIDS")
  435. file(APPEND ${TARGET_CONF_TEMP}
  436. "#define L1_DATA_SIZE\t32768\n"
  437. "#define L1_DATA_LINESIZE\t64\n"
  438. "#define L2_SIZE\t262144\n"
  439. "#define L2_LINESIZE\t64\n"
  440. "#define DTB_DEFAULT_ENTRIES\t64\n"
  441. "#define DTB_SIZE\t4096\n"
  442. "#define HAVE_CMOV\n"
  443. "#define HAVE_MMX\n"
  444. "#define HAVE_SSE\n"
  445. "#define HAVE_SSE2\n"
  446. "#define HAVE_SSE3\n"
  447. "#define HAVE_SSSE3\n"
  448. "#define HAVE_SSE4_1\n"
  449. "#define HAVE_SSE4_2\n"
  450. "#define HAVE_AVX\n"
  451. "#define HAVE_AVX2\n"
  452. "#define HAVE_FMA3\n"
  453. "#define HAVE_AVX512VL\n"
  454. "#define HAVE_AVX512BF16\n"
  455. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  456. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  457. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  458. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  459. set(HAVE_CMOV 1)
  460. set(HAVE_MMX 1)
  461. set(HAVE_SSE 1)
  462. set(HAVE_SSE2 1)
  463. set(HAVE_SSE3 1)
  464. set(HAVE_SSSE3 1)
  465. set(HAVE_SSE4_1 1)
  466. set(HAVE_SSE4_2 1)
  467. set(HAVE_AVX 1)
  468. set(HAVE_AVX2 1)
  469. set(HAVE_FMA3 1)
  470. set(HAVE_AVX512VL 1)
  471. set(HAVE_AVX512BF16 1)
  472. set(SBGEMM_UNROLL_M 32)
  473. set(SBGEMM_UNROLL_N 16)
  474. set(SGEMM_UNROLL_M 16)
  475. set(SGEMM_UNROLL_N 4)
  476. set(DGEMM_UNROLL_M 16)
  477. set(DGEMM_UNROLL_N 2)
  478. set(CGEMM_UNROLL_M 8)
  479. set(CGEMM_UNROLL_N 2)
  480. set(ZGEMM_UNROLL_M 4)
  481. set(ZGEMM_UNROLL_N 2)
  482. set(CGEMM3M_UNROLL_M 8)
  483. set(CGEMM3M_UNROLL_N 4)
  484. set(ZGEMM3M_UNROLL_M 4)
  485. set(ZGEMM3M_UNROLL_N 4)
  486. elseif ("${TCORE}" STREQUAL "OPTERON")
  487. file(APPEND ${TARGET_CONF_TEMP}
  488. "#define L1_DATA_SIZE\t65536\n"
  489. "#define L1_DATA_LINESIZE\t64\n"
  490. "#define L2_SIZE\t1048576\n"
  491. "#define L2_LINESIZE\t64\n"
  492. "#define DTB_DEFAULT_ENTRIES\t32\n"
  493. "#define DTB_SIZE\t4096\n"
  494. "#define HAVE_3DNOW\n"
  495. "#define HAVE_3DNOWEX\n"
  496. "#define HAVE_MMX\n"
  497. "#define HAVE_SSE\n"
  498. "#define HAVE_SSE2\n"
  499. "#define SLOCAL_BUFFER_SIZE\t15360\n"
  500. "#define DLOCAL_BUFFER_SIZE\t15360\n"
  501. "#define CLOCAL_BUFFER_SIZE\t15360\n"
  502. "#define ZLOCAL_BUFFER_SIZE\t15360\n")
  503. set(HAVE_3DNOW 1)
  504. set(HAVE_3DNOWEX 1)
  505. set(HAVE_MMX 1)
  506. set(HAVE_SSE 1)
  507. set(HAVE_SSE2 1)
  508. set(SBGEMM_UNROLL_M 8)
  509. set(SBGEMM_UNROLL_N 4)
  510. set(SGEMM_UNROLL_M 8)
  511. set(SGEMM_UNROLL_N 4)
  512. set(DGEMM_UNROLL_M 4)
  513. set(DGEMM_UNROLL_N 4)
  514. set(CGEMM_UNROLL_M 4)
  515. set(CGEMM_UNROLL_N 2)
  516. set(ZGEMM_UNROLL_M 2)
  517. set(ZGEMM_UNROLL_N 2)
  518. set(CGEMM3M_UNROLL_M 8)
  519. set(CGEMM3M_UNROLL_N 4)
  520. set(ZGEMM3M_UNROLL_M 4)
  521. set(ZGEMM3M_UNROLL_N 4)
  522. elseif ("${TCORE}" STREQUAL "BARCELONA")
  523. file(APPEND ${TARGET_CONF_TEMP}
  524. "#define L1_DATA_SIZE\t32768\n"
  525. "#define L1_DATA_LINESIZE\t64\n"
  526. "#define L2_SIZE\t524288\n"
  527. "#define L2_LINESIZE\t64\n"
  528. "#define DTB_DEFAULT_ENTRIES\t64\n"
  529. "#define DTB_SIZE\t4096\n"
  530. "#define HAVE_MMX\n"
  531. "#define HAVE_SSE\n"
  532. "#define HAVE_SSE2\n"
  533. "#define HAVE_SSE3\n"
  534. "#define HAVE_SSE4A\n"
  535. "#define HAVE_MISALIGNSSE\n"
  536. "#define HAVE_128BITFPU\n"
  537. "#define HAVE_FASTMOVU\n"
  538. "#define SLOCAL_BUFFER_SIZE\t14336\n"
  539. "#define DLOCAL_BUFFER_SIZE\t14336\n"
  540. "#define CLOCAL_BUFFER_SIZE\t14336\n"
  541. "#define ZLOCAL_BUFFER_SIZE\t14336\n")
  542. set(HAVE_SSE 1)
  543. set(HAVE_SSE2 1)
  544. set(HAVE_SSE3 1)
  545. set(HAVE_SSE4A 1)
  546. set(HAVE_MISALIGNSSE 1)
  547. set(HAVE_128BITFPU 1)
  548. set(HAVE_FASTMOVU 1)
  549. set(SBGEMM_UNROLL_M 8)
  550. set(SBGEMM_UNROLL_N 4)
  551. set(SGEMM_UNROLL_M 8)
  552. set(SGEMM_UNROLL_N 4)
  553. set(DGEMM_UNROLL_M 4)
  554. set(DGEMM_UNROLL_N 4)
  555. set(CGEMM_UNROLL_M 4)
  556. set(CGEMM_UNROLL_N 2)
  557. set(ZGEMM_UNROLL_M 2)
  558. set(ZGEMM_UNROLL_N 2)
  559. set(CGEMM3M_UNROLL_M 8)
  560. set(CGEMM3M_UNROLL_N 4)
  561. set(ZGEMM3M_UNROLL_M 4)
  562. set(ZGEMM3M_UNROLL_N 4)
  563. elseif ("${TCORE}" STREQUAL "BULLDOZER")
  564. file(APPEND ${TARGET_CONF_TEMP}
  565. "#define L1_DATA_SIZE\t49152\n"
  566. "#define L1_DATA_LINESIZE\t64\n"
  567. "#define L2_SIZE\t1024000\n"
  568. "#define L2_LINESIZE\t64\n"
  569. "#define DTB_DEFAULT_ENTRIES\t32\n"
  570. "#define DTB_SIZE\t4096\n"
  571. "#define HAVE_MMX\n"
  572. "#define HAVE_SSE\n"
  573. "#define HAVE_SSE2\n"
  574. "#define HAVE_SSE3\n"
  575. "#define HAVE_SSE4A\n"
  576. "#define HAVE_AVX\n"
  577. "#define HAVE_MISALIGNSSE\n"
  578. "#define HAVE_128BITFPU\n"
  579. "#define HAVE_FASTMOVU\n"
  580. "#define SLOCAL_BUFFER_SIZE\t5376\n"
  581. "#define DLOCAL_BUFFER_SIZE\t5376\n"
  582. "#define CLOCAL_BUFFER_SIZE\t14336\n"
  583. "#define ZLOCAL_BUFFER_SIZE\t14336\n")
  584. set(HAVE_SSE 1)
  585. set(HAVE_SSE2 1)
  586. set(HAVE_SSE3 1)
  587. set(HAVE_SSE4A 1)
  588. set(HAVE_AVX 1)
  589. set(HAVE_MISALIGNSSE 1)
  590. set(HAVE_128BITFPU 1)
  591. set(HAVE_FASTMOVU 1)
  592. set(SBGEMM_UNROLL_M 8)
  593. set(SBGEMM_UNROLL_N 4)
  594. set(SGEMM_UNROLL_M 16)
  595. set(SGEMM_UNROLL_N 2)
  596. set(DGEMM_UNROLL_M 8)
  597. set(DGEMM_UNROLL_N 2)
  598. set(CGEMM_UNROLL_M 2)
  599. set(CGEMM_UNROLL_N 2)
  600. set(ZGEMM_UNROLL_M 2)
  601. set(ZGEMM_UNROLL_N 2)
  602. set(CGEMM3M_UNROLL_M 8)
  603. set(CGEMM3M_UNROLL_N 4)
  604. set(ZGEMM3M_UNROLL_M 4)
  605. set(ZGEMM3M_UNROLL_N 4)
  606. elseif ("${TCORE}" STREQUAL "PILEDRIVER")
  607. file(APPEND ${TARGET_CONF_TEMP}
  608. "#define L1_DATA_SIZE\t16384\n"
  609. "#define L1_DATA_LINESIZE\t64\n"
  610. "#define L2_SIZE\t2097152\n"
  611. "#define L2_LINESIZE\t64\n"
  612. "#define DTB_DEFAULT_ENTRIES\t64\n"
  613. "#define DTB_SIZE\t4096\n"
  614. "#define HAVE_MMX\n"
  615. "#define HAVE_SSE\n"
  616. "#define HAVE_SSE2\n"
  617. "#define HAVE_SSE3\n"
  618. "#define HAVE_SSE4_1\n"
  619. "#define HAVE_SSE4_2\n"
  620. "#define HAVE_SSE4A\n"
  621. "#define HAVE_AVX\n"
  622. "#define HAVE_MISALIGNSSE\n"
  623. "#define HAVE_128BITFPU\n"
  624. "#define HAVE_FASTMOVU\n"
  625. "#define HAVE_CFLUSH\n"
  626. "#define HAVE_FMA3\n"
  627. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  628. "#define DLOCAL_BUFFER_SIZE\t5376\n"
  629. "#define CLOCAL_BUFFER_SIZE\t10752\n"
  630. "#define ZLOCAL_BUFFER_SIZE\t10752\n")
  631. set(HAVE_SSE 1)
  632. set(HAVE_SSE2 1)
  633. set(HAVE_SSE3 1)
  634. set(HAVE_SSE4_1 1)
  635. set(HAVE_SSE4_2 1)
  636. set(HAVE_SSE4A 1)
  637. set(HAVE_AVX 1)
  638. set(HAVE_FMA3 1)
  639. set(HAVE_MISALIGNSSE 1)
  640. set(HAVE_128BITFPU 1)
  641. set(HAVE_FASTMOVU 1)
  642. set(HAVE_CFLUSH 1)
  643. set(SBGEMM_UNROLL_M 8)
  644. set(SBGEMM_UNROLL_N 4)
  645. set(SGEMM_UNROLL_M 16)
  646. set(SGEMM_UNROLL_N 2)
  647. set(DGEMM_UNROLL_M 8)
  648. set(DGEMM_UNROLL_N 2)
  649. set(CGEMM_UNROLL_M 4)
  650. set(CGEMM_UNROLL_N 2)
  651. set(ZGEMM_UNROLL_M 2)
  652. set(ZGEMM_UNROLL_N 2)
  653. set(CGEMM3M_UNROLL_M 8)
  654. set(CGEMM3M_UNROLL_N 4)
  655. set(ZGEMM3M_UNROLL_M 4)
  656. set(ZGEMM3M_UNROLL_N 4)
  657. elseif ("${TCORE}" STREQUAL "STEAMROLLER")
  658. file(APPEND ${TARGET_CONF_TEMP}
  659. "#define L1_DATA_SIZE\t16384\n"
  660. "#define L1_DATA_LINESIZE\t64\n"
  661. "#define L2_SIZE\t2097152\n"
  662. "#define L2_LINESIZE\t64\n"
  663. "#define DTB_DEFAULT_ENTRIES\t64\n"
  664. "#define DTB_SIZE\t4096\n"
  665. "#define HAVE_MMX\n"
  666. "#define HAVE_SSE\n"
  667. "#define HAVE_SSE2\n"
  668. "#define HAVE_SSE3\n"
  669. "#define HAVE_SSE4_1\n"
  670. "#define HAVE_SSE4_2\n"
  671. "#define HAVE_SSE4A\n"
  672. "#define HAVE_AVX\n"
  673. "#define HAVE_MISALIGNSSE\n"
  674. "#define HAVE_128BITFPU\n"
  675. "#define HAVE_FASTMOVU\n"
  676. "#define HAVE_CFLUSH\n"
  677. "#define HAVE_FMA3\n"
  678. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  679. "#define DLOCAL_BUFFER_SIZE\t5120\n"
  680. "#define CLOCAL_BUFFER_SIZE\t10240\n"
  681. "#define ZLOCAL_BUFFER_SIZE\t10240\n")
  682. set(HAVE_SSE 1)
  683. set(HAVE_SSE2 1)
  684. set(HAVE_SSE3 1)
  685. set(HAVE_SSE4_1 1)
  686. set(HAVE_SSE4_2 1)
  687. set(HAVE_SSE4A 1)
  688. set(HAVE_AVX 1)
  689. set(HAVE_FMA3 1)
  690. set(HAVE_MISALIGNSSE 1)
  691. set(HAVE_128BITFPU 1)
  692. set(HAVE_FASTMOVU 1)
  693. set(HAVE_CFLUSH 1)
  694. set(SBGEMM_UNROLL_M 8)
  695. set(SBGEMM_UNROLL_N 4)
  696. set(SGEMM_UNROLL_M 16)
  697. set(SGEMM_UNROLL_N 2)
  698. set(DGEMM_UNROLL_M 8)
  699. set(DGEMM_UNROLL_N 2)
  700. set(CGEMM_UNROLL_M 4)
  701. set(CGEMM_UNROLL_N 2)
  702. set(ZGEMM_UNROLL_M 2)
  703. set(ZGEMM_UNROLL_N 2)
  704. set(CGEMM3M_UNROLL_M 8)
  705. set(CGEMM3M_UNROLL_N 4)
  706. set(ZGEMM3M_UNROLL_M 4)
  707. set(ZGEMM3M_UNROLL_N 4)
  708. elseif ("${TCORE}" STREQUAL "EXCAVATOR")
  709. file(APPEND ${TARGET_CONF_TEMP}
  710. "#define L1_DATA_SIZE\t16384\n"
  711. "#define L1_DATA_LINESIZE\t64\n"
  712. "#define L2_SIZE\t2097152\n"
  713. "#define L2_LINESIZE\t64\n"
  714. "#define DTB_DEFAULT_ENTRIES\t64\n"
  715. "#define DTB_SIZE\t4096\n"
  716. "#define HAVE_MMX\n"
  717. "#define HAVE_SSE\n"
  718. "#define HAVE_SSE2\n"
  719. "#define HAVE_SSE3\n"
  720. "#define HAVE_SSE4_1\n"
  721. "#define HAVE_SSE4_2\n"
  722. "#define HAVE_SSE4A\n"
  723. "#define HAVE_AVX\n"
  724. "#define HAVE_MISALIGNSSE\n"
  725. "#define HAVE_128BITFPU\n"
  726. "#define HAVE_FASTMOVU\n"
  727. "#define HAVE_CFLUSH\n"
  728. "#define HAVE_FMA3\n"
  729. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  730. "#define DLOCAL_BUFFER_SIZE\t5120\n"
  731. "#define CLOCAL_BUFFER_SIZE\t10240\n"
  732. "#define ZLOCAL_BUFFER_SIZE\t10240\n")
  733. set(HAVE_SSE 1)
  734. set(HAVE_SSE2 1)
  735. set(HAVE_SSE3 1)
  736. set(HAVE_SSE4_1 1)
  737. set(HAVE_SSE4_2 1)
  738. set(HAVE_SSE4A 1)
  739. set(HAVE_AVX 1)
  740. set(HAVE_FMA3 1)
  741. set(HAVE_MISALIGNSSE 1)
  742. set(HAVE_128BITFPU 1)
  743. set(HAVE_FASTMOVU 1)
  744. set(HAVE_CFLUSH 1)
  745. set(SBGEMM_UNROLL_M 8)
  746. set(SBGEMM_UNROLL_N 4)
  747. set(SGEMM_UNROLL_M 16)
  748. set(SGEMM_UNROLL_N 2)
  749. set(DGEMM_UNROLL_M 8)
  750. set(DGEMM_UNROLL_N 2)
  751. set(CGEMM_UNROLL_M 4)
  752. set(CGEMM_UNROLL_N 2)
  753. set(ZGEMM_UNROLL_M 2)
  754. set(ZGEMM_UNROLL_N 2)
  755. set(CGEMM3M_UNROLL_M 8)
  756. set(CGEMM3M_UNROLL_N 4)
  757. set(ZGEMM3M_UNROLL_M 4)
  758. set(ZGEMM3M_UNROLL_N 4)
  759. elseif ("${TCORE}" STREQUAL "ZEN")
  760. file(APPEND ${TARGET_CONF_TEMP}
  761. "#define L1_DATA_SIZE\t32768\n"
  762. "#define L1_DATA_LINESIZE\t64\n"
  763. "#define L2_SIZE\t524288\n"
  764. "#define L2_LINESIZE\t64\n"
  765. "#define DTB_DEFAULT_ENTRIES\t64\n"
  766. "#define DTB_SIZE\t4096\n"
  767. "#define HAVE_MMX\n"
  768. "#define HAVE_SSE\n"
  769. "#define HAVE_SSE2\n"
  770. "#define HAVE_SSE3\n"
  771. "#define HAVE_SSE4_1\n"
  772. "#define HAVE_SSE4_2\n"
  773. "#define HAVE_SSE4A\n"
  774. "#define HAVE_MISALIGNSSE\n"
  775. "#define HAVE_128BITFPU\n"
  776. "#define HAVE_FASTMOVU\n"
  777. "#define HAVE_CFLUSH\n"
  778. "#define HAVE_AVX\n"
  779. "#define HAVE_AVX2\n"
  780. "#define HAVE_FMA3\n"
  781. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  782. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  783. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  784. "#define ZLOCAL_BUFFER_SIZE\t12288\n")
  785. set(HAVE_SSE 1)
  786. set(HAVE_SSE2 1)
  787. set(HAVE_SSE3 1)
  788. set(HAVE_SSE4_1 1)
  789. set(HAVE_SSE4_2 1)
  790. set(HAVE_AVX 1)
  791. set(HAVE_AVX2 1)
  792. set(HAVE_FMA3 1)
  793. set(HAVE_SSE4A 1)
  794. set(HAVE_MISALIGNSSE 1)
  795. set(HAVE_128BITFPU 1)
  796. set(HAVE_FASTMOVU 1)
  797. set(HAVE_CFLUSH 1)
  798. set(SBGEMM_UNROLL_M 8)
  799. set(SBGEMM_UNROLL_N 4)
  800. set(SGEMM_UNROLL_M 8)
  801. set(SGEMM_UNROLL_N 4)
  802. set(DGEMM_UNROLL_M 4)
  803. set(DGEMM_UNROLL_N 8)
  804. set(CGEMM_UNROLL_M 8)
  805. set(CGEMM_UNROLL_N 2)
  806. set(ZGEMM_UNROLL_M 4)
  807. set(ZGEMM_UNROLL_N 2)
  808. set(CGEMM3M_UNROLL_M 8)
  809. set(CGEMM3M_UNROLL_N 4)
  810. set(ZGEMM3M_UNROLL_M 4)
  811. set(ZGEMM3M_UNROLL_N 4)
  812. elseif ("${TCORE}" STREQUAL "ARMV5")
  813. file(APPEND ${TARGET_CONF_TEMP}
  814. "#define L1_DATA_SIZE\t65536\n"
  815. "#define L1_DATA_LINESIZE\t32\n"
  816. "#define L2_SIZE\t512488\n"
  817. "#define L2_LINESIZE\t32\n"
  818. "#define DTB_DEFAULT_ENTRIES\t64\n"
  819. "#define DTB_SIZE\t4096\n"
  820. "#define L2_ASSOCIATIVE\t4\n")
  821. set(SGEMM_UNROLL_M 2)
  822. set(SGEMM_UNROLL_N 2)
  823. set(DGEMM_UNROLL_M 2)
  824. set(DGEMM_UNROLL_N 2)
  825. set(CGEMM_UNROLL_M 2)
  826. set(CGEMM_UNROLL_N 2)
  827. set(ZGEMM_UNROLL_M 2)
  828. set(ZGEMM_UNROLL_N 2)
  elseif ("${TCORE}" STREQUAL "ARMV6")
    # ARMV6: L1 data cache, L2 and TLB macros plus HAVE_VFP.
    # NOTE(review): L2_SIZE 512488 is not a power of two; it looks like a
    # transposition of 524288 (512 KiB) -- confirm before changing.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t32\n"
      # L2 cache
      "#define L2_SIZE\t512488\n"
      "#define L2_LINESIZE\t32\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define L2_ASSOCIATIVE\t4\n"
      # feature macros
      "#define HAVE_VFP\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 4)
    set(DGEMM_UNROLL_M 4)
    set(CGEMM_UNROLL_M 2)
    set(ZGEMM_UNROLL_M 2)
    # UNROLL_N values (2 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 2)
    endforeach()
  elseif ("${TCORE}" STREQUAL "ARMV7")
    # ARMV7: L1 data cache, L2 and TLB macros plus HAVE_VFPV3 / HAVE_VFP.
    # NOTE(review): L2_SIZE 512488 is not a power of two; it looks like a
    # transposition of 524288 (512 KiB) -- confirm before changing.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t32\n"
      # L2 cache
      "#define L2_SIZE\t512488\n"
      "#define L2_LINESIZE\t32\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define L2_ASSOCIATIVE\t4\n"
      # feature macros
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n")
    # Real precisions: 4x4 unroll.
    foreach(gemm_prefix S D)
      set(${gemm_prefix}GEMM_UNROLL_M 4)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    # Complex precisions: 2x2 unroll.
    foreach(gemm_prefix C Z)
      set(${gemm_prefix}GEMM_UNROLL_M 2)
      set(${gemm_prefix}GEMM_UNROLL_N 2)
    endforeach()
  elseif ("${TCORE}" STREQUAL "ARMV8")
    # ARMV8: L1 data cache, L2 and TLB macros plus the ARMV8 marker.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      # L2 cache
      "#define L2_SIZE\t262144\n"
      "#define L2_LINESIZE\t64\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define L2_ASSOCIATIVE\t32\n"
      # architecture marker
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53" OR "${TCORE}" STREQUAL "CORTEXA55")
    # Cortex-A57 / A53 / A55 share one cache description; only the SGEMM and
    # DGEMM unroll factors differ between the three cores.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      # L2 cache
      "#define L2_SIZE\t262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t16\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n")
    # Values common to all three cores.
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)
    # Start from the values CORTEXA55 uses, then apply per-core overrides.
    set(SGEMM_UNROLL_M 8)
    set(SGEMM_UNROLL_N 8)
    set(DGEMM_UNROLL_M 8)
    if ("${TCORE}" STREQUAL "CORTEXA57")
      set(SGEMM_UNROLL_M 16)
      set(SGEMM_UNROLL_N 4)
    elseif ("${TCORE}" STREQUAL "CORTEXA53")
      set(DGEMM_UNROLL_M 4)
    endif ()
  elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73")
    # Cortex-A72 / A73: cache, TLB and feature macros shared by both cores.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t49152\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      # L2 cache
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t16\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
    # Neoverse N1: cache, TLB and feature macros.
    # The L2_SIZE line ends with two newlines, so the generated header gets a
    # blank line after it.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      # L2 cache
      "#define L2_SIZE\t1048576\n\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t48\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "NEOVERSEV1")
    # Neoverse V1: cache, TLB and feature macros, including HAVE_SVE.
    # The L2_SIZE line ends with two newlines, so the generated header gets a
    # blank line after it.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      # L2 cache
      "#define L2_SIZE\t1048576\n\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t48\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define HAVE_SVE\n"
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "NEOVERSEN2")
    # Neoverse N2: cache, TLB and feature macros, including HAVE_SVE.
    # The L2_SIZE line ends with two newlines, so the generated header gets a
    # blank line after it.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      # L2 cache
      "#define L2_SIZE\t1048576\n\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t48\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define HAVE_SVE\n"
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "FALKOR")
    # Falkor: cache, TLB and feature macros.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t128\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      # L2 cache
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t16\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "THUNDERX")
    # ThunderX: cache, TLB and feature macros.
    # L2_SIZE is 16 MiB, i.e. 16777216 bytes (2^24).
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t128\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      # L2 cache
      "#define L2_SIZE\t16777216\n"
      "#define L2_LINESIZE\t128\n"
      "#define L2_ASSOCIATIVE\t16\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      # feature macros and architecture marker
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n")
    # Single precision real uses a 4x4 unroll ...
    set(SGEMM_UNROLL_M 4)
    set(SGEMM_UNROLL_N 4)
    # ... every other precision uses 2x2.
    foreach(gemm_prefix D C Z)
      set(${gemm_prefix}GEMM_UNROLL_M 2)
      set(${gemm_prefix}GEMM_UNROLL_N 2)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "THUNDERX2T99")
    # ThunderX2 T99: L1/L2/L3 cache and TLB macros plus the ARMV8 marker.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 code cache
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t8\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t8\n"
      # L2 cache
      "#define L2_SIZE\t262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # L3 cache
      "#define L3_SIZE\t33554432\n"
      "#define L3_LINESIZE\t64\n"
      "#define L3_ASSOCIATIVE\t32\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      # architecture marker
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
    # ThunderX3 T110: core marker, L1/L2/L3 cache and TLB macros, ARMV8 marker.
    file(APPEND ${TARGET_CONF_TEMP}
      # core marker
      "#define THUNDERX3T110\n"
      # L1 code cache
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t8\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t8\n"
      # L2 cache
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # L3 cache
      "#define L3_SIZE\t94371840\n"
      "#define L3_LINESIZE\t64\n"
      "#define L3_ASSOCIATIVE\t32\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      # architecture marker
      "#define ARMV8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "TSV110")
    # TSV110: ARMV8 marker followed by L1/L2 cache and TLB macros.
    file(APPEND ${TARGET_CONF_TEMP}
      # architecture marker
      "#define ARMV8\n"
      # L1 code cache
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      # L2 cache
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "EMAG8180")
    # EMAG8180: ARMV8 marker followed by L1/L2 cache and TLB macros.
    # NOTE(review): L2_SIZE 5262144 is not a power of two and may be a typo
    # (262144 or 524288?) -- confirm the intended value before changing.
    file(APPEND ${TARGET_CONF_TEMP}
      # architecture marker
      "#define ARMV8\n"
      # L1 code cache
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      # L2 cache
      "#define L2_SIZE\t5262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "VORTEX")
    # VORTEX: ARMV8 marker followed by L1/L2 cache and TLB macros.
    # NOTE(review): L2_SIZE 5262144 is not a power of two and may be a typo
    # (262144 or 524288?) -- confirm the intended value before changing.
    file(APPEND ${TARGET_CONF_TEMP}
      # architecture marker
      "#define ARMV8\n"
      # L1 code cache
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      # L1 data cache
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      # L2 cache
      "#define L2_SIZE\t5262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 4)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "P5600")
    # P5600: only L2 size and TLB macros are emitted (space-separated here,
    # unlike the tab-separated ARM entries).
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L2_SIZE 1048576\n"
      "#define DTB_SIZE 4096\n"
      "#define DTB_DEFAULT_ENTRIES 64\n")
    # Every precision uses the same 2x2 GEMM unroll.
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_M 2)
      set(${gemm_prefix}GEMM_UNROLL_N 2)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" MATCHES "MIPS")
    # Any core name containing "MIPS" (regex match, not an exact comparison):
    # only L2 size and TLB macros are emitted.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L2_SIZE 262144\n"
      "#define DTB_SIZE 4096\n"
      "#define DTB_DEFAULT_ENTRIES 64\n")
    # Every precision uses the same 2x2 GEMM unroll.
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_M 2)
      set(${gemm_prefix}GEMM_UNROLL_N 2)
    endforeach()
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "POWER6")
    # POWER6: L1 data cache, L2 and TLB macros.
    # The L2_LINESIZE line carries a trailing space before its newline; it is
    # reproduced exactly.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      # L2 cache
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 4)
    set(DGEMM_UNROLL_M 4)
    set(CGEMM_UNROLL_M 2)
    set(ZGEMM_UNROLL_M 2)
    # UNROLL_N values (4 for every precision)
    foreach(gemm_prefix S D C Z)
      set(${gemm_prefix}GEMM_UNROLL_N 4)
    endforeach()
    set(SYMV_P 8)
  elseif ("${TCORE}" STREQUAL "POWER8")
    # POWER8: L1 data cache, L2 and TLB macros.
    # The L2_LINESIZE line carries a trailing space before its newline; it is
    # reproduced exactly.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      # L2 cache
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 16)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 8)
    # UNROLL_N values
    set(SGEMM_UNROLL_N 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 8)
  elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
    # POWER9 / POWER10: L1 data cache, L2 and TLB macros shared by both cores.
    # The L2_LINESIZE line carries a trailing space before its newline; it is
    # reproduced exactly.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      # L2 cache
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n")
    # UNROLL_M values
    set(SGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_M 16)
    set(CGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_M 8)
    # UNROLL_N values
    set(SGEMM_UNROLL_N 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 8)
  elseif ("${TCORE}" STREQUAL "GENERIC")
    # GENERIC: cache and TLB macros only. No unroll factors are set in this
    # branch; the foreach after the chain fills in 2 for any left undefined.
    file(APPEND ${TARGET_CONF_TEMP}
      # L1 data cache
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      # L2 cache
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      # TLB
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n")
  1298. endif()
  # SBGEMM unroll factors used for every cross-compiled core.
  # NOTE(review): these run after the per-core chain, so they replace any
  # SBGEMM_UNROLL_* value a branch selected -- confirm this is intended.
  set(SBGEMM_UNROLL_M 8)
  set(SBGEMM_UNROLL_N 4)

  # Publish the configured thread count as NUM_CORES in the generated header.
  # (Open question from the original author: should a NUM_CORES value be used
  # here instead of NUM_THREADS?)
  # NUM_THREADS is named without ${} so that an unset or empty value simply
  # makes the test false instead of producing a malformed if() expression.
  if (NUM_THREADS GREATER 0)
    file(APPEND "${TARGET_CONF_TEMP}" "#define NUM_CORES\t${NUM_THREADS}\n")
  endif()

  # GetArch_2nd: any GEMM unroll factor the selected core did not set
  # defaults to 2.
  foreach(float_char S;D;Q;C;Z;X)
    if (NOT DEFINED ${float_char}GEMM_UNROLL_M)
      set(${float_char}GEMM_UNROLL_M 2)
    endif()
    if (NOT DEFINED ${float_char}GEMM_UNROLL_N)
      set(${float_char}GEMM_UNROLL_N 2)
    endif()
  endforeach()

  file(APPEND "${TARGET_CONF_TEMP}"
    "#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n")

  # Move the finished header to where gen_config_h would place it.
  file(MAKE_DIRECTORY "${TARGET_CONF_DIR}")
  file(RENAME "${TARGET_CONF_TEMP}" "${TARGET_CONF_DIR}/${TARGET_CONF}")
  1319. else(NOT CMAKE_CROSSCOMPILING)
  # Build the getarch probe with try_compile and copy the resulting binary
  # into PROJECT_BINARY_DIR.
  set(GETARCH_SRC ${PROJECT_SOURCE_DIR}/getarch.c ${CPUIDEMU})

  if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
    # Non-MSVC compilers also build the cpuid assembly source and may be
    # forced to a specific core through TARGET_CORE.
    list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
    if (DEFINED TARGET_CORE)
      set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE})
    endif ()
  else ()
    # Use generic for MSVC now
    message("MSVC")
    set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
  endif ()

  if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
    # disable WindowsStore strict CRT checks
    set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
  endif ()

  set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
  set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
  file(MAKE_DIRECTORY "${GETARCH_DIR}")
  configure_file("${TARGET_CONF_TEMP}" "${GETARCH_DIR}/${TARGET_CONF}" COPYONLY)

  # The probe is not compiled for WindowsStore.
  if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
    # When CMAKE_ASM_COMPILER_ID is defined but empty, hand the C compiler to
    # the try_compile project as its ASM compiler.
    set(getarch_asm_override)
    if (CMAKE_ASM_COMPILER_ID STREQUAL "")
      set(getarch_asm_override CMAKE_FLAGS "-DCMAKE_ASM_COMPILER=${CMAKE_C_COMPILER}")
    endif ()

    try_compile(GETARCH_RESULT "${GETARCH_DIR}"
      SOURCES ${GETARCH_SRC}
      ${getarch_asm_override}
      COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
      OUTPUT_VARIABLE GETARCH_LOG
      COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH_BIN}"
    )
    unset(getarch_asm_override)

    if (NOT GETARCH_RESULT)
      MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
    endif ()
  endif ()
  # Clear the HAVE_<feature> variables before getarch is run.
  foreach(cpu_feature
      AVX2 AVX FMA3 MMX
      SSE SSE2 SSE3 SSSE3 SSE4A SSE4_1 SSE4_2
      NEON VFP VFPV3 VFPV4)
    unset(HAVE_${cpu_feature})
  endforeach()
  message(STATUS "Running getarch")
  # Run the getarch binary directly, once with argument 0 and once with
  # argument 1. The "0" output is parsed into CMake variables below; the "1"
  # output is appended to the target config header.
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0
    RESULT_VARIABLE GETARCH_MAKE_STATUS
    OUTPUT_VARIABLE GETARCH_MAKE_OUT)
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1
    RESULT_VARIABLE GETARCH_CONF_STATUS
    OUTPUT_VARIABLE GETARCH_CONF_OUT)
  # RESULT_VARIABLE holds the exit code, or an error string when the binary
  # could not be started; anything other than "0" means the captured output
  # cannot be trusted, so stop instead of generating an incomplete config.
  if ((NOT "${GETARCH_MAKE_STATUS}" STREQUAL "0") OR (NOT "${GETARCH_CONF_STATUS}" STREQUAL "0"))
    message(FATAL_ERROR
      "Running ${PROJECT_BINARY_DIR}/${GETARCH_BIN} failed "
      "(status '${GETARCH_MAKE_STATUS}' / '${GETARCH_CONF_STATUS}')")
  endif ()
  message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
  # Append config data from getarch to the TARGET file and read in CMake vars.
  # The output is quoted so it is written verbatim as a single argument (an
  # unquoted expansion would be split on ';' and the separators dropped).
  file(APPEND "${TARGET_CONF_TEMP}" "${GETARCH_CONF_OUT}")
  ParseGetArchVars(${GETARCH_MAKE_OUT})
  # Second stage: build getarch_2nd against the config header produced so far,
  # run it, and fold its output into the final target config.
  set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
  set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
  file(MAKE_DIRECTORY "${GETARCH2_DIR}")
  configure_file("${TARGET_CONF_TEMP}" "${GETARCH2_DIR}/${TARGET_CONF}" COPYONLY)
  # The probe is not compiled for WindowsStore.
  if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
    try_compile(GETARCH2_RESULT "${GETARCH2_DIR}"
      SOURCES "${PROJECT_SOURCE_DIR}/getarch_2nd.c"
      COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
      OUTPUT_VARIABLE GETARCH2_LOG
      COPY_FILE "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}"
    )
    if (NOT ${GETARCH2_RESULT})
      MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
    endif ()
  endif ()
  # Run the getarch_2nd binary directly, once with argument 0 and once with
  # argument 1. The "0" output is parsed into CMake variables below; the "1"
  # output is appended to the target config header.
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0
    RESULT_VARIABLE GETARCH2_MAKE_STATUS
    OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1
    RESULT_VARIABLE GETARCH2_CONF_STATUS
    OUTPUT_VARIABLE GETARCH2_CONF_OUT)
  # RESULT_VARIABLE holds the exit code, or an error string when the binary
  # could not be started; anything other than "0" means the captured output
  # cannot be trusted, so stop instead of generating an incomplete config.
  if ((NOT "${GETARCH2_MAKE_STATUS}" STREQUAL "0") OR (NOT "${GETARCH2_CONF_STATUS}" STREQUAL "0"))
    message(FATAL_ERROR
      "Running ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} failed "
      "(status '${GETARCH2_MAKE_STATUS}' / '${GETARCH2_CONF_STATUS}')")
  endif ()
  # Append config data from getarch_2nd to the TARGET file and read in CMake
  # vars. The output is quoted so it is written verbatim as a single argument
  # (an unquoted expansion would be split on ';' and the separators dropped).
  file(APPEND "${TARGET_CONF_TEMP}" "${GETARCH2_CONF_OUT}")
  configure_file("${TARGET_CONF_TEMP}" "${TARGET_CONF_DIR}/${TARGET_CONF}" COPYONLY)
  ParseGetArchVars(${GETARCH2_MAKE_OUT})
  1409. endif()