You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

prebuild.cmake 44 kB

5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400
  1. ##
  2. ## Author: Hank Anderson <hank@statease.com>
  3. ## Description: Ported from OpenBLAS/Makefile.prebuild
  4. ## This is triggered by system.cmake and runs before any of the code is built.
  5. ## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
  6. ## Next it runs f_check and appends some fortran information to the files.
  7. ## Then it runs getarch and getarch_2nd for even more environment information.
  8. ## Finally it builds gen_config_h for use at build time to generate config.h.
  9. # CMake vars set by this file:
  10. # CORE
  11. # LIBCORE
  12. # NUM_CORES
  13. # HAVE_MMX
  14. # HAVE_SSE
  15. # HAVE_SSE2
  16. # HAVE_SSE3
  17. # MAKE
  18. # SBGEMM_UNROLL_M
  19. # SBGEMM_UNROLL_N
  20. # SGEMM_UNROLL_M
  21. # SGEMM_UNROLL_N
  22. # DGEMM_UNROLL_M
  23. # DGEMM_UNROLL_N
  24. # QGEMM_UNROLL_M
  25. # QGEMM_UNROLL_N
  26. # CGEMM_UNROLL_M
  27. # CGEMM_UNROLL_N
  28. # ZGEMM_UNROLL_M
  29. # ZGEMM_UNROLL_N
  30. # XGEMM_UNROLL_M
  31. # XGEMM_UNROLL_N
  32. # CGEMM3M_UNROLL_M
  33. # CGEMM3M_UNROLL_N
  34. # ZGEMM3M_UNROLL_M
  35. # ZGEMM3M_UNROLL_N
  36. # XGEMM3M_UNROLL_M
  37. # XGEMM3M_UNROLL_N
  38. # CPUIDEMU = ../../cpuid/table.o
  # When CPUIDEMU has been defined by the including scope, record the matching
  # preprocessor switches in EXFLAGS; otherwise EXFLAGS is left untouched.
  if(DEFINED CPUIDEMU)
    set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
  endif()
  # Pick the name and output directory of the generated configuration header.
  # The defaults describe a regular build; a BUILD_KERNEL build overrides both
  # and additionally records -DBUILD_KERNEL in GETARCH2_FLAGS.
  set(TARGET_CONF "config.h")
  set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR})
  if(BUILD_KERNEL)
    # set the C flags for just this file
    set(GETARCH2_FLAGS "-DBUILD_KERNEL")
    set(TARGET_CONF "config_kernel.h")
    set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE})
  endif()

  # Temporary file that the following sections write to and append to.
  set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")
  # c_check
  #
  # Starts ${TARGET_CONF_TEMP} afresh with the basic platform defines: host OS
  # (HOST_OS), upper-cased architecture (ARCH), C compiler family, word size
  # (BINARY) and the FUNDERSCORE value.
  #
  # Every variable expansion used in an if() condition is quoted, so an empty
  # value cannot turn the condition into a syntax error and a value that
  # happens to name another variable is not dereferenced a second time.

  # FU is the text emitted as FUNDERSCORE. It stays empty except on Apple
  # platforms, on MSVC with a C compiler whose id does not match "Clang", and
  # on MinGW that is not MINGW64.
  set(FU "")
  if(APPLE OR (MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang"))
    set(FU "_")
  endif()
  if(MINGW AND NOT MINGW64)
    set(FU "_")
  endif()

  # The generated header uses the name GCC where CMake reports the id "GNU".
  set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
  if("${COMPILER_ID}" STREQUAL "GNU")
    set(COMPILER_ID "GCC")
  endif()

  # An empty ARCH previously surfaced as an obscure "string sub-command TOUPPER
  # requires two arguments" error; fail with an explicit diagnostic instead.
  if("${ARCH}" STREQUAL "")
    message(FATAL_ERROR "ARCH is not set; cannot write the ARCH_<name> define to ${TARGET_CONF_TEMP}")
  endif()
  string(TOUPPER "${ARCH}" UC_ARCH)

  # file(WRITE) truncates the file, so this is always the first content in it.
  file(WRITE "${TARGET_CONF_TEMP}"
    "#define OS_${HOST_OS}\t1\n"
    "#define ARCH_${UC_ARCH}\t1\n"
    "#define C_${COMPILER_ID}\t1\n"
    "#define __${BINARY}BIT__\t1\n"
    "#define FUNDERSCORE\t${FU}\n")

  # A WINDOWSSTORE host additionally gets the OS_WINNT define.
  if("${HOST_OS}" STREQUAL "WINDOWSSTORE")
    file(APPEND "${TARGET_CONF_TEMP}"
      "#define OS_WINNT\t1\n")
  endif()
  # f_check
  #
  # With NOFORTRAN set, the BUNDERSCORE / NEEDBUNDERSCORE defines are appended
  # to the temporary header directly and BU is set to "_". Otherwise the
  # project's cmake/f_check.cmake module is included.
  if(NOFORTRAN)
    file(APPEND ${TARGET_CONF_TEMP}
      "#define BUNDERSCORE _\n"
      "#define NEEDBUNDERSCORE 1\n")
    set(BU "_")
  else()
    include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
  endif()
  84. # Cannot run getarch on target if we are cross-compiling
  85. if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
  86. # Write to config as getarch would
  87. if (DEFINED TARGET_CORE)
  88. set(TCORE ${TARGET_CORE})
  89. else()
  90. set(TCORE ${CORE})
  91. endif()
  92. # TODO: Set up defines that getarch sets up based on every other target
  93. # Perhaps this should be inside a different file as it grows larger
  94. file(APPEND ${TARGET_CONF_TEMP}
  95. "#define ${TCORE}\n"
  96. "#define CORE_${TCORE}\n"
  97. "#define CHAR_CORENAME \"${TCORE}\"\n")
  98. if ("${TCORE}" STREQUAL "CORE2")
  99. file(APPEND ${TARGET_CONF_TEMP}
  100. "#define L1_DATA_SIZE\t32768\n"
  101. "#define L1_DATA_LINESIZE\t64\n"
  102. "#define L2_SIZE\t1048576\n"
  103. "#define L2_LINESIZE\t64\n"
  104. "#define DTB_DEFAULT_ENTRIES\t256\n"
  105. "#define DTB_SIZE\t4096\n"
  106. "#define HAVE_CMOV\n"
  107. "#define HAVE_MMX\n"
  108. "#define HAVE_SSE\n"
  109. "#define HAVE_SSE2\n"
  110. "#define HAVE_SSE3\n"
  111. "#define HAVE_SSSE3\n"
  112. "#define SLOCAL_BUFFER_SIZE\t16384\n"
  113. "#define DLOCAL_BUFFER_SIZE\t16384\n"
  114. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  115. "#define ZLOCAL_BUFFER_SIZE\t16384\n")
  116. set(HAVE_SSE 1)
  117. set(HAVE_SSE2 1)
  118. set(HAVE_SSE3 1)
  119. set(HAVE_SSSE3 1)
  120. set(SBGEMM_UNROLL_M 8)
  121. set(SBGEMM_UNROLL_N 4)
  122. set(SGEMM_UNROLL_M 8)
  123. set(SGEMM_UNROLL_N 4)
  124. set(DGEMM_UNROLL_M 4)
  125. set(DGEMM_UNROLL_N 4)
  126. set(CGEMM_UNROLL_M 4)
  127. set(CGEMM_UNROLL_N 2)
  128. set(ZGEMM_UNROLL_M 2)
  129. set(ZGEMM_UNROLL_N 2)
  130. set(CGEMM3M_UNROLL_M 8)
  131. set(CGEMM3M_UNROLL_N 4)
  132. set(ZGEMM3M_UNROLL_M 4)
  133. set(ZGEMM3M_UNROLL_N 4)
  134. elseif ("${TCORE}" STREQUAL "ATOM")
  135. file(APPEND ${TARGET_CONF_TEMP}
  136. "#define L1_DATA_SIZE\t24576\n"
  137. "#define L1_DATA_LINESIZE\t64\n"
  138. "#define L2_SIZE\t524288\n"
  139. "#define L2_LINESIZE\t64\n"
  140. "#define DTB_DEFAULT_ENTRIES\t64\n"
  141. "#define DTB_SIZE\t4096\n"
  142. "#define HAVE_CMOV\n"
  143. "#define HAVE_MMX\n"
  144. "#define HAVE_SSE\n"
  145. "#define HAVE_SSE2\n"
  146. "#define HAVE_SSE3\n"
  147. "#define HAVE_SSSE3\n"
  148. "#define SLOCAL_BUFFER_SIZE\t16384\n"
  149. "#define DLOCAL_BUFFER_SIZE\t8192\n"
  150. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  151. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  152. set(HAVE_SSE 1)
  153. set(HAVE_SSE2 1)
  154. set(HAVE_SSE3 1)
  155. set(HAVE_SSSE3 1)
  156. set(SBGEMM_UNROLL_M 8)
  157. set(SBGEMM_UNROLL_N 4)
  158. set(SGEMM_UNROLL_M 8)
  159. set(SGEMM_UNROLL_N 4)
  160. set(DGEMM_UNROLL_M 4)
  161. set(DGEMM_UNROLL_N 2)
  162. set(CGEMM_UNROLL_M 4)
  163. set(CGEMM_UNROLL_N 2)
  164. set(ZGEMM_UNROLL_M 2)
  165. set(ZGEMM_UNROLL_N 1)
  166. set(CGEMM3M_UNROLL_M 8)
  167. set(CGEMM3M_UNROLL_N 4)
  168. set(ZGEMM3M_UNROLL_M 4)
  169. set(ZGEMM3M_UNROLL_N 4)
  170. elseif ("${TCORE}" STREQUAL "PRESCOTT")
  171. file(APPEND ${TARGET_CONF_TEMP}
  172. "#define L1_DATA_SIZE\t16384\n"
  173. "#define L1_DATA_LINESIZE\t64\n"
  174. "#define L2_SIZE\t1048576\n"
  175. "#define L2_LINESIZE\t64\n"
  176. "#define DTB_DEFAULT_ENTRIES\t64\n"
  177. "#define DTB_SIZE\t4096\n"
  178. "#define HAVE_CMOV\n"
  179. "#define HAVE_MMX\n"
  180. "#define HAVE_SSE\n"
  181. "#define HAVE_SSE2\n"
  182. "#define HAVE_SSE3\n"
  183. "#define SLOCAL_BUFFER_SIZE\t8192\n"
  184. "#define DLOCAL_BUFFER_SIZE\t8192\n"
  185. "#define CLOCAL_BUFFER_SIZE\t8192\n"
  186. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  187. set(HAVE_SSE 1)
  188. set(HAVE_SSE2 1)
  189. set(HAVE_SSE3 1)
  190. set(SBGEMM_UNROLL_M 8)
  191. set(SBGEMM_UNROLL_N 4)
  192. set(SGEMM_UNROLL_M 8)
  193. set(SGEMM_UNROLL_N 4)
  194. set(DGEMM_UNROLL_M 4)
  195. set(DGEMM_UNROLL_N 4)
  196. set(CGEMM_UNROLL_M 4)
  197. set(CGEMM_UNROLL_N 2)
  198. set(ZGEMM_UNROLL_M 2)
  199. set(ZGEMM_UNROLL_N 2)
  200. set(CGEMM3M_UNROLL_M 8)
  201. set(CGEMM3M_UNROLL_N 4)
  202. set(ZGEMM3M_UNROLL_M 4)
  203. set(ZGEMM3M_UNROLL_N 4)
  204. elseif ("${TCORE}" STREQUAL "NEHALEM")
  205. file(APPEND ${TARGET_CONF_TEMP}
  206. "#define L1_DATA_SIZE\t32768\n"
  207. "#define L1_DATA_LINESIZE\t64\n"
  208. "#define L2_SIZE\t262144\n"
  209. "#define L2_LINESIZE\t64\n"
  210. "#define DTB_DEFAULT_ENTRIES\t64\n"
  211. "#define DTB_SIZE\t4096\n"
  212. "#define HAVE_CMOV\n"
  213. "#define HAVE_MMX\n"
  214. "#define HAVE_SSE\n"
  215. "#define HAVE_SSE2\n"
  216. "#define HAVE_SSE3\n"
  217. "#define HAVE_SSSE3\n"
  218. "#define HAVE_SSE4_1\n"
  219. "#define HAVE_SSE4_2\n"
  220. "#define SLOCAL_BUFFER_SIZE\t65535\n"
  221. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  222. "#define CLOCAL_BUFFER_SIZE\t65536\n"
  223. "#define ZLOCAL_BUFFER_SIZE\t32768\n")
  224. set(HAVE_SSE 1)
  225. set(HAVE_SSE2 1)
  226. set(HAVE_SSE3 1)
  227. set(HAVE_SSSE3 1)
  228. set(HAVE_SSE4_1 1)
  229. set(HAVE_SSE4_2 1)
  230. set(SBGEMM_UNROLL_M 8)
  231. set(SBGEMM_UNROLL_N 4)
  232. set(SGEMM_UNROLL_M 4)
  233. set(SGEMM_UNROLL_N 8)
  234. set(DGEMM_UNROLL_M 2)
  235. set(DGEMM_UNROLL_N 8)
  236. set(CGEMM_UNROLL_M 2)
  237. set(CGEMM_UNROLL_N 4)
  238. set(ZGEMM_UNROLL_M 1)
  239. set(ZGEMM_UNROLL_N 4)
  240. set(CGEMM3M_UNROLL_M 4)
  241. set(CGEMM3M_UNROLL_N 8)
  242. set(ZGEMM3M_UNROLL_M 2)
  243. set(ZGEMM3M_UNROLL_N 8)
  244. elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
  245. file(APPEND ${TARGET_CONF_TEMP}
  246. "#define L1_DATA_SIZE\t32768\n"
  247. "#define L1_DATA_LINESIZE\t64\n"
  248. "#define L2_SIZE\t262144\n"
  249. "#define L2_LINESIZE\t64\n"
  250. "#define DTB_DEFAULT_ENTRIES\t64\n"
  251. "#define DTB_SIZE\t4096\n"
  252. "#define HAVE_CMOV\n"
  253. "#define HAVE_MMX\n"
  254. "#define HAVE_SSE\n"
  255. "#define HAVE_SSE2\n"
  256. "#define HAVE_SSE3\n"
  257. "#define HAVE_SSSE3\n"
  258. "#define HAVE_SSE4_1\n"
  259. "#define HAVE_SSE4_2\n"
  260. "#define HAVE_AVX\n"
  261. "#define SLOCAL_BUFFER_SIZE\t24576\n"
  262. "#define DLOCAL_BUFFER_SIZE\t16384\n"
  263. "#define CLOCAL_BUFFER_SIZE\t32768\n"
  264. "#define ZLOCAL_BUFFER_SIZE\t24576\n")
  265. set(HAVE_SSE 1)
  266. set(HAVE_SSE2 1)
  267. set(HAVE_SSE3 1)
  268. set(HAVE_SSSE3 1)
  269. set(HAVE_SSE4_1 1)
  270. set(HAVE_SSE4_2 1)
  271. set(HAVE_AVX 1)
  272. set(SBGEMM_UNROLL_M 8)
  273. set(SBGEMM_UNROLL_N 4)
  274. set(SGEMM_UNROLL_M 16)
  275. set(SGEMM_UNROLL_N 4)
  276. set(DGEMM_UNROLL_M 8)
  277. set(DGEMM_UNROLL_N 4)
  278. set(CGEMM_UNROLL_M 8)
  279. set(CGEMM_UNROLL_N 2)
  280. set(ZGEMM_UNROLL_M 1)
  281. set(ZGEMM_UNROLL_N 4)
  282. set(CGEMM3M_UNROLL_M 4)
  283. set(CGEMM3M_UNROLL_N 8)
  284. set(ZGEMM3M_UNROLL_M 2)
  285. set(ZGEMM3M_UNROLL_N 8)
  286. elseif ("${TCORE}" STREQUAL "HASWELL")
  287. file(APPEND ${TARGET_CONF_TEMP}
  288. "#define L1_DATA_SIZE\t32768\n"
  289. "#define L1_DATA_LINESIZE\t64\n"
  290. "#define L2_SIZE\t262144\n"
  291. "#define L2_LINESIZE\t64\n"
  292. "#define DTB_DEFAULT_ENTRIES\t64\n"
  293. "#define DTB_SIZE\t4096\n"
  294. "#define HAVE_CMOV\n"
  295. "#define HAVE_MMX\n"
  296. "#define HAVE_SSE\n"
  297. "#define HAVE_SSE2\n"
  298. "#define HAVE_SSE3\n"
  299. "#define HAVE_SSSE3\n"
  300. "#define HAVE_SSE4_1\n"
  301. "#define HAVE_SSE4_2\n"
  302. "#define HAVE_AVX\n"
  303. "#define HAVE_AVX2\n"
  304. "#define HAVE_FMA3\n"
  305. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  306. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  307. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  308. "#define ZLOCAL_BUFFER_SIZE\t12288\n")
  309. set(HAVE_SSE 1)
  310. set(HAVE_SSE2 1)
  311. set(HAVE_SSE3 1)
  312. set(HAVE_SSSE3 1)
  313. set(HAVE_SSE4_1 1)
  314. set(HAVE_SSE4_2 1)
  315. set(HAVE_AVX 1)
  316. set(HAVE_AVX2 1)
  317. set(HAVE_FMA3 1)
  318. set(SBGEMM_UNROLL_M 8)
  319. set(SBGEMM_UNROLL_N 4)
  320. set(SGEMM_UNROLL_M 8)
  321. set(SGEMM_UNROLL_N 4)
  322. set(DGEMM_UNROLL_M 4)
  323. set(DGEMM_UNROLL_N 8)
  324. set(CGEMM_UNROLL_M 8)
  325. set(CGEMM_UNROLL_N 2)
  326. set(ZGEMM_UNROLL_M 4)
  327. set(ZGEMM_UNROLL_N 2)
  328. set(CGEMM3M_UNROLL_M 8)
  329. set(CGEMM3M_UNROLL_N 4)
  330. set(ZGEMM3M_UNROLL_M 4)
  331. set(ZGEMM3M_UNROLL_N 4)
  332. elseif ("${TCORE}" STREQUAL "SKYLAKEX")
  333. file(APPEND ${TARGET_CONF_TEMP}
  334. "#define L1_DATA_SIZE\t32768\n"
  335. "#define L1_DATA_LINESIZE\t64\n"
  336. "#define L2_SIZE\t262144\n"
  337. "#define L2_LINESIZE\t64\n"
  338. "#define DTB_DEFAULT_ENTRIES\t64\n"
  339. "#define DTB_SIZE\t4096\n"
  340. "#define HAVE_CMOV\n"
  341. "#define HAVE_MMX\n"
  342. "#define HAVE_SSE\n"
  343. "#define HAVE_SSE2\n"
  344. "#define HAVE_SSE3\n"
  345. "#define HAVE_SSSE3\n"
  346. "#define HAVE_SSE4_1\n"
  347. "#define HAVE_SSE4_2\n"
  348. "#define HAVE_AVX\n"
  349. "#define HAVE_AVX2\n"
  350. "#define HAVE_FMA3\n"
  351. "#define HAVE_AVX512VL\n"
  352. "#define SLOCAL_BUFFER_SIZE\t28672\n"
  353. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  354. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  355. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  356. set(HAVE_CMOV 1)
  357. set(HAVE_MMX 1)
  358. set(HAVE_SSE 1)
  359. set(HAVE_SSE2 1)
  360. set(HAVE_SSE3 1)
  361. set(HAVE_SSSE3 1)
  362. set(HAVE_SSE4_1 1)
  363. set(HAVE_SSE4_2 1)
  364. set(HAVE_AVX 1)
  365. set(HAVE_AVX2 1)
  366. set(HAVE_FMA3 1)
  367. set(HAVE_AVX512VL 1)
  368. set(SBGEMM_UNROLL_M 8)
  369. set(SBGEMM_UNROLL_N 4)
  370. set(SGEMM_UNROLL_M 16)
  371. set(SGEMM_UNROLL_N 4)
  372. set(DGEMM_UNROLL_M 16)
  373. set(DGEMM_UNROLL_N 2)
  374. set(CGEMM_UNROLL_M 8)
  375. set(CGEMM_UNROLL_N 2)
  376. set(ZGEMM_UNROLL_M 4)
  377. set(ZGEMM_UNROLL_N 2)
  378. set(CGEMM3M_UNROLL_M 8)
  379. set(CGEMM3M_UNROLL_N 4)
  380. set(ZGEMM3M_UNROLL_M 4)
  381. set(ZGEMM3M_UNROLL_N 4)
  382. elseif ("${TCORE}" STREQUAL "COOPERLAKE")
  383. file(APPEND ${TARGET_CONF_TEMP}
  384. "#define L1_DATA_SIZE\t32768\n"
  385. "#define L1_DATA_LINESIZE\t64\n"
  386. "#define L2_SIZE\t262144\n"
  387. "#define L2_LINESIZE\t64\n"
  388. "#define DTB_DEFAULT_ENTRIES\t64\n"
  389. "#define DTB_SIZE\t4096\n"
  390. "#define HAVE_CMOV\n"
  391. "#define HAVE_MMX\n"
  392. "#define HAVE_SSE\n"
  393. "#define HAVE_SSE2\n"
  394. "#define HAVE_SSE3\n"
  395. "#define HAVE_SSSE3\n"
  396. "#define HAVE_SSE4_1\n"
  397. "#define HAVE_SSE4_2\n"
  398. "#define HAVE_AVX\n"
  399. "#define HAVE_AVX2\n"
  400. "#define HAVE_FMA3\n"
  401. "#define HAVE_AVX512VL\n"
  402. "#define HAVE_AVX512BF16\n"
  403. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  404. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  405. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  406. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  407. set(HAVE_CMOV 1)
  408. set(HAVE_MMX 1)
  409. set(HAVE_SSE 1)
  410. set(HAVE_SSE2 1)
  411. set(HAVE_SSE3 1)
  412. set(HAVE_SSSE3 1)
  413. set(HAVE_SSE4_1 1)
  414. set(HAVE_SSE4_2 1)
  415. set(HAVE_AVX 1)
  416. set(HAVE_AVX2 1)
  417. set(HAVE_FMA3 1)
  418. set(HAVE_AVX512VL 1)
  419. set(HAVE_AVX512BF16 1)
  420. set(SBGEMM_UNROLL_M 16)
  421. set(SBGEMM_UNROLL_N 4)
  422. set(SGEMM_UNROLL_M 16)
  423. set(SGEMM_UNROLL_N 4)
  424. set(DGEMM_UNROLL_M 16)
  425. set(DGEMM_UNROLL_N 2)
  426. set(CGEMM_UNROLL_M 8)
  427. set(CGEMM_UNROLL_N 2)
  428. set(ZGEMM_UNROLL_M 4)
  429. set(ZGEMM_UNROLL_N 2)
  430. set(CGEMM3M_UNROLL_M 8)
  431. set(CGEMM3M_UNROLL_N 4)
  432. set(ZGEMM3M_UNROLL_M 4)
  433. set(ZGEMM3M_UNROLL_N 4)
  434. elseif ("${TCORE}" STREQUAL "SAPPHIRERAPIDS")
  435. file(APPEND ${TARGET_CONF_TEMP}
  436. "#define L1_DATA_SIZE\t32768\n"
  437. "#define L1_DATA_LINESIZE\t64\n"
  438. "#define L2_SIZE\t262144\n"
  439. "#define L2_LINESIZE\t64\n"
  440. "#define DTB_DEFAULT_ENTRIES\t64\n"
  441. "#define DTB_SIZE\t4096\n"
  442. "#define HAVE_CMOV\n"
  443. "#define HAVE_MMX\n"
  444. "#define HAVE_SSE\n"
  445. "#define HAVE_SSE2\n"
  446. "#define HAVE_SSE3\n"
  447. "#define HAVE_SSSE3\n"
  448. "#define HAVE_SSE4_1\n"
  449. "#define HAVE_SSE4_2\n"
  450. "#define HAVE_AVX\n"
  451. "#define HAVE_AVX2\n"
  452. "#define HAVE_FMA3\n"
  453. "#define HAVE_AVX512VL\n"
  454. "#define HAVE_AVX512BF16\n"
  455. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  456. "#define DLOCAL_BUFFER_SIZE\t12288\n"
  457. "#define CLOCAL_BUFFER_SIZE\t12288\n"
  458. "#define ZLOCAL_BUFFER_SIZE\t8192\n")
  459. set(HAVE_CMOV 1)
  460. set(HAVE_MMX 1)
  461. set(HAVE_SSE 1)
  462. set(HAVE_SSE2 1)
  463. set(HAVE_SSE3 1)
  464. set(HAVE_SSSE3 1)
  465. set(HAVE_SSE4_1 1)
  466. set(HAVE_SSE4_2 1)
  467. set(HAVE_AVX 1)
  468. set(HAVE_AVX2 1)
  469. set(HAVE_FMA3 1)
  470. set(HAVE_AVX512VL 1)
  471. set(HAVE_AVX512BF16 1)
  472. set(SBGEMM_UNROLL_M 32)
  473. set(SBGEMM_UNROLL_N 16)
  474. set(SGEMM_UNROLL_M 16)
  475. set(SGEMM_UNROLL_N 4)
  476. set(DGEMM_UNROLL_M 16)
  477. set(DGEMM_UNROLL_N 2)
  478. set(CGEMM_UNROLL_M 8)
  479. set(CGEMM_UNROLL_N 2)
  480. set(ZGEMM_UNROLL_M 4)
  481. set(ZGEMM_UNROLL_N 2)
  482. set(CGEMM3M_UNROLL_M 8)
  483. set(CGEMM3M_UNROLL_N 4)
  484. set(ZGEMM3M_UNROLL_M 4)
  485. set(ZGEMM3M_UNROLL_N 4)
  486. elseif ("${TCORE}" STREQUAL "OPTERON")
  487. file(APPEND ${TARGET_CONF_TEMP}
  488. "#define L1_DATA_SIZE\t65536\n"
  489. "#define L1_DATA_LINESIZE\t64\n"
  490. "#define L2_SIZE\t1048576\n"
  491. "#define L2_LINESIZE\t64\n"
  492. "#define DTB_DEFAULT_ENTRIES\t32\n"
  493. "#define DTB_SIZE\t4096\n"
  494. "#define HAVE_3DNOW\n"
  495. "#define HAVE_3DNOWEX\n"
  496. "#define HAVE_MMX\n"
  497. "#define HAVE_SSE\n"
  498. "#define HAVE_SSE2\n"
  499. "#define SLOCAL_BUFFER_SIZE\t15360\n"
  500. "#define DLOCAL_BUFFER_SIZE\t15360\n"
  501. "#define CLOCAL_BUFFER_SIZE\t15360\n"
  502. "#define ZLOCAL_BUFFER_SIZE\t15360\n")
  503. set(HAVE_3DNOW 1)
  504. set(HAVE_3DNOWEX 1)
  505. set(HAVE_MMX 1)
  506. set(HAVE_SSE 1)
  507. set(HAVE_SSE2 1)
  508. set(SBGEMM_UNROLL_M 8)
  509. set(SBGEMM_UNROLL_N 4)
  510. set(SGEMM_UNROLL_M 8)
  511. set(SGEMM_UNROLL_N 4)
  512. set(DGEMM_UNROLL_M 4)
  513. set(DGEMM_UNROLL_N 4)
  514. set(CGEMM_UNROLL_M 4)
  515. set(CGEMM_UNROLL_N 2)
  516. set(ZGEMM_UNROLL_M 2)
  517. set(ZGEMM_UNROLL_N 2)
  518. set(CGEMM3M_UNROLL_M 8)
  519. set(CGEMM3M_UNROLL_N 4)
  520. set(ZGEMM3M_UNROLL_M 4)
  521. set(ZGEMM3M_UNROLL_N 4)
  522. elseif ("${TCORE}" STREQUAL "BARCELONA")
  523. file(APPEND ${TARGET_CONF_TEMP}
  524. "#define L1_DATA_SIZE\t32768\n"
  525. "#define L1_DATA_LINESIZE\t64\n"
  526. "#define L2_SIZE\t524288\n"
  527. "#define L2_LINESIZE\t64\n"
  528. "#define DTB_DEFAULT_ENTRIES\t64\n"
  529. "#define DTB_SIZE\t4096\n"
  530. "#define HAVE_MMX\n"
  531. "#define HAVE_SSE\n"
  532. "#define HAVE_SSE2\n"
  533. "#define HAVE_SSE3\n"
  534. "#define HAVE_SSE4A\n"
  535. "#define HAVE_MISALIGNSSE\n"
  536. "#define HAVE_128BITFPU\n"
  537. "#define HAVE_FASTMOVU\n"
  538. "#define SLOCAL_BUFFER_SIZE\t14336\n"
  539. "#define DLOCAL_BUFFER_SIZE\t14336\n"
  540. "#define CLOCAL_BUFFER_SIZE\t14336\n"
  541. "#define ZLOCAL_BUFFER_SIZE\t14336\n")
  542. set(HAVE_SSE 1)
  543. set(HAVE_SSE2 1)
  544. set(HAVE_SSE3 1)
  545. set(HAVE_SSE4A 1)
  546. set(HAVE_MISALIGNSSE 1)
  547. set(HAVE_128BITFPU 1)
  548. set(HAVE_FASTMOVU 1)
  549. set(SBGEMM_UNROLL_M 8)
  550. set(SBGEMM_UNROLL_N 4)
  551. set(SGEMM_UNROLL_M 8)
  552. set(SGEMM_UNROLL_N 4)
  553. set(DGEMM_UNROLL_M 4)
  554. set(DGEMM_UNROLL_N 4)
  555. set(CGEMM_UNROLL_M 4)
  556. set(CGEMM_UNROLL_N 2)
  557. set(ZGEMM_UNROLL_M 2)
  558. set(ZGEMM_UNROLL_N 2)
  559. set(CGEMM3M_UNROLL_M 8)
  560. set(CGEMM3M_UNROLL_N 4)
  561. set(ZGEMM3M_UNROLL_M 4)
  562. set(ZGEMM3M_UNROLL_N 4)
  563. elseif ("${TCORE}" STREQUAL "BULLDOZER")
  564. file(APPEND ${TARGET_CONF_TEMP}
  565. "#define L1_DATA_SIZE\t49152\n"
  566. "#define L1_DATA_LINESIZE\t64\n"
  567. "#define L2_SIZE\t1024000\n"
  568. "#define L2_LINESIZE\t64\n"
  569. "#define DTB_DEFAULT_ENTRIES\t32\n"
  570. "#define DTB_SIZE\t4096\n"
  571. "#define HAVE_MMX\n"
  572. "#define HAVE_SSE\n"
  573. "#define HAVE_SSE2\n"
  574. "#define HAVE_SSE3\n"
  575. "#define HAVE_SSE4A\n"
  576. "#define HAVE_AVX\n"
  577. "#define HAVE_MISALIGNSSE\n"
  578. "#define HAVE_128BITFPU\n"
  579. "#define HAVE_FASTMOVU\n"
  580. "#define SLOCAL_BUFFER_SIZE\t5376\n"
  581. "#define DLOCAL_BUFFER_SIZE\t5376\n"
  582. "#define CLOCAL_BUFFER_SIZE\t14336\n"
  583. "#define ZLOCAL_BUFFER_SIZE\t14336\n")
  584. set(HAVE_SSE 1)
  585. set(HAVE_SSE2 1)
  586. set(HAVE_SSE3 1)
  587. set(HAVE_SSE4A 1)
  588. set(HAVE_AVX 1)
  589. set(HAVE_MISALIGNSSE 1)
  590. set(HAVE_128BITFPU 1)
  591. set(HAVE_FASTMOVU 1)
  592. set(SBGEMM_UNROLL_M 8)
  593. set(SBGEMM_UNROLL_N 4)
  594. set(SGEMM_UNROLL_M 16)
  595. set(SGEMM_UNROLL_N 2)
  596. set(DGEMM_UNROLL_M 8)
  597. set(DGEMM_UNROLL_N 2)
  598. set(CGEMM_UNROLL_M 2)
  599. set(CGEMM_UNROLL_N 2)
  600. set(ZGEMM_UNROLL_M 2)
  601. set(ZGEMM_UNROLL_N 2)
  602. set(CGEMM3M_UNROLL_M 8)
  603. set(CGEMM3M_UNROLL_N 4)
  604. set(ZGEMM3M_UNROLL_M 4)
  605. set(ZGEMM3M_UNROLL_N 4)
  606. elseif ("${TCORE}" STREQUAL "PILEDRIVER")
  607. file(APPEND ${TARGET_CONF_TEMP}
  608. "#define L1_DATA_SIZE\t16384\n"
  609. "#define L1_DATA_LINESIZE\t64\n"
  610. "#define L2_SIZE\t2097152\n"
  611. "#define L2_LINESIZE\t64\n"
  612. "#define DTB_DEFAULT_ENTRIES\t64\n"
  613. "#define DTB_SIZE\t4096\n"
  614. "#define HAVE_MMX\n"
  615. "#define HAVE_SSE\n"
  616. "#define HAVE_SSE2\n"
  617. "#define HAVE_SSE3\n"
  618. "#define HAVE_SSE4_1\n"
  619. "#define HAVE_SSE4_2\n"
  620. "#define HAVE_SSE4A\n"
  621. "#define HAVE_AVX\n"
  622. "#define HAVE_MISALIGNSSE\n"
  623. "#define HAVE_128BITFPU\n"
  624. "#define HAVE_FASTMOVU\n"
  625. "#define HAVE_CFLUSH\n"
  626. "#define HAVE_FMA3\n"
  627. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  628. "#define DLOCAL_BUFFER_SIZE\t5376\n"
  629. "#define CLOCAL_BUFFER_SIZE\t10752\n"
  630. "#define ZLOCAL_BUFFER_SIZE\t10752\n")
  631. set(HAVE_SSE 1)
  632. set(HAVE_SSE2 1)
  633. set(HAVE_SSE3 1)
  634. set(HAVE_SSE4_1 1)
  635. set(HAVE_SSE4_2 1)
  636. set(HAVE_SSE4A 1)
  637. set(HAVE_AVX 1)
  638. set(HAVE_FMA3 1)
  639. set(HAVE_MISALIGNSSE 1)
  640. set(HAVE_128BITFPU 1)
  641. set(HAVE_FASTMOVU 1)
  642. set(HAVE_CFLUSH 1)
  643. set(SBGEMM_UNROLL_M 8)
  644. set(SBGEMM_UNROLL_N 4)
  645. set(SGEMM_UNROLL_M 16)
  646. set(SGEMM_UNROLL_N 2)
  647. set(DGEMM_UNROLL_M 8)
  648. set(DGEMM_UNROLL_N 2)
  649. set(CGEMM_UNROLL_M 4)
  650. set(CGEMM_UNROLL_N 2)
  651. set(ZGEMM_UNROLL_M 2)
  652. set(ZGEMM_UNROLL_N 2)
  653. set(CGEMM3M_UNROLL_M 8)
  654. set(CGEMM3M_UNROLL_N 4)
  655. set(ZGEMM3M_UNROLL_M 4)
  656. set(ZGEMM3M_UNROLL_N 4)
  657. elseif ("${TCORE}" STREQUAL "STEAMROLLER")
  658. file(APPEND ${TARGET_CONF_TEMP}
  659. "#define L1_DATA_SIZE\t16384\n"
  660. "#define L1_DATA_LINESIZE\t64\n"
  661. "#define L2_SIZE\t2097152\n"
  662. "#define L2_LINESIZE\t64\n"
  663. "#define DTB_DEFAULT_ENTRIES\t64\n"
  664. "#define DTB_SIZE\t4096\n"
  665. "#define HAVE_MMX\n"
  666. "#define HAVE_SSE\n"
  667. "#define HAVE_SSE2\n"
  668. "#define HAVE_SSE3\n"
  669. "#define HAVE_SSE4_1\n"
  670. "#define HAVE_SSE4_2\n"
  671. "#define HAVE_SSE4A\n"
  672. "#define HAVE_AVX\n"
  673. "#define HAVE_MISALIGNSSE\n"
  674. "#define HAVE_128BITFPU\n"
  675. "#define HAVE_FASTMOVU\n"
  676. "#define HAVE_CFLUSH\n"
  677. "#define HAVE_FMA3\n"
  678. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  679. "#define DLOCAL_BUFFER_SIZE\t5120\n"
  680. "#define CLOCAL_BUFFER_SIZE\t10240\n"
  681. "#define ZLOCAL_BUFFER_SIZE\t10240\n")
  682. set(HAVE_SSE 1)
  683. set(HAVE_SSE2 1)
  684. set(HAVE_SSE3 1)
  685. set(HAVE_SSE4_1 1)
  686. set(HAVE_SSE4_2 1)
  687. set(HAVE_SSE4A 1)
  688. set(HAVE_AVX 1)
  689. set(HAVE_FMA3 1)
  690. set(HAVE_MISALIGNSSE 1)
  691. set(HAVE_128BITFPU 1)
  692. set(HAVE_FASTMOVU 1)
  693. set(HAVE_CFLUSH 1)
  694. set(SBGEMM_UNROLL_M 8)
  695. set(SBGEMM_UNROLL_N 4)
  696. set(SGEMM_UNROLL_M 16)
  697. set(SGEMM_UNROLL_N 2)
  698. set(DGEMM_UNROLL_M 8)
  699. set(DGEMM_UNROLL_N 2)
  700. set(CGEMM_UNROLL_M 4)
  701. set(CGEMM_UNROLL_N 2)
  702. set(ZGEMM_UNROLL_M 2)
  703. set(ZGEMM_UNROLL_N 2)
  704. set(CGEMM3M_UNROLL_M 8)
  705. set(CGEMM3M_UNROLL_N 4)
  706. set(ZGEMM3M_UNROLL_M 4)
  707. set(ZGEMM3M_UNROLL_N 4)
  708. elseif ("${TCORE}" STREQUAL "EXCAVATOR")
  709. file(APPEND ${TARGET_CONF_TEMP}
  710. "#define L1_DATA_SIZE\t16384\n"
  711. "#define L1_DATA_LINESIZE\t64\n"
  712. "#define L2_SIZE\t2097152\n"
  713. "#define L2_LINESIZE\t64\n"
  714. "#define DTB_DEFAULT_ENTRIES\t64\n"
  715. "#define DTB_SIZE\t4096\n"
  716. "#define HAVE_MMX\n"
  717. "#define HAVE_SSE\n"
  718. "#define HAVE_SSE2\n"
  719. "#define HAVE_SSE3\n"
  720. "#define HAVE_SSE4_1\n"
  721. "#define HAVE_SSE4_2\n"
  722. "#define HAVE_SSE4A\n"
  723. "#define HAVE_AVX\n"
  724. "#define HAVE_MISALIGNSSE\n"
  725. "#define HAVE_128BITFPU\n"
  726. "#define HAVE_FASTMOVU\n"
  727. "#define HAVE_CFLUSH\n"
  728. "#define HAVE_FMA3\n"
  729. "#define SLOCAL_BUFFER_SIZE\t6144\n"
  730. "#define DLOCAL_BUFFER_SIZE\t5120\n"
  731. "#define CLOCAL_BUFFER_SIZE\t10240\n"
  732. "#define ZLOCAL_BUFFER_SIZE\t10240\n")
  733. set(HAVE_SSE 1)
  734. set(HAVE_SSE2 1)
  735. set(HAVE_SSE3 1)
  736. set(HAVE_SSE4_1 1)
  737. set(HAVE_SSE4_2 1)
  738. set(HAVE_SSE4A 1)
  739. set(HAVE_AVX 1)
  740. set(HAVE_FMA3 1)
  741. set(HAVE_MISALIGNSSE 1)
  742. set(HAVE_128BITFPU 1)
  743. set(HAVE_FASTMOVU 1)
  744. set(HAVE_CFLUSH 1)
  745. set(SBGEMM_UNROLL_M 8)
  746. set(SBGEMM_UNROLL_N 4)
  747. set(SGEMM_UNROLL_M 16)
  748. set(SGEMM_UNROLL_N 2)
  749. set(DGEMM_UNROLL_M 8)
  750. set(DGEMM_UNROLL_N 2)
  751. set(CGEMM_UNROLL_M 4)
  752. set(CGEMM_UNROLL_N 2)
  753. set(ZGEMM_UNROLL_M 2)
  754. set(ZGEMM_UNROLL_N 2)
  755. set(CGEMM3M_UNROLL_M 8)
  756. set(CGEMM3M_UNROLL_N 4)
  757. set(ZGEMM3M_UNROLL_M 4)
  758. set(ZGEMM3M_UNROLL_N 4)
  759. elseif ("${TCORE}" STREQUAL "ZEN")
  760. file(APPEND ${TARGET_CONF_TEMP}
  761. "#define L1_DATA_SIZE\t32768\n"
  762. "#define L1_DATA_LINESIZE\t64\n"
  763. "#define L2_SIZE\t524288\n"
  764. "#define L2_LINESIZE\t64\n"
  765. "#define DTB_DEFAULT_ENTRIES\t64\n"
  766. "#define DTB_SIZE\t4096\n"
  767. "#define HAVE_MMX\n"
  768. "#define HAVE_SSE\n"
  769. "#define HAVE_SSE2\n"
  770. "#define HAVE_SSE3\n"
  771. "#define HAVE_SSE4_1\n"
  772. "#define HAVE_SSE4_2\n"
  773. "#define HAVE_SSE4A\n"
  774. "#define HAVE_MISALIGNSSE\n"
  775. "#define HAVE_128BITFPU\n"
  776. "#define HAVE_FASTMOVU\n"
  777. "#define HAVE_CFLUSH\n"
  778. "#define HAVE_AVX\n"
  779. "#define HAVE_AVX2\n"
  780. "#define HAVE_FMA3\n"
  781. "#define SLOCAL_BUFFER_SIZE\t20480\n"
  782. "#define DLOCAL_BUFFER_SIZE\t32768\n"
  783. "#define CLOCAL_BUFFER_SIZE\t16384\n"
  784. "#define ZLOCAL_BUFFER_SIZE\t12288\n")
  785. set(HAVE_SSE 1)
  786. set(HAVE_SSE2 1)
  787. set(HAVE_SSE3 1)
  788. set(HAVE_SSE4_1 1)
  789. set(HAVE_SSE4_2 1)
  790. set(HAVE_AVX 1)
  791. set(HAVE_AVX2 1)
  792. set(HAVE_FMA3 1)
  793. set(HAVE_SSE4A 1)
  794. set(HAVE_MISALIGNSSE 1)
  795. set(HAVE_128BITFPU 1)
  796. set(HAVE_FASTMOVU 1)
  797. set(HAVE_CFLUSH 1)
  798. set(SBGEMM_UNROLL_M 8)
  799. set(SBGEMM_UNROLL_N 4)
  800. set(SGEMM_UNROLL_M 8)
  801. set(SGEMM_UNROLL_N 4)
  802. set(DGEMM_UNROLL_M 4)
  803. set(DGEMM_UNROLL_N 8)
  804. set(CGEMM_UNROLL_M 8)
  805. set(CGEMM_UNROLL_N 2)
  806. set(ZGEMM_UNROLL_M 4)
  807. set(ZGEMM_UNROLL_N 2)
  808. set(CGEMM3M_UNROLL_M 8)
  809. set(CGEMM3M_UNROLL_N 4)
  810. set(ZGEMM3M_UNROLL_M 4)
  811. set(ZGEMM3M_UNROLL_N 4)
  812. elseif ("${TCORE}" STREQUAL "ARMV7")
  813. file(APPEND ${TARGET_CONF_TEMP}
  814. "#define L1_DATA_SIZE\t65536\n"
  815. "#define L1_DATA_LINESIZE\t32\n"
  816. "#define L2_SIZE\t512488\n"
  817. "#define L2_LINESIZE\t32\n"
  818. "#define DTB_DEFAULT_ENTRIES\t64\n"
  819. "#define DTB_SIZE\t4096\n"
  820. "#define L2_ASSOCIATIVE\t4\n"
  821. "#define HAVE_VFPV3\n"
  822. "#define HAVE_VFP\n")
  823. set(SGEMM_UNROLL_M 4)
  824. set(SGEMM_UNROLL_N 4)
  825. set(DGEMM_UNROLL_M 4)
  826. set(DGEMM_UNROLL_N 4)
  827. set(CGEMM_UNROLL_M 2)
  828. set(CGEMM_UNROLL_N 2)
  829. set(ZGEMM_UNROLL_M 2)
  830. set(ZGEMM_UNROLL_N 2)
  elseif ("${TCORE}" STREQUAL "ARMV8")
    # GEMM unroll factors and SYMV_P for the generic ARMV8 target.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L2_SIZE\t262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define L2_ASSOCIATIVE\t32\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53" OR "${TCORE}" STREQUAL "CORTEXA55")
    # SGEMM unrolling differs: CORTEXA57 uses 16x4, the other two cores 8x8.
    if (NOT "${TCORE}" STREQUAL "CORTEXA57")
      set(SGEMM_UNROLL_M 8)
      set(SGEMM_UNROLL_N 8)
    else ()
      set(SGEMM_UNROLL_M 16)
      set(SGEMM_UNROLL_N 4)
    endif ()
    # Remaining unroll factors are shared by all three cores.
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache, TLB and SIMD/VFP definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      "#define L2_SIZE\t262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t16\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73")
    # GEMM unroll factors and SYMV_P shared by CORTEXA72 and CORTEXA73.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache, TLB and SIMD/VFP definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t49152\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t16\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
    # GEMM unroll factors and SYMV_P for NEOVERSEN1.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache, TLB and SIMD/VFP definitions appended to the temporary config file.
    # The L2_SIZE entry ends in two newlines, so an empty line follows it.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      "#define L2_SIZE\t1048576\n\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define DTB_DEFAULT_ENTRIES\t48\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "NEOVERSEV1")
    # GEMM unroll factors and SYMV_P for NEOVERSEV1.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache, TLB and SIMD definitions (including HAVE_SVE) appended to the
    # temporary config file. The L2_SIZE entry ends in two newlines, so an
    # empty line follows it.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      "#define L2_SIZE\t1048576\n\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define DTB_DEFAULT_ENTRIES\t48\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define HAVE_SVE\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "NEOVERSEN2")
    # GEMM unroll factors and SYMV_P for NEOVERSEN2.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache, TLB and SIMD definitions (including HAVE_SVE) appended to the
    # temporary config file. The L2_SIZE entry ends in two newlines, so an
    # empty line follows it.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      "#define L2_SIZE\t1048576\n\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define DTB_DEFAULT_ENTRIES\t48\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define HAVE_SVE\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "FALKOR")
    # GEMM unroll factors and SYMV_P for FALKOR.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache, TLB and SIMD/VFP definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t128\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t16\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "THUNDERX")
    # Cache, TLB and SIMD/VFP definitions appended to the temporary config file.
    # L2_SIZE is 16 MiB (16777216 bytes); the previous literal 167772164 was
    # that number with a stray trailing digit.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t3\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t128\n"
      "#define L1_DATA_ASSOCIATIVE\t2\n"
      "#define L2_SIZE\t16777216\n"
      "#define L2_LINESIZE\t128\n"
      "#define L2_ASSOCIATIVE\t16\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define HAVE_VFPV4\n"
      "#define HAVE_VFPV3\n"
      "#define HAVE_VFP\n"
      "#define HAVE_NEON\n"
      "#define ARMV8\n")

    # GEMM unroll factors and SYMV_P for THUNDERX.
    set(SGEMM_UNROLL_M 4)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 2)
    set(DGEMM_UNROLL_N 2)
    set(CGEMM_UNROLL_M 2)
    set(CGEMM_UNROLL_N 2)
    set(ZGEMM_UNROLL_M 2)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 16)
  elseif ("${TCORE}" STREQUAL "THUNDERX2T99")
    # GEMM unroll factors and SYMV_P for THUNDERX2T99.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # L1/L2/L3 cache and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t8\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t8\n"
      "#define L2_SIZE\t262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define L3_SIZE\t33554432\n"
      "#define L3_LINESIZE\t64\n"
      "#define L3_ASSOCIATIVE\t32\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
    # GEMM unroll factors and SYMV_P for THUNDERX3T110.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Core marker plus L1/L2/L3 cache and TLB definitions appended to the
    # temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define THUNDERX3T110\n"
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t8\n"
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t8\n"
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define L3_SIZE\t94371840\n"
      "#define L3_LINESIZE\t64\n"
      "#define L3_ASSOCIATIVE\t32\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
      "#define ARMV8\n"
    )
  elseif ("${TCORE}" STREQUAL "TSV110")
    # GEMM unroll factors and SYMV_P for TSV110.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define ARMV8\n"
      "#define L1_CODE_SIZE\t65536\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      "#define L1_DATA_SIZE\t65536\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      "#define L2_SIZE\t524288\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
    )
  elseif ("${TCORE}" STREQUAL "EMAG8180")
    # GEMM unroll factors and SYMV_P for EMAG8180.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache and TLB definitions appended to the temporary config file.
    # NOTE(review): L2_SIZE 5262144 is not a power of two and looks like a
    # mistyped 262144 or 524288 - confirm the intended value.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define ARMV8\n"
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      "#define L2_SIZE\t5262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
    )
  elseif ("${TCORE}" STREQUAL "VORTEX")
    # GEMM unroll factors and SYMV_P for VORTEX.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 8)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 4)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 16)

    # Cache and TLB definitions appended to the temporary config file.
    # NOTE(review): L2_SIZE 5262144 is not a power of two and looks like a
    # mistyped 262144 or 524288 - confirm the intended value.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define ARMV8\n"
      "#define L1_CODE_SIZE\t32768\n"
      "#define L1_CODE_LINESIZE\t64\n"
      "#define L1_CODE_ASSOCIATIVE\t4\n"
      "#define L1_DATA_SIZE\t32768\n"
      "#define L1_DATA_LINESIZE\t64\n"
      "#define L1_DATA_ASSOCIATIVE\t4\n"
      "#define L2_SIZE\t5262144\n"
      "#define L2_LINESIZE\t64\n"
      "#define L2_ASSOCIATIVE\t8\n"
      "#define DTB_DEFAULT_ENTRIES\t64\n"
      "#define DTB_SIZE\t4096\n"
    )
  elseif ("${TCORE}" STREQUAL "P5600")
    # All GEMM kernels use 2x2 unrolling on P5600.
    set(SGEMM_UNROLL_M 2)
    set(SGEMM_UNROLL_N 2)
    set(DGEMM_UNROLL_M 2)
    set(DGEMM_UNROLL_N 2)
    set(CGEMM_UNROLL_M 2)
    set(CGEMM_UNROLL_N 2)
    set(ZGEMM_UNROLL_M 2)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 16)

    # L2 and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L2_SIZE 1048576\n"
      "#define DTB_SIZE 4096\n"
      "#define DTB_DEFAULT_ENTRIES 64\n"
    )
  elseif ("${TCORE}" MATCHES "MIPS")
    # Any core name containing "MIPS" lands here (regex match, not equality).
    # All GEMM kernels use 2x2 unrolling.
    set(SGEMM_UNROLL_M 2)
    set(SGEMM_UNROLL_N 2)
    set(DGEMM_UNROLL_M 2)
    set(DGEMM_UNROLL_N 2)
    set(CGEMM_UNROLL_M 2)
    set(CGEMM_UNROLL_N 2)
    set(ZGEMM_UNROLL_M 2)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 16)

    # L2 and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L2_SIZE 262144\n"
      "#define DTB_SIZE 4096\n"
      "#define DTB_DEFAULT_ENTRIES 64\n"
    )
  elseif ("${TCORE}" STREQUAL "POWER6")
    # GEMM unroll factors and SYMV_P for POWER6.
    set(SGEMM_UNROLL_M 4)
    set(SGEMM_UNROLL_N 4)
    set(DGEMM_UNROLL_M 4)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 2)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 2)
    set(ZGEMM_UNROLL_N 4)
    set(SYMV_P 8)

    # Cache and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n"
    )
  elseif ("${TCORE}" STREQUAL "POWER8")
    # GEMM unroll factors and SYMV_P for POWER8.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 8)
    set(DGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 8)

    # Cache and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n"
    )
  elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
    # GEMM unroll factors and SYMV_P shared by POWER9 and POWER10.
    set(SGEMM_UNROLL_M 16)
    set(SGEMM_UNROLL_N 8)
    set(DGEMM_UNROLL_M 16)
    set(DGEMM_UNROLL_N 4)
    set(CGEMM_UNROLL_M 8)
    set(CGEMM_UNROLL_N 4)
    set(ZGEMM_UNROLL_M 8)
    set(ZGEMM_UNROLL_N 2)
    set(SYMV_P 8)

    # Cache and TLB definitions appended to the temporary config file.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n"
    )
  elseif ("${TCORE}" STREQUAL "GENERIC")
    # GENERIC only writes cache and TLB definitions; no unroll factors are
    # set in this branch.
    file(APPEND ${TARGET_CONF_TEMP}
      "#define L1_DATA_SIZE 32768\n"
      "#define L1_DATA_LINESIZE 128\n"
      "#define L2_SIZE 524288\n"
      "#define L2_LINESIZE 128 \n"
      "#define DTB_DEFAULT_ENTRIES 128\n"
      "#define DTB_SIZE 4096\n"
      "#define L2_ASSOCIATIVE 8\n"
    )
  1259. endif()
  # SBGEMM unroll factors are the same for every target handled above.
  set(SBGEMM_UNROLL_M 8)
  set(SBGEMM_UNROLL_N 4)

  # Record NUM_CORES in the config file when a positive thread count is known.
  # The value is quoted and guarded with DEFINED so an unset or empty
  # NUM_THREADS skips the define instead of producing a malformed if().
  # Open question from the original author: should this be driven by a
  # NUM_CORES setting rather than NUM_THREADS?
  if (DEFINED NUM_THREADS AND "${NUM_THREADS}" GREATER 0)
    file(APPEND ${TARGET_CONF_TEMP} "#define NUM_CORES\t${NUM_THREADS}\n")
  endif()
  # GetArch_2nd
  # Any precision whose unroll factors were not set above falls back to 2.
  foreach(float_char S D Q C Z X)
    if (NOT DEFINED ${float_char}GEMM_UNROLL_N)
      set(${float_char}GEMM_UNROLL_N 2)
    endif()
    if (NOT DEFINED ${float_char}GEMM_UNROLL_M)
      set(${float_char}GEMM_UNROLL_M 2)
    endif()
  endforeach()
  # Make sure the destination directory exists before the file is moved.
  file(MAKE_DIRECTORY ${TARGET_CONF_DIR})
  # Last entry written to the temporary config file.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n"
  )
  # Move to where gen_config_h would place it
  file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}")
  1280. else(NOT CMAKE_CROSSCOMPILING)
  # compile getarch
  # Sources always include getarch.c plus whatever CPUIDEMU expands to.
  set(GETARCH_SRC
    ${PROJECT_SOURCE_DIR}/getarch.c
    ${CPUIDEMU}
  )
  if (NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
    # Non-MSVC compilers also build the assembly cpuid source and may force
    # a specific core through TARGET_CORE.
    list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
    if (DEFINED TARGET_CORE)
      set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_${TARGET_CORE})
    endif ()
  else ()
    # Use generic for MSVC now
    message("MSVC")
    set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
  endif ()

  if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
    # disable WindowsStore strict CRT checks
    set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
  endif ()

  # Build directory and binary name for getarch; the current temporary
  # config file is copied next to it.
  set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
  set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
  file(MAKE_DIRECTORY ${GETARCH_DIR})
  configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY)
  # Build getarch at configure time; skipped entirely for WindowsStore.
  if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
    if (NOT CMAKE_ASM_COMPILER_ID STREQUAL "")
      # An assembler was identified: compile with the default toolchain setup.
      try_compile(GETARCH_RESULT ${GETARCH_DIR}
        SOURCES ${GETARCH_SRC}
        COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
        OUTPUT_VARIABLE GETARCH_LOG
        COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
      )
    else ()
      # No assembler ID: pass the C compiler as CMAKE_ASM_COMPILER.
      try_compile(GETARCH_RESULT ${GETARCH_DIR}
        SOURCES ${GETARCH_SRC}
        CMAKE_FLAGS "-DCMAKE_ASM_COMPILER=${CMAKE_C_COMPILER}"
        COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
        OUTPUT_VARIABLE GETARCH_LOG
        COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
      )
    endif ()

    # Abort configuration with the captured build log on failure.
    if (NOT ${GETARCH_RESULT})
      message(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
    endif ()
  endif ()
  # Clear the CPU feature flags before getarch output is parsed.
  # The loop variable is removed again afterwards so nothing new leaks into
  # the surrounding scope.
  foreach(_getarch_cpu_feature
      AVX2 AVX FMA3 MMX
      SSE SSE2 SSE3 SSSE3 SSE4A SSE4_1 SSE4_2
      NEON VFP VFPV3 VFPV4)
    unset(HAVE_${_getarch_cpu_feature})
  endforeach()
  unset(_getarch_cpu_feature)
  message(STATUS "Running getarch")
  # Run the getarch binary directly, twice: argument 0 produces the text that
  # is parsed into CMake variables, argument 1 produces the text appended to
  # the config file. Exit status is captured so a failed run is reported.
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0
    OUTPUT_VARIABLE GETARCH_MAKE_OUT
    RESULT_VARIABLE GETARCH_MAKE_RESULT)
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1
    OUTPUT_VARIABLE GETARCH_CONF_OUT
    RESULT_VARIABLE GETARCH_CONF_RESULT)
  if (NOT "${GETARCH_MAKE_RESULT}" STREQUAL "0" OR NOT "${GETARCH_CONF_RESULT}" STREQUAL "0")
    # Only warn: the original flow continued regardless of the exit status.
    message(WARNING "getarch did not exit cleanly (results: '${GETARCH_MAKE_RESULT}', '${GETARCH_CONF_RESULT}'); detected configuration may be incomplete")
  endif ()
  message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")

  # append config data from getarch to the TARGET file and read in CMake vars
  # (quoted so the captured text is written verbatim, including any ';')
  file(APPEND ${TARGET_CONF_TEMP} "${GETARCH_CONF_OUT}")
  ParseGetArchVars(${GETARCH_MAKE_OUT})
  # Second stage: getarch_2nd is built against the config file that now
  # contains the getarch output.
  set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
  set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
  file(MAKE_DIRECTORY ${GETARCH2_DIR})
  configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY)

  # As with getarch, the build step is skipped for WindowsStore.
  if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
    try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
      SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
      COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
      OUTPUT_VARIABLE GETARCH2_LOG
      COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
    )

    # Abort configuration with the captured build log on failure.
    if (NOT ${GETARCH2_RESULT})
      message(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
    endif ()
  endif ()
  # Run the getarch_2nd binary directly, twice: argument 0 produces the text
  # that is parsed into CMake variables, argument 1 produces the text appended
  # to the config file. Exit status is captured so a failed run is reported.
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0
    OUTPUT_VARIABLE GETARCH2_MAKE_OUT
    RESULT_VARIABLE GETARCH2_MAKE_RESULT)
  execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1
    OUTPUT_VARIABLE GETARCH2_CONF_OUT
    RESULT_VARIABLE GETARCH2_CONF_RESULT)
  if (NOT "${GETARCH2_MAKE_RESULT}" STREQUAL "0" OR NOT "${GETARCH2_CONF_RESULT}" STREQUAL "0")
    # Only warn: the original flow continued regardless of the exit status.
    message(WARNING "getarch_2nd did not exit cleanly (results: '${GETARCH2_MAKE_RESULT}', '${GETARCH2_CONF_RESULT}'); detected configuration may be incomplete")
  endif ()

  # append config data from getarch_2nd to the TARGET file and read in CMake vars
  # (quoted so the captured text is written verbatim, including any ';')
  file(APPEND ${TARGET_CONF_TEMP} "${GETARCH2_CONF_OUT}")
  # Publish the completed config file to its final location.
  configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY)
  ParseGetArchVars(${GETARCH2_MAKE_OUT})
  1370. endif()