You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 36 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef PARAM_H
  39. #define PARAM_H
  40. #ifdef OPTERON /* Parameter set compiled in only when OPTERON is defined. */
  41. #define SNUMOPT 4
  42. #define DNUMOPT 2
  43. #define GEMM_DEFAULT_OFFSET_A 64
  44. #define GEMM_DEFAULT_OFFSET_B 256
  45. #define GEMM_DEFAULT_ALIGN 0x01ffffUL /* NOTE(review): presumably used as an alignment bit mask - confirm at the use site. */
  46. #define SGEMM_DEFAULT_UNROLL_N 4 /* The UNROLL_N values below are outside the ARCH_X86 split, so they apply to every build. */
  47. #define DGEMM_DEFAULT_UNROLL_N 4
  48. #define QGEMM_DEFAULT_UNROLL_N 2
  49. #define CGEMM_DEFAULT_UNROLL_N 2
  50. #define ZGEMM_DEFAULT_UNROLL_N 2
  51. #define XGEMM_DEFAULT_UNROLL_N 1
  52. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined. */
  53. #define SGEMM_DEFAULT_UNROLL_M 4
  54. #define DGEMM_DEFAULT_UNROLL_M 2
  55. #define QGEMM_DEFAULT_UNROLL_M 2
  56. #define CGEMM_DEFAULT_UNROLL_M 2
  57. #define ZGEMM_DEFAULT_UNROLL_M 1
  58. #define XGEMM_DEFAULT_UNROLL_M 1
  59. #else /* UNROLL_M values used when ARCH_X86 is not defined; S, D, C and Z are doubled, Q and X are unchanged. */
  60. #define SGEMM_DEFAULT_UNROLL_M 8
  61. #define DGEMM_DEFAULT_UNROLL_M 4
  62. #define QGEMM_DEFAULT_UNROLL_M 2
  63. #define CGEMM_DEFAULT_UNROLL_M 4
  64. #define ZGEMM_DEFAULT_UNROLL_M 2
  65. #define XGEMM_DEFAULT_UNROLL_M 1
  66. #endif /* ARCH_X86 */
  67. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...), not literals; those names are not defined in this excerpt. */
  68. #define DGEMM_DEFAULT_P dgemm_p
  69. #define QGEMM_DEFAULT_P qgemm_p
  70. #define CGEMM_DEFAULT_P cgemm_p
  71. #define ZGEMM_DEFAULT_P zgemm_p
  72. #define XGEMM_DEFAULT_P xgemm_p
  73. #define SGEMM_DEFAULT_R sgemm_r
  74. #define DGEMM_DEFAULT_R dgemm_r
  75. #define QGEMM_DEFAULT_R qgemm_r
  76. #define CGEMM_DEFAULT_R cgemm_r
  77. #define ZGEMM_DEFAULT_R zgemm_r
  78. #define XGEMM_DEFAULT_R xgemm_r
  79. #ifdef ALLOC_HUGETLB /* Q is 248 for every prefix when ALLOC_HUGETLB is defined. */
  80. #define SGEMM_DEFAULT_Q 248
  81. #define DGEMM_DEFAULT_Q 248
  82. #define QGEMM_DEFAULT_Q 248
  83. #define CGEMM_DEFAULT_Q 248
  84. #define ZGEMM_DEFAULT_Q 248
  85. #define XGEMM_DEFAULT_Q 248
  86. #else /* Q is 240 for every prefix otherwise. */
  87. #define SGEMM_DEFAULT_Q 240
  88. #define DGEMM_DEFAULT_Q 240
  89. #define QGEMM_DEFAULT_Q 240
  90. #define CGEMM_DEFAULT_Q 240
  91. #define ZGEMM_DEFAULT_Q 240
  92. #define XGEMM_DEFAULT_Q 240
  93. #endif /* ALLOC_HUGETLB */
  94. #define SYMV_P 16
  95. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: only its presence can be tested. */
  96. #endif /* OPTERON */
  97. #if defined(BARCELONA) || defined(SHANGHAI) /* One parameter set shared by BARCELONA and SHANGHAI builds. */
  98. #define SNUMOPT 8
  99. #define DNUMOPT 4
  100. #define GEMM_DEFAULT_OFFSET_A 64
  101. #define GEMM_DEFAULT_OFFSET_B 832
  102. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  103. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N is outside the ARCH_X86 split, so these values apply to every build. */
  104. #define DGEMM_DEFAULT_UNROLL_N 4
  105. #define QGEMM_DEFAULT_UNROLL_N 2
  106. #define CGEMM_DEFAULT_UNROLL_N 2
  107. #define ZGEMM_DEFAULT_UNROLL_N 2
  108. #define XGEMM_DEFAULT_UNROLL_N 1
  109. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined. */
  110. #define SGEMM_DEFAULT_UNROLL_M 4
  111. #define DGEMM_DEFAULT_UNROLL_M 2
  112. #define QGEMM_DEFAULT_UNROLL_M 2
  113. #define CGEMM_DEFAULT_UNROLL_M 2
  114. #define ZGEMM_DEFAULT_UNROLL_M 1
  115. #define XGEMM_DEFAULT_UNROLL_M 1
  116. #else /* UNROLL_M values used when ARCH_X86 is not defined. */
  117. #define SGEMM_DEFAULT_UNROLL_M 8
  118. #define DGEMM_DEFAULT_UNROLL_M 4
  119. #define QGEMM_DEFAULT_UNROLL_M 2
  120. #define CGEMM_DEFAULT_UNROLL_M 4
  121. #define ZGEMM_DEFAULT_UNROLL_M 2
  122. #define XGEMM_DEFAULT_UNROLL_M 1
  123. #endif /* ARCH_X86 */
  124. #if 0 /* Disabled alternative: this branch is never compiled. It holds a P/Q set built around 248. */
  125. #define SGEMM_DEFAULT_P 496
  126. #define DGEMM_DEFAULT_P 248
  127. #define QGEMM_DEFAULT_P 124
  128. #define CGEMM_DEFAULT_P 248
  129. #define ZGEMM_DEFAULT_P 124
  130. #define XGEMM_DEFAULT_P 62
  131. #define SGEMM_DEFAULT_Q 248
  132. #define DGEMM_DEFAULT_Q 248
  133. #define QGEMM_DEFAULT_Q 248
  134. #define CGEMM_DEFAULT_Q 248
  135. #define ZGEMM_DEFAULT_Q 248
  136. #define XGEMM_DEFAULT_Q 248
  137. #else /* Active P/Q set: literal values, with Q fixed at 224 for every prefix. */
  138. #define SGEMM_DEFAULT_P 448
  139. #define DGEMM_DEFAULT_P 224
  140. #define QGEMM_DEFAULT_P 112
  141. #define CGEMM_DEFAULT_P 224
  142. #define ZGEMM_DEFAULT_P 112
  143. #define XGEMM_DEFAULT_P 56
  144. #define SGEMM_DEFAULT_Q 224
  145. #define DGEMM_DEFAULT_Q 224
  146. #define QGEMM_DEFAULT_Q 224
  147. #define CGEMM_DEFAULT_Q 224
  148. #define ZGEMM_DEFAULT_Q 224
  149. #define XGEMM_DEFAULT_Q 224
  150. #endif /* #if 0 */
  151. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
  152. #define QGEMM_DEFAULT_R qgemm_r /* Q is listed before D here; the order of these independent defines has no effect. */
  153. #define DGEMM_DEFAULT_R dgemm_r
  154. #define CGEMM_DEFAULT_R cgemm_r
  155. #define ZGEMM_DEFAULT_R zgemm_r
  156. #define XGEMM_DEFAULT_R xgemm_r
  157. #define SYMV_P 16
  158. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: only its presence can be tested. */
  159. #define GEMM_THREAD gemm_thread_mn /* Expands to the identifier gemm_thread_mn, which is not defined in this excerpt. */
  160. #endif /* BARCELONA || SHANGHAI */
  161. #ifdef ATHLON /* Parameter set compiled in only when ATHLON is defined. */
  162. #define SNUMOPT 4
  163. #define DNUMOPT 2
  164. #define GEMM_DEFAULT_OFFSET_A 0
  165. #define GEMM_DEFAULT_OFFSET_B 384
  166. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  167. #define SGEMM_DEFAULT_UNROLL_N 4 /* This section has no ARCH_X86 split: one UNROLL_N/UNROLL_M set for every build. */
  168. #define DGEMM_DEFAULT_UNROLL_N 4
  169. #define QGEMM_DEFAULT_UNROLL_N 2
  170. #define CGEMM_DEFAULT_UNROLL_N 2
  171. #define ZGEMM_DEFAULT_UNROLL_N 2
  172. #define XGEMM_DEFAULT_UNROLL_N 1
  173. #define SGEMM_DEFAULT_UNROLL_M 2
  174. #define DGEMM_DEFAULT_UNROLL_M 1
  175. #define QGEMM_DEFAULT_UNROLL_M 2
  176. #define CGEMM_DEFAULT_UNROLL_M 1
  177. #define ZGEMM_DEFAULT_UNROLL_M 1
  178. #define XGEMM_DEFAULT_UNROLL_M 1
  179. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
  180. #define DGEMM_DEFAULT_R dgemm_r
  181. #define QGEMM_DEFAULT_R qgemm_r
  182. #define CGEMM_DEFAULT_R cgemm_r
  183. #define ZGEMM_DEFAULT_R zgemm_r
  184. #define XGEMM_DEFAULT_R xgemm_r
  185. #define SGEMM_DEFAULT_P 208 /* P is a literal per prefix here. */
  186. #define DGEMM_DEFAULT_P 104
  187. #define QGEMM_DEFAULT_P 56
  188. #define CGEMM_DEFAULT_P 104
  189. #define ZGEMM_DEFAULT_P 56
  190. #define XGEMM_DEFAULT_P 28
  191. #define SGEMM_DEFAULT_Q 208 /* Q is 208 for every prefix. */
  192. #define DGEMM_DEFAULT_Q 208
  193. #define QGEMM_DEFAULT_Q 208
  194. #define CGEMM_DEFAULT_Q 208
  195. #define ZGEMM_DEFAULT_Q 208
  196. #define XGEMM_DEFAULT_Q 208
  197. #define SYMV_P 16
  198. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: only its presence can be tested. */
  199. #endif /* ATHLON */
  200. #ifdef VIAC3 /* Parameter set compiled in only when VIAC3 is defined. */
  201. #define SNUMOPT 2
  202. #define DNUMOPT 1
  203. #define GEMM_DEFAULT_OFFSET_A 0
  204. #define GEMM_DEFAULT_OFFSET_B 256
  205. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  206. #define SGEMM_DEFAULT_UNROLL_N 4 /* This section has no ARCH_X86 split: one UNROLL_N/UNROLL_M set for every build. */
  207. #define DGEMM_DEFAULT_UNROLL_N 4
  208. #define QGEMM_DEFAULT_UNROLL_N 2
  209. #define CGEMM_DEFAULT_UNROLL_N 2
  210. #define ZGEMM_DEFAULT_UNROLL_N 2
  211. #define XGEMM_DEFAULT_UNROLL_N 1
  212. #define SGEMM_DEFAULT_UNROLL_M 2
  213. #define DGEMM_DEFAULT_UNROLL_M 1
  214. #define QGEMM_DEFAULT_UNROLL_M 2
  215. #define CGEMM_DEFAULT_UNROLL_M 1
  216. #define ZGEMM_DEFAULT_UNROLL_M 1
  217. #define XGEMM_DEFAULT_UNROLL_M 1
  218. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
  219. #define DGEMM_DEFAULT_R dgemm_r
  220. #define QGEMM_DEFAULT_R qgemm_r
  221. #define CGEMM_DEFAULT_R cgemm_r
  222. #define ZGEMM_DEFAULT_R zgemm_r
  223. #define XGEMM_DEFAULT_R xgemm_r
  224. #define SGEMM_DEFAULT_P 128 /* P is the literal 128 for every prefix. */
  225. #define DGEMM_DEFAULT_P 128
  226. #define QGEMM_DEFAULT_P 128
  227. #define CGEMM_DEFAULT_P 128
  228. #define ZGEMM_DEFAULT_P 128
  229. #define XGEMM_DEFAULT_P 128
  230. #define SGEMM_DEFAULT_Q 512 /* Unlike P, Q varies by prefix: 512 (S), 256 (D, Q, C), 128 (Z, X). */
  231. #define DGEMM_DEFAULT_Q 256
  232. #define QGEMM_DEFAULT_Q 256
  233. #define CGEMM_DEFAULT_Q 256
  234. #define ZGEMM_DEFAULT_Q 128
  235. #define XGEMM_DEFAULT_Q 128
  236. #define SYMV_P 16 /* This section does not define HAVE_EXCLUSIVE_CACHE. */
  237. #endif /* VIAC3 */
  238. #ifdef NANO /* Parameter set compiled in only when NANO is defined. */
  239. #define SNUMOPT 4
  240. #define DNUMOPT 2
  241. #define GEMM_DEFAULT_OFFSET_A 64
  242. #define GEMM_DEFAULT_OFFSET_B 256
  243. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  244. #ifdef ARCH_X86 /* Both UNROLL_N and UNROLL_M are inside the ARCH_X86 split in this section. */
  245. #define SGEMM_DEFAULT_UNROLL_N 4
  246. #define DGEMM_DEFAULT_UNROLL_N 4
  247. #define QGEMM_DEFAULT_UNROLL_N 2
  248. #define CGEMM_DEFAULT_UNROLL_N 2
  249. #define ZGEMM_DEFAULT_UNROLL_N 2
  250. #define XGEMM_DEFAULT_UNROLL_N 1
  251. #define SGEMM_DEFAULT_UNROLL_M 4
  252. #define DGEMM_DEFAULT_UNROLL_M 2
  253. #define QGEMM_DEFAULT_UNROLL_M 2
  254. #define CGEMM_DEFAULT_UNROLL_M 2
  255. #define ZGEMM_DEFAULT_UNROLL_M 1
  256. #define XGEMM_DEFAULT_UNROLL_M 1
  257. #else /* Values used when ARCH_X86 is not defined; here SGEMM uses N=8 with M=4. */
  258. #define SGEMM_DEFAULT_UNROLL_N 8
  259. #define DGEMM_DEFAULT_UNROLL_N 4
  260. #define QGEMM_DEFAULT_UNROLL_N 2
  261. #define CGEMM_DEFAULT_UNROLL_N 4
  262. #define ZGEMM_DEFAULT_UNROLL_N 2
  263. #define XGEMM_DEFAULT_UNROLL_N 1
  264. #define SGEMM_DEFAULT_UNROLL_M 4
  265. #define DGEMM_DEFAULT_UNROLL_M 4
  266. #define QGEMM_DEFAULT_UNROLL_M 2
  267. #define CGEMM_DEFAULT_UNROLL_M 2
  268. #define ZGEMM_DEFAULT_UNROLL_M 2
  269. #define XGEMM_DEFAULT_UNROLL_M 1
  270. #endif /* ARCH_X86 */
  271. #define SGEMM_DEFAULT_P 288 /* P is the literal 288 for every prefix. */
  272. #define DGEMM_DEFAULT_P 288
  273. #define QGEMM_DEFAULT_P 288
  274. #define CGEMM_DEFAULT_P 288
  275. #define ZGEMM_DEFAULT_P 288
  276. #define XGEMM_DEFAULT_P 288
  277. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers (sgemm_r, ...) that are not defined in this excerpt. */
  278. #define DGEMM_DEFAULT_R dgemm_r
  279. #define QGEMM_DEFAULT_R qgemm_r
  280. #define CGEMM_DEFAULT_R cgemm_r
  281. #define ZGEMM_DEFAULT_R zgemm_r
  282. #define XGEMM_DEFAULT_R xgemm_r
  283. #define SGEMM_DEFAULT_Q 256 /* Q halves across the list: 256 (S), 128 (D, C), 64 (Q, Z), 32 (X). */
  284. #define DGEMM_DEFAULT_Q 128
  285. #define QGEMM_DEFAULT_Q 64
  286. #define CGEMM_DEFAULT_Q 128
  287. #define ZGEMM_DEFAULT_Q 64
  288. #define XGEMM_DEFAULT_Q 32
  289. #define SYMV_P 16
  290. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: only its presence can be tested. */
  291. #endif /* NANO */
  292. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3) /* One parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3 builds. */
  293. #ifdef HAVE_SSE /* SNUMOPT is 2 when HAVE_SSE is defined, 1 otherwise. */
  294. #define SNUMOPT 2
  295. #else
  296. #define SNUMOPT 1
  297. #endif /* HAVE_SSE */
  298. #define DNUMOPT 1
  299. #define GEMM_DEFAULT_OFFSET_A 0
  300. #define GEMM_DEFAULT_OFFSET_B 0
  301. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  302. #ifdef HAVE_SSE /* Only the SGEMM and CGEMM UNROLL_M values depend on HAVE_SSE. */
  303. #define SGEMM_DEFAULT_UNROLL_M 8
  304. #define CGEMM_DEFAULT_UNROLL_M 4
  305. #else
  306. #define SGEMM_DEFAULT_UNROLL_M 4
  307. #define CGEMM_DEFAULT_UNROLL_M 2
  308. #endif /* HAVE_SSE */
  309. #define DGEMM_DEFAULT_UNROLL_M 2 /* The remaining UNROLL values are unconditional within this section. */
  310. #define SGEMM_DEFAULT_UNROLL_N 2
  311. #define DGEMM_DEFAULT_UNROLL_N 2
  312. #define QGEMM_DEFAULT_UNROLL_M 2
  313. #define QGEMM_DEFAULT_UNROLL_N 2
  314. #define CGEMM_DEFAULT_UNROLL_N 1
  315. #define ZGEMM_DEFAULT_UNROLL_M 1
  316. #define ZGEMM_DEFAULT_UNROLL_N 1
  317. #define XGEMM_DEFAULT_UNROLL_M 1
  318. #define XGEMM_DEFAULT_UNROLL_N 1
  319. #define SGEMM_DEFAULT_P sgemm_p /* Per prefix: P and R expand to identifiers not defined in this excerpt; Q is the literal 256. */
  320. #define SGEMM_DEFAULT_Q 256
  321. #define SGEMM_DEFAULT_R sgemm_r
  322. #define DGEMM_DEFAULT_P dgemm_p
  323. #define DGEMM_DEFAULT_Q 256
  324. #define DGEMM_DEFAULT_R dgemm_r
  325. #define QGEMM_DEFAULT_P qgemm_p
  326. #define QGEMM_DEFAULT_Q 256
  327. #define QGEMM_DEFAULT_R qgemm_r
  328. #define CGEMM_DEFAULT_P cgemm_p
  329. #define CGEMM_DEFAULT_Q 256
  330. #define CGEMM_DEFAULT_R cgemm_r
  331. #define ZGEMM_DEFAULT_P zgemm_p
  332. #define ZGEMM_DEFAULT_Q 256
  333. #define ZGEMM_DEFAULT_R zgemm_r
  334. #define XGEMM_DEFAULT_P xgemm_p
  335. #define XGEMM_DEFAULT_Q 256
  336. #define XGEMM_DEFAULT_R xgemm_r
  337. #define SYMV_P 4
  338. #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  339. #ifdef PENTIUMM /* Parameter set compiled in only when PENTIUMM is defined. */
  340. #define SNUMOPT 2
  341. #define DNUMOPT 1
  342. #define GEMM_DEFAULT_OFFSET_A 0
  343. #define GEMM_DEFAULT_OFFSET_B 0
  344. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  345. #ifdef CORE_YONAH /* UNROLL_M/UNROLL_N values used when CORE_YONAH is also defined. */
  346. #define SGEMM_DEFAULT_UNROLL_M 4
  347. #define SGEMM_DEFAULT_UNROLL_N 4
  348. #define DGEMM_DEFAULT_UNROLL_M 2
  349. #define DGEMM_DEFAULT_UNROLL_N 4
  350. #define QGEMM_DEFAULT_UNROLL_M 2
  351. #define QGEMM_DEFAULT_UNROLL_N 2
  352. #define CGEMM_DEFAULT_UNROLL_M 2
  353. #define CGEMM_DEFAULT_UNROLL_N 2
  354. #define ZGEMM_DEFAULT_UNROLL_M 1
  355. #define ZGEMM_DEFAULT_UNROLL_N 2
  356. #define XGEMM_DEFAULT_UNROLL_M 1
  357. #define XGEMM_DEFAULT_UNROLL_N 1
  358. #else /* UNROLL_M/UNROLL_N values used when CORE_YONAH is not defined. */
  359. #define SGEMM_DEFAULT_UNROLL_M 8
  360. #define SGEMM_DEFAULT_UNROLL_N 2
  361. #define DGEMM_DEFAULT_UNROLL_M 2
  362. #define DGEMM_DEFAULT_UNROLL_N 2
  363. #define QGEMM_DEFAULT_UNROLL_M 2
  364. #define QGEMM_DEFAULT_UNROLL_N 2
  365. #define CGEMM_DEFAULT_UNROLL_M 4
  366. #define CGEMM_DEFAULT_UNROLL_N 1
  367. #define ZGEMM_DEFAULT_UNROLL_M 1
  368. #define ZGEMM_DEFAULT_UNROLL_N 1
  369. #define XGEMM_DEFAULT_UNROLL_M 1
  370. #define XGEMM_DEFAULT_UNROLL_N 1
  371. #endif /* CORE_YONAH */
  372. #define SGEMM_DEFAULT_P sgemm_p /* Per prefix: P and R expand to identifiers not defined in this excerpt; Q is the literal 256. */
  373. #define SGEMM_DEFAULT_Q 256
  374. #define SGEMM_DEFAULT_R sgemm_r
  375. #define DGEMM_DEFAULT_P dgemm_p
  376. #define DGEMM_DEFAULT_Q 256
  377. #define DGEMM_DEFAULT_R dgemm_r
  378. #define QGEMM_DEFAULT_P qgemm_p
  379. #define QGEMM_DEFAULT_Q 256
  380. #define QGEMM_DEFAULT_R qgemm_r
  381. #define CGEMM_DEFAULT_P cgemm_p
  382. #define CGEMM_DEFAULT_Q 256
  383. #define CGEMM_DEFAULT_R cgemm_r
  384. #define ZGEMM_DEFAULT_P zgemm_p
  385. #define ZGEMM_DEFAULT_Q 256
  386. #define ZGEMM_DEFAULT_R zgemm_r
  387. #define XGEMM_DEFAULT_P xgemm_p
  388. #define XGEMM_DEFAULT_Q 256
  389. #define XGEMM_DEFAULT_R xgemm_r
  390. #define SYMV_P 4
  391. #endif /* PENTIUMM */
  392. #ifdef CORE_NORTHWOOD /* Parameter set compiled in only when CORE_NORTHWOOD is defined. */
  393. #define SNUMOPT 4
  394. #define DNUMOPT 2
  395. #define GEMM_DEFAULT_OFFSET_A 0
  396. #define GEMM_DEFAULT_OFFSET_B 32
  397. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  398. #define SYMV_P 8
  399. #define SGEMM_DEFAULT_UNROLL_M 8 /* This section has no ARCH_X86 split: one UNROLL_M/UNROLL_N set for every build. */
  400. #define DGEMM_DEFAULT_UNROLL_M 4
  401. #define QGEMM_DEFAULT_UNROLL_M 2
  402. #define CGEMM_DEFAULT_UNROLL_M 4
  403. #define ZGEMM_DEFAULT_UNROLL_M 2
  404. #define XGEMM_DEFAULT_UNROLL_M 1
  405. #define SGEMM_DEFAULT_UNROLL_N 2
  406. #define DGEMM_DEFAULT_UNROLL_N 2
  407. #define QGEMM_DEFAULT_UNROLL_N 2
  408. #define CGEMM_DEFAULT_UNROLL_N 1
  409. #define ZGEMM_DEFAULT_UNROLL_N 1
  410. #define XGEMM_DEFAULT_UNROLL_N 1
  411. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this excerpt. */
  412. #define SGEMM_DEFAULT_R sgemm_r
  413. #define DGEMM_DEFAULT_P dgemm_p
  414. #define DGEMM_DEFAULT_R dgemm_r
  415. #define QGEMM_DEFAULT_P qgemm_p
  416. #define QGEMM_DEFAULT_R qgemm_r
  417. #define CGEMM_DEFAULT_P cgemm_p
  418. #define CGEMM_DEFAULT_R cgemm_r
  419. #define ZGEMM_DEFAULT_P zgemm_p
  420. #define ZGEMM_DEFAULT_R zgemm_r
  421. #define XGEMM_DEFAULT_P xgemm_p
  422. #define XGEMM_DEFAULT_R xgemm_r
  423. #define SGEMM_DEFAULT_Q 128 /* Q is the literal 128 for every prefix. */
  424. #define DGEMM_DEFAULT_Q 128
  425. #define QGEMM_DEFAULT_Q 128
  426. #define CGEMM_DEFAULT_Q 128
  427. #define ZGEMM_DEFAULT_Q 128
  428. #define XGEMM_DEFAULT_Q 128
  429. #endif /* CORE_NORTHWOOD */
  430. #ifdef CORE_PRESCOTT /* Parameter set compiled in only when CORE_PRESCOTT is defined. */
  431. #define SNUMOPT 4
  432. #define DNUMOPT 2
  433. #ifndef __64BIT__ /* OFFSET_A/OFFSET_B pair used when __64BIT__ is NOT defined. */
  434. #define GEMM_DEFAULT_OFFSET_A 128
  435. #define GEMM_DEFAULT_OFFSET_B 192
  436. #else /* OFFSET_A/OFFSET_B pair used when __64BIT__ is defined. */
  437. #define GEMM_DEFAULT_OFFSET_A 0
  438. #define GEMM_DEFAULT_OFFSET_B 256
  439. #endif /* __64BIT__ */
  440. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  441. #define SYMV_P 8
  442. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined. */
  443. #define SGEMM_DEFAULT_UNROLL_M 4
  444. #define DGEMM_DEFAULT_UNROLL_M 2
  445. #define QGEMM_DEFAULT_UNROLL_M 2
  446. #define CGEMM_DEFAULT_UNROLL_M 2
  447. #define ZGEMM_DEFAULT_UNROLL_M 1
  448. #define XGEMM_DEFAULT_UNROLL_M 1
  449. #else /* UNROLL_M values used when ARCH_X86 is not defined. */
  450. #define SGEMM_DEFAULT_UNROLL_M 8
  451. #define DGEMM_DEFAULT_UNROLL_M 4
  452. #define QGEMM_DEFAULT_UNROLL_M 2
  453. #define CGEMM_DEFAULT_UNROLL_M 4
  454. #define ZGEMM_DEFAULT_UNROLL_M 2
  455. #define XGEMM_DEFAULT_UNROLL_M 1
  456. #endif /* ARCH_X86 */
  457. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N is outside the ARCH_X86 split, so these values apply to every build. */
  458. #define DGEMM_DEFAULT_UNROLL_N 4
  459. #define QGEMM_DEFAULT_UNROLL_N 2
  460. #define CGEMM_DEFAULT_UNROLL_N 2
  461. #define ZGEMM_DEFAULT_UNROLL_N 2
  462. #define XGEMM_DEFAULT_UNROLL_N 1
  463. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this excerpt. */
  464. #define SGEMM_DEFAULT_R sgemm_r
  465. #define DGEMM_DEFAULT_P dgemm_p
  466. #define DGEMM_DEFAULT_R dgemm_r
  467. #define QGEMM_DEFAULT_P qgemm_p
  468. #define QGEMM_DEFAULT_R qgemm_r
  469. #define CGEMM_DEFAULT_P cgemm_p
  470. #define CGEMM_DEFAULT_R cgemm_r
  471. #define ZGEMM_DEFAULT_P zgemm_p
  472. #define ZGEMM_DEFAULT_R zgemm_r
  473. #define XGEMM_DEFAULT_P xgemm_p
  474. #define XGEMM_DEFAULT_R xgemm_r
  475. #define SGEMM_DEFAULT_Q 128 /* Q is the literal 128 for every prefix. */
  476. #define DGEMM_DEFAULT_Q 128
  477. #define QGEMM_DEFAULT_Q 128
  478. #define CGEMM_DEFAULT_Q 128
  479. #define ZGEMM_DEFAULT_Q 128
  480. #define XGEMM_DEFAULT_Q 128
  481. #endif /* CORE_PRESCOTT */
  482. #ifdef CORE2 /* Parameter set compiled in only when CORE2 is defined. */
  483. #define SNUMOPT 8
  484. #define DNUMOPT 4
  485. #define GEMM_DEFAULT_OFFSET_A 448
  486. #define GEMM_DEFAULT_OFFSET_B 128
  487. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  488. #define SYMV_P 8
  489. #define SWITCH_RATIO 4
  490. #ifdef ARCH_X86 /* Values used when ARCH_X86 is defined. */
  491. #define SGEMM_DEFAULT_UNROLL_M 8
  492. #define DGEMM_DEFAULT_UNROLL_M 4
  493. #define QGEMM_DEFAULT_UNROLL_M 2
  494. #define CGEMM_DEFAULT_UNROLL_M 4
  495. #define ZGEMM_DEFAULT_UNROLL_M 2
  496. #define XGEMM_DEFAULT_UNROLL_M 1
  497. #define SGEMM_DEFAULT_UNROLL_N 2
  498. #define DGEMM_DEFAULT_UNROLL_N 2
  499. #define QGEMM_DEFAULT_UNROLL_N 2
  500. #define CGEMM_DEFAULT_UNROLL_N 1
  501. #define ZGEMM_DEFAULT_UNROLL_N 1
  502. #define XGEMM_DEFAULT_UNROLL_N 1
  503. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b)) /* Rounds a up to a multiple of b via integer division (for positive integer operands); defined only in this ARCH_X86 branch. */
  504. #else /* Values used when ARCH_X86 is not defined: UNROLL_M is the same as above, UNROLL_N is larger for S, D, C and Z. */
  505. #define SGEMM_DEFAULT_UNROLL_M 8
  506. #define DGEMM_DEFAULT_UNROLL_M 4
  507. #define QGEMM_DEFAULT_UNROLL_M 2
  508. #define CGEMM_DEFAULT_UNROLL_M 4
  509. #define ZGEMM_DEFAULT_UNROLL_M 2
  510. #define XGEMM_DEFAULT_UNROLL_M 1
  511. #define SGEMM_DEFAULT_UNROLL_N 4
  512. #define DGEMM_DEFAULT_UNROLL_N 4
  513. #define QGEMM_DEFAULT_UNROLL_N 2
  514. #define CGEMM_DEFAULT_UNROLL_N 2
  515. #define ZGEMM_DEFAULT_UNROLL_N 2
  516. #define XGEMM_DEFAULT_UNROLL_N 1
  517. #endif /* ARCH_X86 */
  518. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this excerpt. */
  519. #define SGEMM_DEFAULT_R sgemm_r
  520. #define DGEMM_DEFAULT_P dgemm_p
  521. #define DGEMM_DEFAULT_R dgemm_r
  522. #define QGEMM_DEFAULT_P qgemm_p
  523. #define QGEMM_DEFAULT_R qgemm_r
  524. #define CGEMM_DEFAULT_P cgemm_p
  525. #define CGEMM_DEFAULT_R cgemm_r
  526. #define ZGEMM_DEFAULT_P zgemm_p
  527. #define ZGEMM_DEFAULT_R zgemm_r
  528. #define XGEMM_DEFAULT_P xgemm_p
  529. #define XGEMM_DEFAULT_R xgemm_r
  530. #define SGEMM_DEFAULT_Q 256 /* Q is the literal 256 for every prefix. */
  531. #define DGEMM_DEFAULT_Q 256
  532. #define QGEMM_DEFAULT_Q 256
  533. #define CGEMM_DEFAULT_Q 256
  534. #define ZGEMM_DEFAULT_Q 256
  535. #define XGEMM_DEFAULT_Q 256
  536. #endif /* CORE2 */
  537. #ifdef PENRYN /* Parameter set compiled in only when PENRYN is defined. */
  538. #define SNUMOPT 8
  539. #define DNUMOPT 4
  540. #define GEMM_DEFAULT_OFFSET_A 128
  541. #define GEMM_DEFAULT_OFFSET_B 0
  542. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  543. #define SYMV_P 8
  544. #define SWITCH_RATIO 4
  545. #ifdef ARCH_X86 /* Values used when ARCH_X86 is defined. */
  546. #define SGEMM_DEFAULT_UNROLL_M 4
  547. #define DGEMM_DEFAULT_UNROLL_M 2
  548. #define QGEMM_DEFAULT_UNROLL_M 2
  549. #define CGEMM_DEFAULT_UNROLL_M 2
  550. #define ZGEMM_DEFAULT_UNROLL_M 1
  551. #define XGEMM_DEFAULT_UNROLL_M 1
  552. #define SGEMM_DEFAULT_UNROLL_N 4
  553. #define DGEMM_DEFAULT_UNROLL_N 4
  554. #define QGEMM_DEFAULT_UNROLL_N 2
  555. #define CGEMM_DEFAULT_UNROLL_N 2
  556. #define ZGEMM_DEFAULT_UNROLL_N 2
  557. #define XGEMM_DEFAULT_UNROLL_N 1
  558. #else /* Values used when ARCH_X86 is not defined: UNROLL_N is identical to the branch above, only UNROLL_M differs. */
  559. #define SGEMM_DEFAULT_UNROLL_M 8
  560. #define DGEMM_DEFAULT_UNROLL_M 4
  561. #define QGEMM_DEFAULT_UNROLL_M 2
  562. #define CGEMM_DEFAULT_UNROLL_M 4
  563. #define ZGEMM_DEFAULT_UNROLL_M 2
  564. #define XGEMM_DEFAULT_UNROLL_M 1
  565. #define SGEMM_DEFAULT_UNROLL_N 4
  566. #define DGEMM_DEFAULT_UNROLL_N 4
  567. #define QGEMM_DEFAULT_UNROLL_N 2
  568. #define CGEMM_DEFAULT_UNROLL_N 2
  569. #define ZGEMM_DEFAULT_UNROLL_N 2
  570. #define XGEMM_DEFAULT_UNROLL_N 1
  571. #endif /* ARCH_X86 */
  572. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this excerpt. */
  573. #define SGEMM_DEFAULT_R sgemm_r
  574. #define DGEMM_DEFAULT_P dgemm_p
  575. #define DGEMM_DEFAULT_R dgemm_r
  576. #define QGEMM_DEFAULT_P qgemm_p
  577. #define QGEMM_DEFAULT_R qgemm_r
  578. #define CGEMM_DEFAULT_P cgemm_p
  579. #define CGEMM_DEFAULT_R cgemm_r
  580. #define ZGEMM_DEFAULT_P zgemm_p
  581. #define ZGEMM_DEFAULT_R zgemm_r
  582. #define XGEMM_DEFAULT_P xgemm_p
  583. #define XGEMM_DEFAULT_R xgemm_r
  584. #define SGEMM_DEFAULT_Q 512 /* Q per prefix: 512 (S, C), 256 (D, Z), 128 (Q, X). */
  585. #define DGEMM_DEFAULT_Q 256
  586. #define QGEMM_DEFAULT_Q 128
  587. #define CGEMM_DEFAULT_Q 512
  588. #define ZGEMM_DEFAULT_Q 256
  589. #define XGEMM_DEFAULT_Q 128
  590. #define GETRF_FACTOR 0.75 /* Floating-point literal, so it cannot be evaluated in #if expressions. */
  591. #endif /* PENRYN */
  592. #ifdef DUNNINGTON /* Parameter set compiled in only when DUNNINGTON is defined. */
  593. #define SNUMOPT 8
  594. #define DNUMOPT 4
  595. #define GEMM_DEFAULT_OFFSET_A 128
  596. #define GEMM_DEFAULT_OFFSET_B 0
  597. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  598. #define SYMV_P 8
  599. #define SWITCH_RATIO 4
  600. #ifdef ARCH_X86 /* Values used when ARCH_X86 is defined. */
  601. #define SGEMM_DEFAULT_UNROLL_M 4
  602. #define DGEMM_DEFAULT_UNROLL_M 2
  603. #define QGEMM_DEFAULT_UNROLL_M 2
  604. #define CGEMM_DEFAULT_UNROLL_M 2
  605. #define ZGEMM_DEFAULT_UNROLL_M 1
  606. #define XGEMM_DEFAULT_UNROLL_M 1
  607. #define SGEMM_DEFAULT_UNROLL_N 4
  608. #define DGEMM_DEFAULT_UNROLL_N 4
  609. #define QGEMM_DEFAULT_UNROLL_N 2
  610. #define CGEMM_DEFAULT_UNROLL_N 2
  611. #define ZGEMM_DEFAULT_UNROLL_N 2
  612. #define XGEMM_DEFAULT_UNROLL_N 1
  613. #else /* Values used when ARCH_X86 is not defined: UNROLL_N is identical to the branch above, only UNROLL_M differs. */
  614. #define SGEMM_DEFAULT_UNROLL_M 8
  615. #define DGEMM_DEFAULT_UNROLL_M 4
  616. #define QGEMM_DEFAULT_UNROLL_M 2
  617. #define CGEMM_DEFAULT_UNROLL_M 4
  618. #define ZGEMM_DEFAULT_UNROLL_M 2
  619. #define XGEMM_DEFAULT_UNROLL_M 1
  620. #define SGEMM_DEFAULT_UNROLL_N 4
  621. #define DGEMM_DEFAULT_UNROLL_N 4
  622. #define QGEMM_DEFAULT_UNROLL_N 2
  623. #define CGEMM_DEFAULT_UNROLL_N 2
  624. #define ZGEMM_DEFAULT_UNROLL_N 2
  625. #define XGEMM_DEFAULT_UNROLL_N 1
  626. #endif /* ARCH_X86 */
  627. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this excerpt. */
  628. #define SGEMM_DEFAULT_R sgemm_r
  629. #define DGEMM_DEFAULT_P dgemm_p
  630. #define DGEMM_DEFAULT_R dgemm_r
  631. #define QGEMM_DEFAULT_P qgemm_p
  632. #define QGEMM_DEFAULT_R qgemm_r
  633. #define CGEMM_DEFAULT_P cgemm_p
  634. #define CGEMM_DEFAULT_R cgemm_r
  635. #define ZGEMM_DEFAULT_P zgemm_p
  636. #define ZGEMM_DEFAULT_R zgemm_r
  637. #define XGEMM_DEFAULT_P xgemm_p
  638. #define XGEMM_DEFAULT_R xgemm_r
  639. #define SGEMM_DEFAULT_Q 768 /* Q per prefix: 768 (S, C), 384 (D, Z), 192 (Q, X). */
  640. #define DGEMM_DEFAULT_Q 384
  641. #define QGEMM_DEFAULT_Q 192
  642. #define CGEMM_DEFAULT_Q 768
  643. #define ZGEMM_DEFAULT_Q 384
  644. #define XGEMM_DEFAULT_Q 192
  645. #define GETRF_FACTOR 0.75 /* Floating-point literal, so it cannot be evaluated in #if expressions. */
  646. #define GEMM_THREAD gemm_thread_mn /* Expands to the identifier gemm_thread_mn, which is not defined in this excerpt. */
  647. #endif /* DUNNINGTON */
  648. #ifdef NEHALEM /* Parameter set compiled in only when NEHALEM is defined. */
  649. #define SNUMOPT 8
  650. #define DNUMOPT 4
  651. #define GEMM_DEFAULT_OFFSET_A 32
  652. #define GEMM_DEFAULT_OFFSET_B 0
  653. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  654. #define SYMV_P 8
  655. #define SWITCH_RATIO 4
  656. #ifdef ARCH_X86 /* Values used when ARCH_X86 is defined. */
  657. #define SGEMM_DEFAULT_UNROLL_M 4
  658. #define DGEMM_DEFAULT_UNROLL_M 2
  659. #define QGEMM_DEFAULT_UNROLL_M 2
  660. #define CGEMM_DEFAULT_UNROLL_M 2
  661. #define ZGEMM_DEFAULT_UNROLL_M 1
  662. #define XGEMM_DEFAULT_UNROLL_M 1
  663. #define SGEMM_DEFAULT_UNROLL_N 4
  664. #define DGEMM_DEFAULT_UNROLL_N 4
  665. #define QGEMM_DEFAULT_UNROLL_N 2
  666. #define CGEMM_DEFAULT_UNROLL_N 2
  667. #define ZGEMM_DEFAULT_UNROLL_N 2
  668. #define XGEMM_DEFAULT_UNROLL_N 1
  669. #else /* Values used when ARCH_X86 is not defined: UNROLL_M is identical to the branch above; UNROLL_N is doubled for S, D, C and Z. */
  670. #define SGEMM_DEFAULT_UNROLL_M 4
  671. #define DGEMM_DEFAULT_UNROLL_M 2
  672. #define QGEMM_DEFAULT_UNROLL_M 2
  673. #define CGEMM_DEFAULT_UNROLL_M 2
  674. #define ZGEMM_DEFAULT_UNROLL_M 1
  675. #define XGEMM_DEFAULT_UNROLL_M 1
  676. #define SGEMM_DEFAULT_UNROLL_N 8
  677. #define DGEMM_DEFAULT_UNROLL_N 8
  678. #define QGEMM_DEFAULT_UNROLL_N 2
  679. #define CGEMM_DEFAULT_UNROLL_N 4
  680. #define ZGEMM_DEFAULT_UNROLL_N 4
  681. #define XGEMM_DEFAULT_UNROLL_N 1
  682. #endif /* ARCH_X86 */
  683. #define SGEMM_DEFAULT_P 504 /* P is a literal here (504 for S, D, Q; 252 for C, Z, X); R still expands to identifiers not defined in this excerpt. */
  684. #define SGEMM_DEFAULT_R sgemm_r
  685. #define DGEMM_DEFAULT_P 504
  686. #define DGEMM_DEFAULT_R dgemm_r
  687. #define QGEMM_DEFAULT_P 504
  688. #define QGEMM_DEFAULT_R qgemm_r
  689. #define CGEMM_DEFAULT_P 252
  690. #define CGEMM_DEFAULT_R cgemm_r
  691. #define ZGEMM_DEFAULT_P 252
  692. #define ZGEMM_DEFAULT_R zgemm_r
  693. #define XGEMM_DEFAULT_P 252
  694. #define XGEMM_DEFAULT_R xgemm_r
  695. #define SGEMM_DEFAULT_Q 512 /* Q per prefix: 512 (S, C), 256 (D, Z), 128 (Q, X). */
  696. #define DGEMM_DEFAULT_Q 256
  697. #define QGEMM_DEFAULT_Q 128
  698. #define CGEMM_DEFAULT_Q 512
  699. #define ZGEMM_DEFAULT_Q 256
  700. #define XGEMM_DEFAULT_Q 128
  701. #define GETRF_FACTOR 0.72 /* Floating-point literal, so it cannot be evaluated in #if expressions. */
  702. #endif /* NEHALEM */
  703. #ifdef ATOM /* Parameter set compiled in only when ATOM is defined. */
  704. #define SNUMOPT 2
  705. #define DNUMOPT 1
  706. #define GEMM_DEFAULT_OFFSET_A 64
  707. #define GEMM_DEFAULT_OFFSET_B 0
  708. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  709. #define SYMV_P 8
  710. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined. */
  711. #define SGEMM_DEFAULT_UNROLL_M 4
  712. #define DGEMM_DEFAULT_UNROLL_M 2
  713. #define QGEMM_DEFAULT_UNROLL_M 2
  714. #define CGEMM_DEFAULT_UNROLL_M 2
  715. #define ZGEMM_DEFAULT_UNROLL_M 1
  716. #define XGEMM_DEFAULT_UNROLL_M 1
  717. #else /* UNROLL_M values used when ARCH_X86 is not defined. */
  718. #define SGEMM_DEFAULT_UNROLL_M 8
  719. #define DGEMM_DEFAULT_UNROLL_M 4
  720. #define QGEMM_DEFAULT_UNROLL_M 2
  721. #define CGEMM_DEFAULT_UNROLL_M 4
  722. #define ZGEMM_DEFAULT_UNROLL_M 2
  723. #define XGEMM_DEFAULT_UNROLL_M 1
  724. #endif /* ARCH_X86 */
  725. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N is outside the ARCH_X86 split, so these values apply to every build. */
  726. #define DGEMM_DEFAULT_UNROLL_N 2
  727. #define QGEMM_DEFAULT_UNROLL_N 2
  728. #define CGEMM_DEFAULT_UNROLL_N 2
  729. #define ZGEMM_DEFAULT_UNROLL_N 1
  730. #define XGEMM_DEFAULT_UNROLL_N 1
  731. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this excerpt. */
  732. #define SGEMM_DEFAULT_R sgemm_r
  733. #define DGEMM_DEFAULT_P dgemm_p
  734. #define DGEMM_DEFAULT_R dgemm_r
  735. #define QGEMM_DEFAULT_P qgemm_p
  736. #define QGEMM_DEFAULT_R qgemm_r
  737. #define CGEMM_DEFAULT_P cgemm_p
  738. #define CGEMM_DEFAULT_R cgemm_r
  739. #define ZGEMM_DEFAULT_P zgemm_p
  740. #define ZGEMM_DEFAULT_R zgemm_r
  741. #define XGEMM_DEFAULT_P xgemm_p
  742. #define XGEMM_DEFAULT_R xgemm_r
  743. #define SGEMM_DEFAULT_Q 256 /* Q is the literal 256 for every prefix. */
  744. #define DGEMM_DEFAULT_Q 256
  745. #define QGEMM_DEFAULT_Q 256
  746. #define CGEMM_DEFAULT_Q 256
  747. #define ZGEMM_DEFAULT_Q 256
  748. #define XGEMM_DEFAULT_Q 256
  749. #endif /* ATOM */
  /* Parameter set compiled in only when ITANIUM2 is defined. */
  #if defined(ITANIUM2)

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 128
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 16
  #define GETRF_FACTOR 0.65

  /* One group per precision prefix.  P and R expand to the identifiers
     sgemm_p, sgemm_r, ... which are not defined in this block; Q is the
     constant 1024 for every prefix. */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 1024
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 1024
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_UNROLL_M 8
  #define QGEMM_DEFAULT_UNROLL_N 8
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 1024
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 1024
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 1024
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_M 4
  #define XGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 1024
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* ITANIUM2 */
  /* Parameter set shared by EV4, EV5 and EV6: active when at least one of
     the three macros is defined. */
  #if defined(EV4) || defined(EV5) || defined(EV6)

  /* SNUMOPT/DNUMOPT are 1 when EV4 is defined and 2 otherwise. */
  #ifndef EV4
  #define SNUMOPT 2
  #define DNUMOPT 2
  #else
  #define SNUMOPT 1
  #define DNUMOPT 1
  #endif

  #define GEMM_DEFAULT_OFFSET_A 512
  #define GEMM_DEFAULT_OFFSET_B 512
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Unroll values common to all three variants. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 2

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SYMV_P 8

  /* Per-variant P/Q(/R).  Each variant macro is tested on its own, not as
     an if/else chain.  Only the EV4 group sets R values. */
  #if defined(EV4)
  #define SGEMM_DEFAULT_P 32
  #define DGEMM_DEFAULT_P 32
  #define CGEMM_DEFAULT_P 32
  #define ZGEMM_DEFAULT_P 32

  #define SGEMM_DEFAULT_Q 112
  #define DGEMM_DEFAULT_Q 56
  #define CGEMM_DEFAULT_Q 64
  #define ZGEMM_DEFAULT_Q 32

  #define SGEMM_DEFAULT_R 256
  #define DGEMM_DEFAULT_R 256
  #define CGEMM_DEFAULT_R 240
  #define ZGEMM_DEFAULT_R 240
  #endif /* EV4 */

  #if defined(EV5)
  #define SGEMM_DEFAULT_P 64
  #define DGEMM_DEFAULT_P 64
  #define CGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_P 64

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 64
  #endif /* EV5 */

  #if defined(EV6)
  #define SGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 128

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #endif /* EV6 */

  #endif /* EV4 || EV5 || EV6 */
  /* Parameter set compiled in only when CELL is defined. */
  #if defined(CELL)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 8192
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 4

  /* One group per precision prefix: UNROLL_M, UNROLL_N, P, Q. */
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 512

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 128

  #endif /* CELL */
  /* Parameter set compiled in only when PPCG4 is defined.  This block sets
     no SNUMOPT/DNUMOPT. */
  #if defined(PPCG4)

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 4

  /* One group per precision prefix: UNROLL_M, UNROLL_N, P, Q. */
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 256

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 256

  #endif /* PPCG4 */
  /* Parameter set compiled in only when PPC970 is defined. */
  #ifdef PPC970

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 2688
  #define GEMM_DEFAULT_OFFSET_B 3072
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* P values.
   *
   * The larger set is used only on OS_LINUX builds whose L2_SIZE equals
   * 1024976; every other configuration gets the 176 set.  Previously the
   * whole selection sat inside "#ifdef OS_LINUX", which left all four
   * *_DEFAULT_P macros undefined on any other OS even though the Q values
   * below are defined unconditionally.  Linux results are unchanged.
   *
   * NOTE(review): 1024976 is not 1048576 (1 MiB).  It is kept as-is because
   * it presumably has to match the L2_SIZE value supplied by the build
   * configuration -- confirm before changing it.
   */
  #if defined(OS_LINUX) && (L2_SIZE == 1024976)
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 256
  #else
  #define SGEMM_DEFAULT_P 176
  #define DGEMM_DEFAULT_P 176
  #define CGEMM_DEFAULT_P 176
  #define ZGEMM_DEFAULT_P 176
  #endif

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 4

  #endif /* PPC970 */
  /* Parameter set compiled in only when PPC440 is defined. */
  #if defined(PPC440)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 4

  /* One group per precision prefix.  Each R is defined as the matching
     P macro, so it expands to the same value as P. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_Q 1024
  #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_Q 512
  #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P

  #endif /* PPC440 */
  /* Parameter set compiled in only when PPC440FP2 is defined. */
  #if defined(PPC440FP2)

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 4

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 4

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128

  /* Two Q sets are kept in the source; the constant condition selects the
     larger one (the #else branch).  The smaller set is never compiled. */
  #if 0
  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128
  #else
  #define SGEMM_DEFAULT_Q 4096
  #define DGEMM_DEFAULT_Q 3072
  #define CGEMM_DEFAULT_Q 2048
  #define ZGEMM_DEFAULT_Q 1024
  #endif

  #endif /* PPC440FP2 */
  /* Parameter set shared by POWER3, POWER4 and POWER5: active when at least
     one of the three macros is defined. */
  #if defined(POWER3) || defined(POWER4) || defined(POWER5)

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Unroll values common to all three variants. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 2

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* POWER3: the only variant here that sets SNUMOPT/DNUMOPT and R values.
     No CGEMM P/Q/R is defined in this group. */
  #if defined(POWER3)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define SGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 256

  #define SGEMM_DEFAULT_Q 432
  #define DGEMM_DEFAULT_Q 216
  #define ZGEMM_DEFAULT_Q 104

  #define SGEMM_DEFAULT_R 1012
  #define DGEMM_DEFAULT_R 1012
  #define ZGEMM_DEFAULT_R 1012
  #endif /* POWER3 */

  /* POWER4: P depends on ALLOC_HUGETLB; no Q is defined in this group. */
  #ifdef POWER4
  #ifndef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_P 144
  #define DGEMM_DEFAULT_P 144
  #define CGEMM_DEFAULT_P 144
  #define ZGEMM_DEFAULT_P 144
  #else
  #define SGEMM_DEFAULT_P 184
  #define DGEMM_DEFAULT_P 184
  #define CGEMM_DEFAULT_P 184
  #define ZGEMM_DEFAULT_P 184
  #endif
  #endif /* POWER4 */

  /* POWER5: P depends on ALLOC_HUGETLB; Q is 256 for every prefix. */
  #ifdef POWER5
  #ifndef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 160
  #define CGEMM_DEFAULT_P 160
  #define ZGEMM_DEFAULT_P 80
  #else
  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 128
  #endif

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #endif /* POWER5 */

  #define SYMV_P 8

  #endif /* POWER3 || POWER4 || POWER5 */
  /* Parameter set compiled in only when POWER6 is defined. */
  #ifdef POWER6

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 384
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8

  /* One group per precision prefix: UNROLL_M, UNROLL_N, P, Q. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 992
  #define SGEMM_DEFAULT_Q 504

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 480
  #define DGEMM_DEFAULT_Q 504

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 488
  #define CGEMM_DEFAULT_Q 400

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 248
  #define ZGEMM_DEFAULT_Q 400

  #endif /* POWER6 */
  /* Parameter set compiled in only when both SPARC and V7 are defined. */
  #if defined(V7) && defined(SPARC)

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8

  /* GEMM_THREAD expands to the identifier gemm_thread_mn, which is not
     defined in this block. */
  #define GEMM_THREAD gemm_thread_mn

  /* One group per precision prefix: UNROLL_M, UNROLL_N, P, Q. */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 512

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_Q 128

  #endif /* SPARC && V7 */
  /* Parameter set compiled in only when both SPARC and V9 are defined. */
  #if defined(V9) && defined(SPARC)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8

  /* One group per precision prefix: UNROLL_M, UNROLL_N, P, Q. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_Q 1024

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_Q 512

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_Q 512

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_Q 256

  #endif /* SPARC && V9 */
  /* Parameter set compiled in only when SICORTEX is defined. */
  #if defined(SICORTEX)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 16

  /* One group per precision prefix: UNROLL_M, UNROLL_N, P, Q, R. */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 108
  #define SGEMM_DEFAULT_Q 288
  #define SGEMM_DEFAULT_R 2000

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P 112
  #define DGEMM_DEFAULT_Q 144
  #define DGEMM_DEFAULT_R 2000

  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 108
  #define CGEMM_DEFAULT_Q 144
  #define CGEMM_DEFAULT_R 2000

  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 112
  #define ZGEMM_DEFAULT_Q 72
  #define ZGEMM_DEFAULT_R 2000

  #endif /* SICORTEX */
  /* Parameter set compiled in only when GENERIC is defined.
   *
   * NOTE(review): unlike the other parameter sets in this file, this one
   * defines xGEMM_P / xGEMM_Q / xGEMM_R directly rather than the
   * xGEMM_DEFAULT_P / _Q / _R names.  Kept exactly as found -- confirm
   * against the headers that consume these macros.
   */
  #if defined(GENERIC)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 16

  /* UNROLL_M: the S/D/C/Z values differ between ARCH_X86 builds and all
     other builds; the Q and X values are the same in both cases. */
  #if !defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* Remaining values, one group per precision prefix.  P and R expand to
     the identifiers sgemm_p, sgemm_r, ... which are not defined in this
     block; Q is the constant 128 for every prefix. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_P sgemm_p
  #define SGEMM_Q 128
  #define SGEMM_R sgemm_r

  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_P dgemm_p
  #define DGEMM_Q 128
  #define DGEMM_R dgemm_r

  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_P qgemm_p
  #define QGEMM_Q 128
  #define QGEMM_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_P cgemm_p
  #define CGEMM_Q 128
  #define CGEMM_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_P zgemm_p
  #define ZGEMM_Q 128
  #define ZGEMM_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_P xgemm_p
  #define XGEMM_Q 128
  #define XGEMM_R xgemm_r

  #endif /* GENERIC */
  /* Fallbacks: any QGEMM/XGEMM unroll macro that is still undefined at this
     point is given the value 2.  Macros that are already defined are left
     untouched. */
  #if !defined(QGEMM_DEFAULT_UNROLL_M)
  #define QGEMM_DEFAULT_UNROLL_M 2
  #endif

  #if !defined(QGEMM_DEFAULT_UNROLL_N)
  #define QGEMM_DEFAULT_UNROLL_N 2
  #endif

  #if !defined(XGEMM_DEFAULT_UNROLL_M)
  #define XGEMM_DEFAULT_UNROLL_M 2
  #endif

  #if !defined(XGEMM_DEFAULT_UNROLL_N)
  #define XGEMM_DEFAULT_UNROLL_N 2
  #endif
  /* SHUFPD_0..SHUFPD_3 and SHUFPS_39 expand to an instruction mnemonic plus
     its immediate and a trailing comma.
   *
   * With HAVE_SSE2 defined, each SHUFPD_n that is not already defined
   * becomes "shufpd $n,".  Without HAVE_SSE2, all four are defined as
   * "shufps" forms with the immediates 0x44, 0x4e, 0xe4 and 0xee.
   */
  #if defined(HAVE_SSE2)

  #if !defined(SHUFPD_0)
  #define SHUFPD_0 shufpd $0,
  #endif
  #if !defined(SHUFPD_1)
  #define SHUFPD_1 shufpd $1,
  #endif
  #if !defined(SHUFPD_2)
  #define SHUFPD_2 shufpd $2,
  #endif
  #if !defined(SHUFPD_3)
  #define SHUFPD_3 shufpd $3,
  #endif

  #else /* !HAVE_SSE2 */

  #define SHUFPD_0 shufps $0x44,
  #define SHUFPD_1 shufps $0x4e,
  #define SHUFPD_2 shufps $0xe4,
  #define SHUFPD_3 shufps $0xee,

  #endif /* HAVE_SSE2 */

  /* SHUFPS_39 is only given a definition when none exists yet. */
  #if !defined(SHUFPS_39)
  #define SHUFPS_39 shufps $0x39,
  #endif
  1220. #endif

OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.

Contributors (1)