You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 45 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897
  1. /*****************************************************************************
  2. Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the ISCAS nor the names of its contributors may
  14. be used to endorse or promote products derived from this software
  15. without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************************/
  27. /*********************************************************************/
  28. /* Copyright 2009, 2010 The University of Texas at Austin. */
  29. /* All rights reserved. */
  30. /* */
  31. /* Redistribution and use in source and binary forms, with or */
  32. /* without modification, are permitted provided that the following */
  33. /* conditions are met: */
  34. /* */
  35. /* 1. Redistributions of source code must retain the above */
  36. /* copyright notice, this list of conditions and the following */
  37. /* disclaimer. */
  38. /* */
  39. /* 2. Redistributions in binary form must reproduce the above */
  40. /* copyright notice, this list of conditions and the following */
  41. /* disclaimer in the documentation and/or other materials */
  42. /* provided with the distribution. */
  43. /* */
  44. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  45. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  46. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  47. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  48. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  49. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  50. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  51. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  52. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  53. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  54. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  55. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  56. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  57. /* POSSIBILITY OF SUCH DAMAGE. */
  58. /* */
  59. /* The views and conclusions contained in the software and */
  60. /* documentation are those of the authors and should not be */
  61. /* interpreted as representing official policies, either expressed */
  62. /* or implied, of The University of Texas at Austin. */
  63. /*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  /* Parameter set used when OPTERON is defined: GEMM buffer offsets and
     alignment mask, per-prefix UNROLL_M/UNROLL_N factors, and P/Q/R values.
     NOTE(review): the S/D/Q/C/Z/X prefixes presumably select the numeric
     precision of the routine -- confirm against the kernel sources. */
  66. #ifdef OPTERON
  67. #define SNUMOPT 4
  68. #define DNUMOPT 2
  69. #define GEMM_DEFAULT_OFFSET_A 64
  70. #define GEMM_DEFAULT_OFFSET_B 256
  71. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  /* UNROLL_N is the same for every architecture in this section. */
  72. #define SGEMM_DEFAULT_UNROLL_N 4
  73. #define DGEMM_DEFAULT_UNROLL_N 4
  74. #define QGEMM_DEFAULT_UNROLL_N 2
  75. #define CGEMM_DEFAULT_UNROLL_N 2
  76. #define ZGEMM_DEFAULT_UNROLL_N 2
  77. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M depends on ARCH_X86; S, D, C and Z are halved in that branch. */
  78. #ifdef ARCH_X86
  79. #define SGEMM_DEFAULT_UNROLL_M 4
  80. #define DGEMM_DEFAULT_UNROLL_M 2
  81. #define QGEMM_DEFAULT_UNROLL_M 2
  82. #define CGEMM_DEFAULT_UNROLL_M 2
  83. #define ZGEMM_DEFAULT_UNROLL_M 1
  84. #define XGEMM_DEFAULT_UNROLL_M 1
  85. #else /* !ARCH_X86 */
  86. #define SGEMM_DEFAULT_UNROLL_M 8
  87. #define DGEMM_DEFAULT_UNROLL_M 4
  88. #define QGEMM_DEFAULT_UNROLL_M 2
  89. #define CGEMM_DEFAULT_UNROLL_M 4
  90. #define ZGEMM_DEFAULT_UNROLL_M 2
  91. #define XGEMM_DEFAULT_UNROLL_M 1
  92. #endif /* ARCH_X86 */
  /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than to
     literals; those identifiers are not defined in this part of the file. */
  93. #define SGEMM_DEFAULT_P sgemm_p
  94. #define DGEMM_DEFAULT_P dgemm_p
  95. #define QGEMM_DEFAULT_P qgemm_p
  96. #define CGEMM_DEFAULT_P cgemm_p
  97. #define ZGEMM_DEFAULT_P zgemm_p
  98. #define XGEMM_DEFAULT_P xgemm_p
  99. #define SGEMM_DEFAULT_R sgemm_r
  100. #define DGEMM_DEFAULT_R dgemm_r
  101. #define QGEMM_DEFAULT_R qgemm_r
  102. #define CGEMM_DEFAULT_R cgemm_r
  103. #define ZGEMM_DEFAULT_R zgemm_r
  104. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is a literal: 248 when ALLOC_HUGETLB is defined, 240 otherwise. */
  105. #ifdef ALLOC_HUGETLB
  106. #define SGEMM_DEFAULT_Q 248
  107. #define DGEMM_DEFAULT_Q 248
  108. #define QGEMM_DEFAULT_Q 248
  109. #define CGEMM_DEFAULT_Q 248
  110. #define ZGEMM_DEFAULT_Q 248
  111. #define XGEMM_DEFAULT_Q 248
  112. #else /* !ALLOC_HUGETLB */
  113. #define SGEMM_DEFAULT_Q 240
  114. #define DGEMM_DEFAULT_Q 240
  115. #define QGEMM_DEFAULT_Q 240
  116. #define CGEMM_DEFAULT_Q 240
  117. #define ZGEMM_DEFAULT_Q 240
  118. #define XGEMM_DEFAULT_Q 240
  119. #endif /* ALLOC_HUGETLB */
  120. #define SYMV_P 16
  121. #define HAVE_EXCLUSIVE_CACHE
  122. #endif /* OPTERON */
  /* Parameter set shared by the BARCELONA, SHANGHAI and BOBCAT targets:
     GEMM buffer offsets and alignment mask, UNROLL factors, literal P/Q
     values, R identifiers, and the GEMM_THREAD selection. */
  123. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
  124. #define SNUMOPT 8
  125. #define DNUMOPT 4
  126. #define GEMM_DEFAULT_OFFSET_A 64
  127. #define GEMM_DEFAULT_OFFSET_B 832
  128. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  129. #define SGEMM_DEFAULT_UNROLL_N 4
  130. #define DGEMM_DEFAULT_UNROLL_N 4
  131. #define QGEMM_DEFAULT_UNROLL_N 2
  132. #define CGEMM_DEFAULT_UNROLL_N 2
  133. #define ZGEMM_DEFAULT_UNROLL_N 2
  134. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M depends on ARCH_X86; S, D, C and Z are halved in that branch. */
  135. #ifdef ARCH_X86
  136. #define SGEMM_DEFAULT_UNROLL_M 4
  137. #define DGEMM_DEFAULT_UNROLL_M 2
  138. #define QGEMM_DEFAULT_UNROLL_M 2
  139. #define CGEMM_DEFAULT_UNROLL_M 2
  140. #define ZGEMM_DEFAULT_UNROLL_M 1
  141. #define XGEMM_DEFAULT_UNROLL_M 1
  142. #else /* !ARCH_X86 */
  143. #define SGEMM_DEFAULT_UNROLL_M 8
  144. #define DGEMM_DEFAULT_UNROLL_M 4
  145. #define QGEMM_DEFAULT_UNROLL_M 2
  146. #define CGEMM_DEFAULT_UNROLL_M 4
  147. #define ZGEMM_DEFAULT_UNROLL_M 2
  148. #define XGEMM_DEFAULT_UNROLL_M 1
  149. #endif /* ARCH_X86 */
  /* The "#if 0" branch is never compiled: it keeps an alternative P/Q set
     (Q = 248). The values in effect are the ones in the #else branch. */
  150. #if 0
  151. #define SGEMM_DEFAULT_P 496
  152. #define DGEMM_DEFAULT_P 248
  153. #define QGEMM_DEFAULT_P 124
  154. #define CGEMM_DEFAULT_P 248
  155. #define ZGEMM_DEFAULT_P 124
  156. #define XGEMM_DEFAULT_P 62
  157. #define SGEMM_DEFAULT_Q 248
  158. #define DGEMM_DEFAULT_Q 248
  159. #define QGEMM_DEFAULT_Q 248
  160. #define CGEMM_DEFAULT_Q 248
  161. #define ZGEMM_DEFAULT_Q 248
  162. #define XGEMM_DEFAULT_Q 248
  163. #else /* active P/Q values */
  164. #define SGEMM_DEFAULT_P 448
  165. #define DGEMM_DEFAULT_P 224
  166. #define QGEMM_DEFAULT_P 112
  167. #define CGEMM_DEFAULT_P 224
  168. #define ZGEMM_DEFAULT_P 112
  169. #define XGEMM_DEFAULT_P 56
  170. #define SGEMM_DEFAULT_Q 224
  171. #define DGEMM_DEFAULT_Q 224
  172. #define QGEMM_DEFAULT_Q 224
  173. #define CGEMM_DEFAULT_Q 224
  174. #define ZGEMM_DEFAULT_Q 224
  175. #define XGEMM_DEFAULT_Q 224
  176. #endif /* 0 */
  /* R expands to identifiers (sgemm_r, ...) that are not defined in this
     part of the file. */
  177. #define SGEMM_DEFAULT_R sgemm_r
  178. #define QGEMM_DEFAULT_R qgemm_r
  179. #define DGEMM_DEFAULT_R dgemm_r
  180. #define CGEMM_DEFAULT_R cgemm_r
  181. #define ZGEMM_DEFAULT_R zgemm_r
  182. #define XGEMM_DEFAULT_R xgemm_r
  183. #define SYMV_P 16
  184. #define HAVE_EXCLUSIVE_CACHE
  /* gemm_thread_mn is referenced by name only; it is declared elsewhere. */
  185. #define GEMM_THREAD gemm_thread_mn
  186. #endif /* BARCELONA || SHANGHAI || BOBCAT */
  /* Parameter set used when BULLDOZER is defined. The S/D UNROLL factors are
     chosen by ARCH_X86, and the S/D P and Q values by ARCH_X86_64. */
  187. #ifdef BULLDOZER
  188. #define SNUMOPT 8
  189. #define DNUMOPT 4
  190. #define GEMM_DEFAULT_OFFSET_A 64
  191. #define GEMM_DEFAULT_OFFSET_B 832
  192. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* UNROLL_N for Q/C/Z/X is common; S/D UNROLL_N is set per branch below. */
  193. #define QGEMM_DEFAULT_UNROLL_N 2
  194. #define CGEMM_DEFAULT_UNROLL_N 2
  195. #define ZGEMM_DEFAULT_UNROLL_N 2
  196. #define XGEMM_DEFAULT_UNROLL_N 1
  197. #ifdef ARCH_X86
  198. #define SGEMM_DEFAULT_UNROLL_N 4
  199. #define DGEMM_DEFAULT_UNROLL_N 4
  200. #define SGEMM_DEFAULT_UNROLL_M 4
  201. #define DGEMM_DEFAULT_UNROLL_M 2
  202. #define QGEMM_DEFAULT_UNROLL_M 2
  203. #define CGEMM_DEFAULT_UNROLL_M 2
  204. #define ZGEMM_DEFAULT_UNROLL_M 1
  205. #define XGEMM_DEFAULT_UNROLL_M 1
  206. #else /* !ARCH_X86 */
  207. #define SGEMM_DEFAULT_UNROLL_N 2
  208. #define DGEMM_DEFAULT_UNROLL_N 2
  209. #define SGEMM_DEFAULT_UNROLL_M 16
  210. #define DGEMM_DEFAULT_UNROLL_M 8
  211. #define QGEMM_DEFAULT_UNROLL_M 2
  212. #define CGEMM_DEFAULT_UNROLL_M 4
  213. #define ZGEMM_DEFAULT_UNROLL_M 2
  214. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The 3M unroll factors and GEMV_UNROLL are defined only in this branch;
     the ARCH_X86 branch above leaves them undefined.
     NOTE(review): confirm that a default is supplied elsewhere for ARCH_X86. */
  215. #define CGEMM3M_DEFAULT_UNROLL_N 4
  216. #define CGEMM3M_DEFAULT_UNROLL_M 8
  217. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  218. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  219. #define GEMV_UNROLL 8
  220. #endif /* ARCH_X86 */
  /* S/D P: 768/384 when ARCH_X86_64 is defined, 448/224 otherwise. */
  221. #if defined(ARCH_X86_64)
  222. #define SGEMM_DEFAULT_P 768
  223. #define DGEMM_DEFAULT_P 384
  224. #else /* !ARCH_X86_64 */
  225. #define SGEMM_DEFAULT_P 448
  226. #define DGEMM_DEFAULT_P 224
  227. #endif /* ARCH_X86_64 */
  228. #define QGEMM_DEFAULT_P 112
  229. #define CGEMM_DEFAULT_P 224
  230. #define ZGEMM_DEFAULT_P 112
  231. #define XGEMM_DEFAULT_P 56
  /* S/D Q: 168 when ARCH_X86_64 is defined, 224 otherwise. */
  232. #if defined(ARCH_X86_64)
  233. #define SGEMM_DEFAULT_Q 168
  234. #define DGEMM_DEFAULT_Q 168
  235. #else /* !ARCH_X86_64 */
  236. #define SGEMM_DEFAULT_Q 224
  237. #define DGEMM_DEFAULT_Q 224
  238. #endif /* ARCH_X86_64 */
  239. #define QGEMM_DEFAULT_Q 224
  240. #define CGEMM_DEFAULT_Q 224
  241. #define ZGEMM_DEFAULT_Q 224
  242. #define XGEMM_DEFAULT_Q 224
  /* R expands to identifiers (sgemm_r, ...) that are not defined in this
     part of the file. */
  243. #define SGEMM_DEFAULT_R sgemm_r
  244. #define QGEMM_DEFAULT_R qgemm_r
  245. #define DGEMM_DEFAULT_R dgemm_r
  246. #define CGEMM_DEFAULT_R cgemm_r
  247. #define ZGEMM_DEFAULT_R zgemm_r
  248. #define XGEMM_DEFAULT_R xgemm_r
  249. #define SYMV_P 16
  250. #define HAVE_EXCLUSIVE_CACHE
  251. #define GEMM_THREAD gemm_thread_mn
  252. #endif /* BULLDOZER */
  /* Parameter set used when PILEDRIVER is defined. The S/D UNROLL factors are
     chosen by ARCH_X86, and the S/D P and Q values by ARCH_X86_64. */
  253. #ifdef PILEDRIVER
  254. #define SNUMOPT 8
  255. #define DNUMOPT 4
  256. #define GEMM_DEFAULT_OFFSET_A 64
  257. #define GEMM_DEFAULT_OFFSET_B 832
  258. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* UNROLL_N for Q/C/Z/X is common; S/D UNROLL_N is set per branch below. */
  259. #define QGEMM_DEFAULT_UNROLL_N 2
  260. #define CGEMM_DEFAULT_UNROLL_N 2
  261. #define ZGEMM_DEFAULT_UNROLL_N 2
  262. #define XGEMM_DEFAULT_UNROLL_N 1
  263. #ifdef ARCH_X86
  264. #define SGEMM_DEFAULT_UNROLL_N 4
  265. #define DGEMM_DEFAULT_UNROLL_N 4
  266. #define SGEMM_DEFAULT_UNROLL_M 4
  267. #define DGEMM_DEFAULT_UNROLL_M 2
  268. #define QGEMM_DEFAULT_UNROLL_M 2
  269. #define CGEMM_DEFAULT_UNROLL_M 2
  270. #define ZGEMM_DEFAULT_UNROLL_M 1
  271. #define XGEMM_DEFAULT_UNROLL_M 1
  272. #else /* !ARCH_X86 */
  273. #define SGEMM_DEFAULT_UNROLL_N 2
  274. #define DGEMM_DEFAULT_UNROLL_N 2
  275. #define SGEMM_DEFAULT_UNROLL_M 16
  276. #define DGEMM_DEFAULT_UNROLL_M 8
  277. #define QGEMM_DEFAULT_UNROLL_M 2
  278. #define CGEMM_DEFAULT_UNROLL_M 4
  279. #define ZGEMM_DEFAULT_UNROLL_M 2
  280. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The 3M unroll factors and GEMV_UNROLL are defined only in this branch;
     the ARCH_X86 branch above leaves them undefined.
     NOTE(review): confirm that a default is supplied elsewhere for ARCH_X86. */
  281. #define CGEMM3M_DEFAULT_UNROLL_N 4
  282. #define CGEMM3M_DEFAULT_UNROLL_M 8
  283. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  284. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  285. #define GEMV_UNROLL 8
  286. #endif /* ARCH_X86 */
  /* S/D P: 768/384 when ARCH_X86_64 is defined, 448/224 otherwise. */
  287. #if defined(ARCH_X86_64)
  288. #define SGEMM_DEFAULT_P 768
  289. #define DGEMM_DEFAULT_P 384
  290. #else /* !ARCH_X86_64 */
  291. #define SGEMM_DEFAULT_P 448
  292. #define DGEMM_DEFAULT_P 224
  293. #endif /* ARCH_X86_64 */
  294. #define QGEMM_DEFAULT_P 112
  295. #define CGEMM_DEFAULT_P 224
  296. #define ZGEMM_DEFAULT_P 112
  297. #define XGEMM_DEFAULT_P 56
  /* S/D Q: 168 when ARCH_X86_64 is defined, 224 otherwise. */
  298. #if defined(ARCH_X86_64)
  299. #define SGEMM_DEFAULT_Q 168
  300. #define DGEMM_DEFAULT_Q 168
  301. #else /* !ARCH_X86_64 */
  302. #define SGEMM_DEFAULT_Q 224
  303. #define DGEMM_DEFAULT_Q 224
  304. #endif /* ARCH_X86_64 */
  305. #define QGEMM_DEFAULT_Q 224
  306. #define CGEMM_DEFAULT_Q 224
  307. #define ZGEMM_DEFAULT_Q 224
  308. #define XGEMM_DEFAULT_Q 224
  /* R expands to identifiers (sgemm_r, ...) that are not defined in this
     part of the file. */
  309. #define SGEMM_DEFAULT_R sgemm_r
  310. #define QGEMM_DEFAULT_R qgemm_r
  311. #define DGEMM_DEFAULT_R dgemm_r
  312. #define CGEMM_DEFAULT_R cgemm_r
  313. #define ZGEMM_DEFAULT_R zgemm_r
  314. #define XGEMM_DEFAULT_R xgemm_r
  315. #define SYMV_P 16
  316. #define HAVE_EXCLUSIVE_CACHE
  317. #define GEMM_THREAD gemm_thread_mn
  318. #endif /* PILEDRIVER */
  /* Parameter set used when ATHLON is defined. There is no per-architecture
     branching here: every value is fixed for this target. */
  319. #ifdef ATHLON
  320. #define SNUMOPT 4
  321. #define DNUMOPT 2
  322. #define GEMM_DEFAULT_OFFSET_A 0
  323. #define GEMM_DEFAULT_OFFSET_B 384
  324. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  325. #define SGEMM_DEFAULT_UNROLL_N 4
  326. #define DGEMM_DEFAULT_UNROLL_N 4
  327. #define QGEMM_DEFAULT_UNROLL_N 2
  328. #define CGEMM_DEFAULT_UNROLL_N 2
  329. #define ZGEMM_DEFAULT_UNROLL_N 2
  330. #define XGEMM_DEFAULT_UNROLL_N 1
  331. #define SGEMM_DEFAULT_UNROLL_M 2
  332. #define DGEMM_DEFAULT_UNROLL_M 1
  333. #define QGEMM_DEFAULT_UNROLL_M 2
  334. #define CGEMM_DEFAULT_UNROLL_M 1
  335. #define ZGEMM_DEFAULT_UNROLL_M 1
  336. #define XGEMM_DEFAULT_UNROLL_M 1
  /* R expands to identifiers (sgemm_r, ...) that are not defined in this
     part of the file; P and Q below are literals. */
  337. #define SGEMM_DEFAULT_R sgemm_r
  338. #define DGEMM_DEFAULT_R dgemm_r
  339. #define QGEMM_DEFAULT_R qgemm_r
  340. #define CGEMM_DEFAULT_R cgemm_r
  341. #define ZGEMM_DEFAULT_R zgemm_r
  342. #define XGEMM_DEFAULT_R xgemm_r
  343. #define SGEMM_DEFAULT_P 208
  344. #define DGEMM_DEFAULT_P 104
  345. #define QGEMM_DEFAULT_P 56
  346. #define CGEMM_DEFAULT_P 104
  347. #define ZGEMM_DEFAULT_P 56
  348. #define XGEMM_DEFAULT_P 28
  349. #define SGEMM_DEFAULT_Q 208
  350. #define DGEMM_DEFAULT_Q 208
  351. #define QGEMM_DEFAULT_Q 208
  352. #define CGEMM_DEFAULT_Q 208
  353. #define ZGEMM_DEFAULT_Q 208
  354. #define XGEMM_DEFAULT_Q 208
  355. #define SYMV_P 16
  356. #define HAVE_EXCLUSIVE_CACHE
  357. #endif /* ATHLON */
  /* Parameter set used when VIAC3 is defined. All values are fixed; this
     section does not define HAVE_EXCLUSIVE_CACHE. */
  358. #ifdef VIAC3
  359. #define SNUMOPT 2
  360. #define DNUMOPT 1
  361. #define GEMM_DEFAULT_OFFSET_A 0
  362. #define GEMM_DEFAULT_OFFSET_B 256
  363. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  364. #define SGEMM_DEFAULT_UNROLL_N 4
  365. #define DGEMM_DEFAULT_UNROLL_N 4
  366. #define QGEMM_DEFAULT_UNROLL_N 2
  367. #define CGEMM_DEFAULT_UNROLL_N 2
  368. #define ZGEMM_DEFAULT_UNROLL_N 2
  369. #define XGEMM_DEFAULT_UNROLL_N 1
  370. #define SGEMM_DEFAULT_UNROLL_M 2
  371. #define DGEMM_DEFAULT_UNROLL_M 1
  372. #define QGEMM_DEFAULT_UNROLL_M 2
  373. #define CGEMM_DEFAULT_UNROLL_M 1
  374. #define ZGEMM_DEFAULT_UNROLL_M 1
  375. #define XGEMM_DEFAULT_UNROLL_M 1
  /* R expands to identifiers (sgemm_r, ...) that are not defined in this
     part of the file; P and Q below are literals. */
  376. #define SGEMM_DEFAULT_R sgemm_r
  377. #define DGEMM_DEFAULT_R dgemm_r
  378. #define QGEMM_DEFAULT_R qgemm_r
  379. #define CGEMM_DEFAULT_R cgemm_r
  380. #define ZGEMM_DEFAULT_R zgemm_r
  381. #define XGEMM_DEFAULT_R xgemm_r
  /* P is 128 for every prefix; Q differs per prefix. */
  382. #define SGEMM_DEFAULT_P 128
  383. #define DGEMM_DEFAULT_P 128
  384. #define QGEMM_DEFAULT_P 128
  385. #define CGEMM_DEFAULT_P 128
  386. #define ZGEMM_DEFAULT_P 128
  387. #define XGEMM_DEFAULT_P 128
  388. #define SGEMM_DEFAULT_Q 512
  389. #define DGEMM_DEFAULT_Q 256
  390. #define QGEMM_DEFAULT_Q 256
  391. #define CGEMM_DEFAULT_Q 256
  392. #define ZGEMM_DEFAULT_Q 128
  393. #define XGEMM_DEFAULT_Q 128
  394. #define SYMV_P 16
  395. #endif /* VIAC3 */
  /* Parameter set used when NANO is defined. ARCH_X86 selects both the
     UNROLL_N and the UNROLL_M factors; P and Q are literals. */
  396. #ifdef NANO
  397. #define SNUMOPT 4
  398. #define DNUMOPT 2
  399. #define GEMM_DEFAULT_OFFSET_A 64
  400. #define GEMM_DEFAULT_OFFSET_B 256
  401. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  402. #ifdef ARCH_X86
  403. #define SGEMM_DEFAULT_UNROLL_N 4
  404. #define DGEMM_DEFAULT_UNROLL_N 4
  405. #define QGEMM_DEFAULT_UNROLL_N 2
  406. #define CGEMM_DEFAULT_UNROLL_N 2
  407. #define ZGEMM_DEFAULT_UNROLL_N 2
  408. #define XGEMM_DEFAULT_UNROLL_N 1
  409. #define SGEMM_DEFAULT_UNROLL_M 4
  410. #define DGEMM_DEFAULT_UNROLL_M 2
  411. #define QGEMM_DEFAULT_UNROLL_M 2
  412. #define CGEMM_DEFAULT_UNROLL_M 2
  413. #define ZGEMM_DEFAULT_UNROLL_M 1
  414. #define XGEMM_DEFAULT_UNROLL_M 1
  415. #else /* !ARCH_X86 */
  416. #define SGEMM_DEFAULT_UNROLL_N 8
  417. #define DGEMM_DEFAULT_UNROLL_N 4
  418. #define QGEMM_DEFAULT_UNROLL_N 2
  419. #define CGEMM_DEFAULT_UNROLL_N 4
  420. #define ZGEMM_DEFAULT_UNROLL_N 2
  421. #define XGEMM_DEFAULT_UNROLL_N 1
  422. #define SGEMM_DEFAULT_UNROLL_M 4
  423. #define DGEMM_DEFAULT_UNROLL_M 4
  424. #define QGEMM_DEFAULT_UNROLL_M 2
  425. #define CGEMM_DEFAULT_UNROLL_M 2
  426. #define ZGEMM_DEFAULT_UNROLL_M 2
  427. #define XGEMM_DEFAULT_UNROLL_M 1
  428. #endif /* ARCH_X86 */
  /* P is 288 for every prefix. */
  429. #define SGEMM_DEFAULT_P 288
  430. #define DGEMM_DEFAULT_P 288
  431. #define QGEMM_DEFAULT_P 288
  432. #define CGEMM_DEFAULT_P 288
  433. #define ZGEMM_DEFAULT_P 288
  434. #define XGEMM_DEFAULT_P 288
  /* R expands to identifiers (sgemm_r, ...) that are not defined in this
     part of the file. */
  435. #define SGEMM_DEFAULT_R sgemm_r
  436. #define DGEMM_DEFAULT_R dgemm_r
  437. #define QGEMM_DEFAULT_R qgemm_r
  438. #define CGEMM_DEFAULT_R cgemm_r
  439. #define ZGEMM_DEFAULT_R zgemm_r
  440. #define XGEMM_DEFAULT_R xgemm_r
  441. #define SGEMM_DEFAULT_Q 256
  442. #define DGEMM_DEFAULT_Q 128
  443. #define QGEMM_DEFAULT_Q 64
  444. #define CGEMM_DEFAULT_Q 128
  445. #define ZGEMM_DEFAULT_Q 64
  446. #define XGEMM_DEFAULT_Q 32
  447. #define SYMV_P 16
  448. #define HAVE_EXCLUSIVE_CACHE
  449. #endif /* NANO */
  /* Parameter set shared by the PENTIUM, PENTIUM2 and PENTIUM3 targets.
     HAVE_SSE changes SNUMOPT and the S/C UNROLL_M factors; everything else
     is fixed for these targets. */
  450. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
  451. #ifdef HAVE_SSE
  452. #define SNUMOPT 2
  453. #else /* !HAVE_SSE */
  454. #define SNUMOPT 1
  455. #endif /* HAVE_SSE */
  456. #define DNUMOPT 1
  457. #define GEMM_DEFAULT_OFFSET_A 0
  458. #define GEMM_DEFAULT_OFFSET_B 0
  459. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* S and C UNROLL_M are doubled when HAVE_SSE is defined. */
  460. #ifdef HAVE_SSE
  461. #define SGEMM_DEFAULT_UNROLL_M 8
  462. #define CGEMM_DEFAULT_UNROLL_M 4
  463. #else /* !HAVE_SSE */
  464. #define SGEMM_DEFAULT_UNROLL_M 4
  465. #define CGEMM_DEFAULT_UNROLL_M 2
  466. #endif /* HAVE_SSE */
  467. #define DGEMM_DEFAULT_UNROLL_M 2
  468. #define SGEMM_DEFAULT_UNROLL_N 2
  469. #define DGEMM_DEFAULT_UNROLL_N 2
  470. #define QGEMM_DEFAULT_UNROLL_M 2
  471. #define QGEMM_DEFAULT_UNROLL_N 2
  472. #define CGEMM_DEFAULT_UNROLL_N 1
  473. #define ZGEMM_DEFAULT_UNROLL_M 1
  474. #define ZGEMM_DEFAULT_UNROLL_N 1
  475. #define XGEMM_DEFAULT_UNROLL_M 1
  476. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
     defined in this part of the file; Q is the literal 256 throughout. */
  477. #define SGEMM_DEFAULT_P sgemm_p
  478. #define SGEMM_DEFAULT_Q 256
  479. #define SGEMM_DEFAULT_R sgemm_r
  480. #define DGEMM_DEFAULT_P dgemm_p
  481. #define DGEMM_DEFAULT_Q 256
  482. #define DGEMM_DEFAULT_R dgemm_r
  483. #define QGEMM_DEFAULT_P qgemm_p
  484. #define QGEMM_DEFAULT_Q 256
  485. #define QGEMM_DEFAULT_R qgemm_r
  486. #define CGEMM_DEFAULT_P cgemm_p
  487. #define CGEMM_DEFAULT_Q 256
  488. #define CGEMM_DEFAULT_R cgemm_r
  489. #define ZGEMM_DEFAULT_P zgemm_p
  490. #define ZGEMM_DEFAULT_Q 256
  491. #define ZGEMM_DEFAULT_R zgemm_r
  492. #define XGEMM_DEFAULT_P xgemm_p
  493. #define XGEMM_DEFAULT_Q 256
  494. #define XGEMM_DEFAULT_R xgemm_r
  495. #define SYMV_P 4
  496. #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  /* Parameter set used when PENTIUMM is defined. CORE_YONAH selects an
     alternative set of UNROLL_M/UNROLL_N factors; the rest is common. */
  497. #ifdef PENTIUMM
  498. #define SNUMOPT 2
  499. #define DNUMOPT 1
  500. #define GEMM_DEFAULT_OFFSET_A 0
  501. #define GEMM_DEFAULT_OFFSET_B 0
  502. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  503. #ifdef CORE_YONAH
  504. #define SGEMM_DEFAULT_UNROLL_M 4
  505. #define SGEMM_DEFAULT_UNROLL_N 4
  506. #define DGEMM_DEFAULT_UNROLL_M 2
  507. #define DGEMM_DEFAULT_UNROLL_N 4
  508. #define QGEMM_DEFAULT_UNROLL_M 2
  509. #define QGEMM_DEFAULT_UNROLL_N 2
  510. #define CGEMM_DEFAULT_UNROLL_M 2
  511. #define CGEMM_DEFAULT_UNROLL_N 2
  512. #define ZGEMM_DEFAULT_UNROLL_M 1
  513. #define ZGEMM_DEFAULT_UNROLL_N 2
  514. #define XGEMM_DEFAULT_UNROLL_M 1
  515. #define XGEMM_DEFAULT_UNROLL_N 1
  516. #else /* !CORE_YONAH */
  517. #define SGEMM_DEFAULT_UNROLL_M 8
  518. #define SGEMM_DEFAULT_UNROLL_N 2
  519. #define DGEMM_DEFAULT_UNROLL_M 2
  520. #define DGEMM_DEFAULT_UNROLL_N 2
  521. #define QGEMM_DEFAULT_UNROLL_M 2
  522. #define QGEMM_DEFAULT_UNROLL_N 2
  523. #define CGEMM_DEFAULT_UNROLL_M 4
  524. #define CGEMM_DEFAULT_UNROLL_N 1
  525. #define ZGEMM_DEFAULT_UNROLL_M 1
  526. #define ZGEMM_DEFAULT_UNROLL_N 1
  527. #define XGEMM_DEFAULT_UNROLL_M 1
  528. #define XGEMM_DEFAULT_UNROLL_N 1
  529. #endif /* CORE_YONAH */
  /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
     defined in this part of the file; Q is the literal 256 throughout. */
  530. #define SGEMM_DEFAULT_P sgemm_p
  531. #define SGEMM_DEFAULT_Q 256
  532. #define SGEMM_DEFAULT_R sgemm_r
  533. #define DGEMM_DEFAULT_P dgemm_p
  534. #define DGEMM_DEFAULT_Q 256
  535. #define DGEMM_DEFAULT_R dgemm_r
  536. #define QGEMM_DEFAULT_P qgemm_p
  537. #define QGEMM_DEFAULT_Q 256
  538. #define QGEMM_DEFAULT_R qgemm_r
  539. #define CGEMM_DEFAULT_P cgemm_p
  540. #define CGEMM_DEFAULT_Q 256
  541. #define CGEMM_DEFAULT_R cgemm_r
  542. #define ZGEMM_DEFAULT_P zgemm_p
  543. #define ZGEMM_DEFAULT_Q 256
  544. #define ZGEMM_DEFAULT_R zgemm_r
  545. #define XGEMM_DEFAULT_P xgemm_p
  546. #define XGEMM_DEFAULT_Q 256
  547. #define XGEMM_DEFAULT_R xgemm_r
  548. #define SYMV_P 4
  549. #endif /* PENTIUMM */
  /* Parameter set used when CORE_NORTHWOOD is defined. There is no nested
     conditional: every value is fixed for this target. */
  550. #ifdef CORE_NORTHWOOD
  551. #define SNUMOPT 4
  552. #define DNUMOPT 2
  553. #define GEMM_DEFAULT_OFFSET_A 0
  554. #define GEMM_DEFAULT_OFFSET_B 32
  555. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  556. #define SYMV_P 8
  557. #define SGEMM_DEFAULT_UNROLL_M 8
  558. #define DGEMM_DEFAULT_UNROLL_M 4
  559. #define QGEMM_DEFAULT_UNROLL_M 2
  560. #define CGEMM_DEFAULT_UNROLL_M 4
  561. #define ZGEMM_DEFAULT_UNROLL_M 2
  562. #define XGEMM_DEFAULT_UNROLL_M 1
  563. #define SGEMM_DEFAULT_UNROLL_N 2
  564. #define DGEMM_DEFAULT_UNROLL_N 2
  565. #define QGEMM_DEFAULT_UNROLL_N 2
  566. #define CGEMM_DEFAULT_UNROLL_N 1
  567. #define ZGEMM_DEFAULT_UNROLL_N 1
  568. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
     defined in this part of the file. */
  569. #define SGEMM_DEFAULT_P sgemm_p
  570. #define SGEMM_DEFAULT_R sgemm_r
  571. #define DGEMM_DEFAULT_P dgemm_p
  572. #define DGEMM_DEFAULT_R dgemm_r
  573. #define QGEMM_DEFAULT_P qgemm_p
  574. #define QGEMM_DEFAULT_R qgemm_r
  575. #define CGEMM_DEFAULT_P cgemm_p
  576. #define CGEMM_DEFAULT_R cgemm_r
  577. #define ZGEMM_DEFAULT_P zgemm_p
  578. #define ZGEMM_DEFAULT_R zgemm_r
  579. #define XGEMM_DEFAULT_P xgemm_p
  580. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is the literal 128 for every prefix. */
  581. #define SGEMM_DEFAULT_Q 128
  582. #define DGEMM_DEFAULT_Q 128
  583. #define QGEMM_DEFAULT_Q 128
  584. #define CGEMM_DEFAULT_Q 128
  585. #define ZGEMM_DEFAULT_Q 128
  586. #define XGEMM_DEFAULT_Q 128
  587. #endif /* CORE_NORTHWOOD */
  /* Parameter set used when CORE_PRESCOTT is defined. __64BIT__ selects the
     GEMM buffer offsets and ARCH_X86 selects the UNROLL_M factors. */
  588. #ifdef CORE_PRESCOTT
  589. #define SNUMOPT 4
  590. #define DNUMOPT 2
  /* Note the inverted test: the first branch is the one taken when
     __64BIT__ is NOT defined. */
  591. #ifndef __64BIT__
  592. #define GEMM_DEFAULT_OFFSET_A 128
  593. #define GEMM_DEFAULT_OFFSET_B 192
  594. #else /* __64BIT__ */
  595. #define GEMM_DEFAULT_OFFSET_A 0
  596. #define GEMM_DEFAULT_OFFSET_B 256
  597. #endif /* !__64BIT__ */
  598. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  599. #define SYMV_P 8
  /* UNROLL_M depends on ARCH_X86; S, D, C and Z are halved in that branch. */
  600. #ifdef ARCH_X86
  601. #define SGEMM_DEFAULT_UNROLL_M 4
  602. #define DGEMM_DEFAULT_UNROLL_M 2
  603. #define QGEMM_DEFAULT_UNROLL_M 2
  604. #define CGEMM_DEFAULT_UNROLL_M 2
  605. #define ZGEMM_DEFAULT_UNROLL_M 1
  606. #define XGEMM_DEFAULT_UNROLL_M 1
  607. #else /* !ARCH_X86 */
  608. #define SGEMM_DEFAULT_UNROLL_M 8
  609. #define DGEMM_DEFAULT_UNROLL_M 4
  610. #define QGEMM_DEFAULT_UNROLL_M 2
  611. #define CGEMM_DEFAULT_UNROLL_M 4
  612. #define ZGEMM_DEFAULT_UNROLL_M 2
  613. #define XGEMM_DEFAULT_UNROLL_M 1
  614. #endif /* ARCH_X86 */
  615. #define SGEMM_DEFAULT_UNROLL_N 4
  616. #define DGEMM_DEFAULT_UNROLL_N 4
  617. #define QGEMM_DEFAULT_UNROLL_N 2
  618. #define CGEMM_DEFAULT_UNROLL_N 2
  619. #define ZGEMM_DEFAULT_UNROLL_N 2
  620. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
     defined in this part of the file. */
  621. #define SGEMM_DEFAULT_P sgemm_p
  622. #define SGEMM_DEFAULT_R sgemm_r
  623. #define DGEMM_DEFAULT_P dgemm_p
  624. #define DGEMM_DEFAULT_R dgemm_r
  625. #define QGEMM_DEFAULT_P qgemm_p
  626. #define QGEMM_DEFAULT_R qgemm_r
  627. #define CGEMM_DEFAULT_P cgemm_p
  628. #define CGEMM_DEFAULT_R cgemm_r
  629. #define ZGEMM_DEFAULT_P zgemm_p
  630. #define ZGEMM_DEFAULT_R zgemm_r
  631. #define XGEMM_DEFAULT_P xgemm_p
  632. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is the literal 128 for every prefix. */
  633. #define SGEMM_DEFAULT_Q 128
  634. #define DGEMM_DEFAULT_Q 128
  635. #define QGEMM_DEFAULT_Q 128
  636. #define CGEMM_DEFAULT_Q 128
  637. #define ZGEMM_DEFAULT_Q 128
  638. #define XGEMM_DEFAULT_Q 128
  639. #endif /* CORE_PRESCOTT */
  /* Parameter set used when CORE2 is defined. ARCH_X86 selects the UNROLL_N
     factors (UNROLL_M is the same in both branches) and additionally
     defines the MASK helper macro. */
  640. #ifdef CORE2
  641. #define SNUMOPT 8
  642. #define DNUMOPT 4
  643. #define GEMM_DEFAULT_OFFSET_A 448
  644. #define GEMM_DEFAULT_OFFSET_B 128
  645. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  646. #define SYMV_P 8
  647. #define SWITCH_RATIO 4
  648. #ifdef ARCH_X86
  649. #define SGEMM_DEFAULT_UNROLL_M 8
  650. #define DGEMM_DEFAULT_UNROLL_M 4
  651. #define QGEMM_DEFAULT_UNROLL_M 2
  652. #define CGEMM_DEFAULT_UNROLL_M 4
  653. #define ZGEMM_DEFAULT_UNROLL_M 2
  654. #define XGEMM_DEFAULT_UNROLL_M 1
  655. #define SGEMM_DEFAULT_UNROLL_N 2
  656. #define DGEMM_DEFAULT_UNROLL_N 2
  657. #define QGEMM_DEFAULT_UNROLL_N 2
  658. #define CGEMM_DEFAULT_UNROLL_N 1
  659. #define ZGEMM_DEFAULT_UNROLL_N 1
  660. #define XGEMM_DEFAULT_UNROLL_N 1
  /* MASK(a, b): with integer operands, rounds a up to a multiple of b
     (assuming a >= 0 and b > 0). Both arguments are evaluated more than
     once, so they must be free of side effects. The macro is defined only
     in this ARCH_X86 branch. */
  661. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  662. #else /* !ARCH_X86 */
  663. #define SGEMM_DEFAULT_UNROLL_M 8
  664. #define DGEMM_DEFAULT_UNROLL_M 4
  665. #define QGEMM_DEFAULT_UNROLL_M 2
  666. #define CGEMM_DEFAULT_UNROLL_M 4
  667. #define ZGEMM_DEFAULT_UNROLL_M 2
  668. #define XGEMM_DEFAULT_UNROLL_M 1
  669. #define SGEMM_DEFAULT_UNROLL_N 4
  670. #define DGEMM_DEFAULT_UNROLL_N 4
  671. #define QGEMM_DEFAULT_UNROLL_N 2
  672. #define CGEMM_DEFAULT_UNROLL_N 2
  673. #define ZGEMM_DEFAULT_UNROLL_N 2
  674. #define XGEMM_DEFAULT_UNROLL_N 1
  675. #endif /* ARCH_X86 */
  /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
     defined in this part of the file. */
  676. #define SGEMM_DEFAULT_P sgemm_p
  677. #define SGEMM_DEFAULT_R sgemm_r
  678. #define DGEMM_DEFAULT_P dgemm_p
  679. #define DGEMM_DEFAULT_R dgemm_r
  680. #define QGEMM_DEFAULT_P qgemm_p
  681. #define QGEMM_DEFAULT_R qgemm_r
  682. #define CGEMM_DEFAULT_P cgemm_p
  683. #define CGEMM_DEFAULT_R cgemm_r
  684. #define ZGEMM_DEFAULT_P zgemm_p
  685. #define ZGEMM_DEFAULT_R zgemm_r
  686. #define XGEMM_DEFAULT_P xgemm_p
  687. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is the literal 256 for every prefix. */
  688. #define SGEMM_DEFAULT_Q 256
  689. #define DGEMM_DEFAULT_Q 256
  690. #define QGEMM_DEFAULT_Q 256
  691. #define CGEMM_DEFAULT_Q 256
  692. #define ZGEMM_DEFAULT_Q 256
  693. #define XGEMM_DEFAULT_Q 256
  694. #endif /* CORE2 */
  // Tuning parameters selected when PENRYN is defined.
  // UNROLL_N is identical in both architecture branches; UNROLL_M is halved
  // on ARCH_X86 for S/D/C/Z (4/2/2/1 vs 8/4/4/2).
  // P and R expand to lowercase identifiers declared outside this view;
  // Q is a literal that halves from S/C (512) to D/Z (256) to Q/X (128).
  695. #ifdef PENRYN
  696. #define SNUMOPT 8
  697. #define DNUMOPT 4
  698. #define GEMM_DEFAULT_OFFSET_A 128
  699. #define GEMM_DEFAULT_OFFSET_B 0
  700. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  701. #define SYMV_P 8
  702. #define SWITCH_RATIO 4
  703. #ifdef ARCH_X86
  704. #define SGEMM_DEFAULT_UNROLL_M 4
  705. #define DGEMM_DEFAULT_UNROLL_M 2
  706. #define QGEMM_DEFAULT_UNROLL_M 2
  707. #define CGEMM_DEFAULT_UNROLL_M 2
  708. #define ZGEMM_DEFAULT_UNROLL_M 1
  709. #define XGEMM_DEFAULT_UNROLL_M 1
  710. #define SGEMM_DEFAULT_UNROLL_N 4
  711. #define DGEMM_DEFAULT_UNROLL_N 4
  712. #define QGEMM_DEFAULT_UNROLL_N 2
  713. #define CGEMM_DEFAULT_UNROLL_N 2
  714. #define ZGEMM_DEFAULT_UNROLL_N 2
  715. #define XGEMM_DEFAULT_UNROLL_N 1
  716. #else
  717. #define SGEMM_DEFAULT_UNROLL_M 8
  718. #define DGEMM_DEFAULT_UNROLL_M 4
  719. #define QGEMM_DEFAULT_UNROLL_M 2
  720. #define CGEMM_DEFAULT_UNROLL_M 4
  721. #define ZGEMM_DEFAULT_UNROLL_M 2
  722. #define XGEMM_DEFAULT_UNROLL_M 1
  723. #define SGEMM_DEFAULT_UNROLL_N 4
  724. #define DGEMM_DEFAULT_UNROLL_N 4
  725. #define QGEMM_DEFAULT_UNROLL_N 2
  726. #define CGEMM_DEFAULT_UNROLL_N 2
  727. #define ZGEMM_DEFAULT_UNROLL_N 2
  728. #define XGEMM_DEFAULT_UNROLL_N 1
  729. #endif
  730. #define SGEMM_DEFAULT_P sgemm_p
  731. #define SGEMM_DEFAULT_R sgemm_r
  732. #define DGEMM_DEFAULT_P dgemm_p
  733. #define DGEMM_DEFAULT_R dgemm_r
  734. #define QGEMM_DEFAULT_P qgemm_p
  735. #define QGEMM_DEFAULT_R qgemm_r
  736. #define CGEMM_DEFAULT_P cgemm_p
  737. #define CGEMM_DEFAULT_R cgemm_r
  738. #define ZGEMM_DEFAULT_P zgemm_p
  739. #define ZGEMM_DEFAULT_R zgemm_r
  740. #define XGEMM_DEFAULT_P xgemm_p
  741. #define XGEMM_DEFAULT_R xgemm_r
  742. #define SGEMM_DEFAULT_Q 512
  743. #define DGEMM_DEFAULT_Q 256
  744. #define QGEMM_DEFAULT_Q 128
  745. #define CGEMM_DEFAULT_Q 512
  746. #define ZGEMM_DEFAULT_Q 256
  747. #define XGEMM_DEFAULT_Q 128
  // GETRF_FACTOR: consumer is outside this view -- presumably an LU
  // factorization tuning ratio; confirm at the use site.
  748. #define GETRF_FACTOR 0.75
  749. #endif
  // Tuning parameters selected when DUNNINGTON is defined.
  // Offsets, alignment and unroll factors are the same values as the PENRYN
  // block; the differences are larger Q literals (768/384/192) and the
  // additional GEMM_THREAD selection at the end.
  750. #ifdef DUNNINGTON
  751. #define SNUMOPT 8
  752. #define DNUMOPT 4
  753. #define GEMM_DEFAULT_OFFSET_A 128
  754. #define GEMM_DEFAULT_OFFSET_B 0
  755. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  756. #define SYMV_P 8
  757. #define SWITCH_RATIO 4
  758. #ifdef ARCH_X86
  759. #define SGEMM_DEFAULT_UNROLL_M 4
  760. #define DGEMM_DEFAULT_UNROLL_M 2
  761. #define QGEMM_DEFAULT_UNROLL_M 2
  762. #define CGEMM_DEFAULT_UNROLL_M 2
  763. #define ZGEMM_DEFAULT_UNROLL_M 1
  764. #define XGEMM_DEFAULT_UNROLL_M 1
  765. #define SGEMM_DEFAULT_UNROLL_N 4
  766. #define DGEMM_DEFAULT_UNROLL_N 4
  767. #define QGEMM_DEFAULT_UNROLL_N 2
  768. #define CGEMM_DEFAULT_UNROLL_N 2
  769. #define ZGEMM_DEFAULT_UNROLL_N 2
  770. #define XGEMM_DEFAULT_UNROLL_N 1
  771. #else
  772. #define SGEMM_DEFAULT_UNROLL_M 8
  773. #define DGEMM_DEFAULT_UNROLL_M 4
  774. #define QGEMM_DEFAULT_UNROLL_M 2
  775. #define CGEMM_DEFAULT_UNROLL_M 4
  776. #define ZGEMM_DEFAULT_UNROLL_M 2
  777. #define XGEMM_DEFAULT_UNROLL_M 1
  778. #define SGEMM_DEFAULT_UNROLL_N 4
  779. #define DGEMM_DEFAULT_UNROLL_N 4
  780. #define QGEMM_DEFAULT_UNROLL_N 2
  781. #define CGEMM_DEFAULT_UNROLL_N 2
  782. #define ZGEMM_DEFAULT_UNROLL_N 2
  783. #define XGEMM_DEFAULT_UNROLL_N 1
  784. #endif
  785. #define SGEMM_DEFAULT_P sgemm_p
  786. #define SGEMM_DEFAULT_R sgemm_r
  787. #define DGEMM_DEFAULT_P dgemm_p
  788. #define DGEMM_DEFAULT_R dgemm_r
  789. #define QGEMM_DEFAULT_P qgemm_p
  790. #define QGEMM_DEFAULT_R qgemm_r
  791. #define CGEMM_DEFAULT_P cgemm_p
  792. #define CGEMM_DEFAULT_R cgemm_r
  793. #define ZGEMM_DEFAULT_P zgemm_p
  794. #define ZGEMM_DEFAULT_R zgemm_r
  795. #define XGEMM_DEFAULT_P xgemm_p
  796. #define XGEMM_DEFAULT_R xgemm_r
  797. #define SGEMM_DEFAULT_Q 768
  798. #define DGEMM_DEFAULT_Q 384
  799. #define QGEMM_DEFAULT_Q 192
  800. #define CGEMM_DEFAULT_Q 768
  801. #define ZGEMM_DEFAULT_Q 384
  802. #define XGEMM_DEFAULT_Q 192
  803. #define GETRF_FACTOR 0.75
  // GEMM_THREAD names the identifier gemm_thread_mn, which is declared
  // outside this view -- presumably a threading strategy; confirm at use site.
  804. #define GEMM_THREAD gemm_thread_mn
  805. #endif
  // Tuning parameters selected when NEHALEM is defined.
  // UNROLL_M is the same in both architecture branches; the non-ARCH_X86
  // branch doubles UNROLL_N for S/D (8) and C/Z (4).
  // Unlike the blocks above, P is a literal here (504 for S/D/Q, 252 for
  // C/Z/X) while R still expands to identifiers declared outside this view.
  806. #ifdef NEHALEM
  807. #define SNUMOPT 8
  808. #define DNUMOPT 4
  809. #define GEMM_DEFAULT_OFFSET_A 32
  810. #define GEMM_DEFAULT_OFFSET_B 0
  811. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  812. #define SYMV_P 8
  813. #define SWITCH_RATIO 4
  814. #ifdef ARCH_X86
  815. #define SGEMM_DEFAULT_UNROLL_M 4
  816. #define DGEMM_DEFAULT_UNROLL_M 2
  817. #define QGEMM_DEFAULT_UNROLL_M 2
  818. #define CGEMM_DEFAULT_UNROLL_M 2
  819. #define ZGEMM_DEFAULT_UNROLL_M 1
  820. #define XGEMM_DEFAULT_UNROLL_M 1
  821. #define SGEMM_DEFAULT_UNROLL_N 4
  822. #define DGEMM_DEFAULT_UNROLL_N 4
  823. #define QGEMM_DEFAULT_UNROLL_N 2
  824. #define CGEMM_DEFAULT_UNROLL_N 2
  825. #define ZGEMM_DEFAULT_UNROLL_N 2
  826. #define XGEMM_DEFAULT_UNROLL_N 1
  827. #else
  828. #define SGEMM_DEFAULT_UNROLL_M 4
  829. #define DGEMM_DEFAULT_UNROLL_M 2
  830. #define QGEMM_DEFAULT_UNROLL_M 2
  831. #define CGEMM_DEFAULT_UNROLL_M 2
  832. #define ZGEMM_DEFAULT_UNROLL_M 1
  833. #define XGEMM_DEFAULT_UNROLL_M 1
  834. #define SGEMM_DEFAULT_UNROLL_N 8
  835. #define DGEMM_DEFAULT_UNROLL_N 8
  836. #define QGEMM_DEFAULT_UNROLL_N 2
  837. #define CGEMM_DEFAULT_UNROLL_N 4
  838. #define ZGEMM_DEFAULT_UNROLL_N 4
  839. #define XGEMM_DEFAULT_UNROLL_N 1
  840. #endif
  841. #define SGEMM_DEFAULT_P 504
  842. #define SGEMM_DEFAULT_R sgemm_r
  843. #define DGEMM_DEFAULT_P 504
  844. #define DGEMM_DEFAULT_R dgemm_r
  845. #define QGEMM_DEFAULT_P 504
  846. #define QGEMM_DEFAULT_R qgemm_r
  847. #define CGEMM_DEFAULT_P 252
  848. #define CGEMM_DEFAULT_R cgemm_r
  849. #define ZGEMM_DEFAULT_P 252
  850. #define ZGEMM_DEFAULT_R zgemm_r
  851. #define XGEMM_DEFAULT_P 252
  852. #define XGEMM_DEFAULT_R xgemm_r
  853. #define SGEMM_DEFAULT_Q 512
  854. #define DGEMM_DEFAULT_Q 256
  855. #define QGEMM_DEFAULT_Q 128
  856. #define CGEMM_DEFAULT_Q 512
  857. #define ZGEMM_DEFAULT_Q 256
  858. #define XGEMM_DEFAULT_Q 128
  859. #define GETRF_FACTOR 0.72
  860. #endif
  // Tuning parameters selected when SANDYBRIDGE is defined.
  // The non-ARCH_X86 branch uses larger unroll factors than the ARCH_X86
  // branch (e.g. S 8x8 vs 4x4, D 8x4 vs 2x4, C 8x4 vs 2x2, Z 4x4 vs 1x2).
  // P values are literals and are not uniform across precisions.
  // R expands to identifiers declared outside this view, except
  // CGEMM_DEFAULT_R, which is the literal 1024. The commented-out #define
  // lines record the alternative (literal vs identifier) for each precision.
  // NOTE(review): CGEMM is the only precision using a literal R and a P of
  // 128 -- confirm this asymmetry is deliberate tuning.
  861. #ifdef SANDYBRIDGE
  862. #define SNUMOPT 8
  863. #define DNUMOPT 4
  864. #define GEMM_DEFAULT_OFFSET_A 0
  865. #define GEMM_DEFAULT_OFFSET_B 0
  866. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  867. #define SYMV_P 8
  868. #define SWITCH_RATIO 4
  869. #ifdef ARCH_X86
  870. #define SGEMM_DEFAULT_UNROLL_M 4
  871. #define DGEMM_DEFAULT_UNROLL_M 2
  872. #define QGEMM_DEFAULT_UNROLL_M 2
  873. #define CGEMM_DEFAULT_UNROLL_M 2
  874. #define ZGEMM_DEFAULT_UNROLL_M 1
  875. #define XGEMM_DEFAULT_UNROLL_M 1
  876. #define SGEMM_DEFAULT_UNROLL_N 4
  877. #define DGEMM_DEFAULT_UNROLL_N 4
  878. #define QGEMM_DEFAULT_UNROLL_N 2
  879. #define CGEMM_DEFAULT_UNROLL_N 2
  880. #define ZGEMM_DEFAULT_UNROLL_N 2
  881. #define XGEMM_DEFAULT_UNROLL_N 1
  882. #else
  883. #define SGEMM_DEFAULT_UNROLL_M 8
  884. #define DGEMM_DEFAULT_UNROLL_M 8
  885. #define QGEMM_DEFAULT_UNROLL_M 2
  886. #define CGEMM_DEFAULT_UNROLL_M 8
  887. #define ZGEMM_DEFAULT_UNROLL_M 4
  888. #define XGEMM_DEFAULT_UNROLL_M 1
  889. #define SGEMM_DEFAULT_UNROLL_N 8
  890. #define DGEMM_DEFAULT_UNROLL_N 4
  891. #define QGEMM_DEFAULT_UNROLL_N 2
  892. #define CGEMM_DEFAULT_UNROLL_N 4
  893. #define ZGEMM_DEFAULT_UNROLL_N 4
  894. #define XGEMM_DEFAULT_UNROLL_N 1
  895. #endif
  896. #define SGEMM_DEFAULT_P 512
  897. #define SGEMM_DEFAULT_R sgemm_r
  898. //#define SGEMM_DEFAULT_R 1024
  899. #define DGEMM_DEFAULT_P 512
  900. #define DGEMM_DEFAULT_R dgemm_r
  901. //#define DGEMM_DEFAULT_R 1024
  902. #define QGEMM_DEFAULT_P 504
  903. #define QGEMM_DEFAULT_R qgemm_r
  904. #define CGEMM_DEFAULT_P 128
  905. //#define CGEMM_DEFAULT_R cgemm_r
  906. #define CGEMM_DEFAULT_R 1024
  907. #define ZGEMM_DEFAULT_P 512
  908. #define ZGEMM_DEFAULT_R zgemm_r
  909. //#define ZGEMM_DEFAULT_R 1024
  910. #define XGEMM_DEFAULT_P 252
  911. #define XGEMM_DEFAULT_R xgemm_r
  912. #define SGEMM_DEFAULT_Q 256
  913. #define DGEMM_DEFAULT_Q 256
  914. #define QGEMM_DEFAULT_Q 128
  915. #define CGEMM_DEFAULT_Q 256
  916. #define ZGEMM_DEFAULT_Q 192
  917. #define XGEMM_DEFAULT_Q 128
  918. #define GETRF_FACTOR 0.72
  919. #endif
  // Tuning parameters selected when ATOM is defined.
  // Only UNROLL_M depends on ARCH_X86 here; UNROLL_N is shared by both
  // branches. This block uses a wider alignment mask (0x0ffffUL) than the
  // other x86 blocks above and does not define SWITCH_RATIO or GETRF_FACTOR.
  // P and R expand to identifiers declared outside this view; Q is 256.
  920. #ifdef ATOM
  921. #define SNUMOPT 2
  922. #define DNUMOPT 1
  923. #define GEMM_DEFAULT_OFFSET_A 64
  924. #define GEMM_DEFAULT_OFFSET_B 0
  925. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  926. #define SYMV_P 8
  927. #ifdef ARCH_X86
  928. #define SGEMM_DEFAULT_UNROLL_M 4
  929. #define DGEMM_DEFAULT_UNROLL_M 2
  930. #define QGEMM_DEFAULT_UNROLL_M 2
  931. #define CGEMM_DEFAULT_UNROLL_M 2
  932. #define ZGEMM_DEFAULT_UNROLL_M 1
  933. #define XGEMM_DEFAULT_UNROLL_M 1
  934. #else
  935. #define SGEMM_DEFAULT_UNROLL_M 8
  936. #define DGEMM_DEFAULT_UNROLL_M 4
  937. #define QGEMM_DEFAULT_UNROLL_M 2
  938. #define CGEMM_DEFAULT_UNROLL_M 4
  939. #define ZGEMM_DEFAULT_UNROLL_M 2
  940. #define XGEMM_DEFAULT_UNROLL_M 1
  941. #endif
  942. #define SGEMM_DEFAULT_UNROLL_N 4
  943. #define DGEMM_DEFAULT_UNROLL_N 2
  944. #define QGEMM_DEFAULT_UNROLL_N 2
  945. #define CGEMM_DEFAULT_UNROLL_N 2
  946. #define ZGEMM_DEFAULT_UNROLL_N 1
  947. #define XGEMM_DEFAULT_UNROLL_N 1
  948. #define SGEMM_DEFAULT_P sgemm_p
  949. #define SGEMM_DEFAULT_R sgemm_r
  950. #define DGEMM_DEFAULT_P dgemm_p
  951. #define DGEMM_DEFAULT_R dgemm_r
  952. #define QGEMM_DEFAULT_P qgemm_p
  953. #define QGEMM_DEFAULT_R qgemm_r
  954. #define CGEMM_DEFAULT_P cgemm_p
  955. #define CGEMM_DEFAULT_R cgemm_r
  956. #define ZGEMM_DEFAULT_P zgemm_p
  957. #define ZGEMM_DEFAULT_R zgemm_r
  958. #define XGEMM_DEFAULT_P xgemm_p
  959. #define XGEMM_DEFAULT_R xgemm_r
  960. #define SGEMM_DEFAULT_Q 256
  961. #define DGEMM_DEFAULT_Q 256
  962. #define QGEMM_DEFAULT_Q 256
  963. #define CGEMM_DEFAULT_Q 256
  964. #define ZGEMM_DEFAULT_Q 256
  965. #define XGEMM_DEFAULT_Q 256
  966. #endif
  // Tuning parameters selected when ITANIUM2 is defined.
  // No architecture sub-branch: unroll is 8x8 for the real precisions
  // (S/D/Q) and 4x4 for the complex ones (C/Z/X). P and R expand to
  // identifiers declared outside this view; Q is 1024 for every precision.
  967. #ifdef ITANIUM2
  968. #define SNUMOPT 4
  969. #define DNUMOPT 4
  970. #define GEMM_DEFAULT_OFFSET_A 0
  971. #define GEMM_DEFAULT_OFFSET_B 128
  972. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  973. #define SGEMM_DEFAULT_UNROLL_M 8
  974. #define SGEMM_DEFAULT_UNROLL_N 8
  975. #define DGEMM_DEFAULT_UNROLL_M 8
  976. #define DGEMM_DEFAULT_UNROLL_N 8
  977. #define QGEMM_DEFAULT_UNROLL_M 8
  978. #define QGEMM_DEFAULT_UNROLL_N 8
  979. #define CGEMM_DEFAULT_UNROLL_M 4
  980. #define CGEMM_DEFAULT_UNROLL_N 4
  981. #define ZGEMM_DEFAULT_UNROLL_M 4
  982. #define ZGEMM_DEFAULT_UNROLL_N 4
  983. #define XGEMM_DEFAULT_UNROLL_M 4
  984. #define XGEMM_DEFAULT_UNROLL_N 4
  985. #define SGEMM_DEFAULT_P sgemm_p
  986. #define DGEMM_DEFAULT_P dgemm_p
  987. #define QGEMM_DEFAULT_P qgemm_p
  988. #define CGEMM_DEFAULT_P cgemm_p
  989. #define ZGEMM_DEFAULT_P zgemm_p
  990. #define XGEMM_DEFAULT_P xgemm_p
  991. #define SGEMM_DEFAULT_Q 1024
  992. #define DGEMM_DEFAULT_Q 1024
  993. #define QGEMM_DEFAULT_Q 1024
  994. #define CGEMM_DEFAULT_Q 1024
  995. #define ZGEMM_DEFAULT_Q 1024
  996. #define XGEMM_DEFAULT_Q 1024
  997. #define SGEMM_DEFAULT_R sgemm_r
  998. #define DGEMM_DEFAULT_R dgemm_r
  999. #define QGEMM_DEFAULT_R qgemm_r
  1000. #define CGEMM_DEFAULT_R cgemm_r
  1001. #define ZGEMM_DEFAULT_R zgemm_r
  1002. #define XGEMM_DEFAULT_R xgemm_r
  1003. #define SYMV_P 16
  1004. #define GETRF_FACTOR 0.65
  1005. #endif
  // Tuning parameters shared by the EV4, EV5 and EV6 targets.
  // Offsets, alignment, unroll factors and SYMV_P are common; SNUMOPT/DNUMOPT
  // and the P/Q blocking literals are chosen per target in the nested
  // conditionals. Only S/D/C/Z are configured here (no Q/X entries).
  // NOTE(review): *_DEFAULT_R is defined only in the EV4 branch; EV5 and EV6
  // presumably get R from a default outside this view -- confirm.
  1006. #if defined(EV4) || defined(EV5) || defined(EV6)
  1007. #ifdef EV4
  1008. #define SNUMOPT 1
  1009. #define DNUMOPT 1
  1010. #else
  1011. #define SNUMOPT 2
  1012. #define DNUMOPT 2
  1013. #endif
  1014. #define GEMM_DEFAULT_OFFSET_A 512
  1015. #define GEMM_DEFAULT_OFFSET_B 512
  1016. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1017. #define SGEMM_DEFAULT_UNROLL_M 4
  1018. #define SGEMM_DEFAULT_UNROLL_N 4
  1019. #define DGEMM_DEFAULT_UNROLL_M 4
  1020. #define DGEMM_DEFAULT_UNROLL_N 4
  1021. #define CGEMM_DEFAULT_UNROLL_M 2
  1022. #define CGEMM_DEFAULT_UNROLL_N 2
  1023. #define ZGEMM_DEFAULT_UNROLL_M 2
  1024. #define ZGEMM_DEFAULT_UNROLL_N 2
  1025. #define SYMV_P 8
  // EV4: smallest blocking, and the only target here with literal R values.
  1026. #ifdef EV4
  1027. #define SGEMM_DEFAULT_P 32
  1028. #define SGEMM_DEFAULT_Q 112
  1029. #define SGEMM_DEFAULT_R 256
  1030. #define DGEMM_DEFAULT_P 32
  1031. #define DGEMM_DEFAULT_Q 56
  1032. #define DGEMM_DEFAULT_R 256
  1033. #define CGEMM_DEFAULT_P 32
  1034. #define CGEMM_DEFAULT_Q 64
  1035. #define CGEMM_DEFAULT_R 240
  1036. #define ZGEMM_DEFAULT_P 32
  1037. #define ZGEMM_DEFAULT_Q 32
  1038. #define ZGEMM_DEFAULT_R 240
  1039. #endif
  // EV5: P and Q only.
  1040. #ifdef EV5
  1041. #define SGEMM_DEFAULT_P 64
  1042. #define SGEMM_DEFAULT_Q 256
  1043. #define DGEMM_DEFAULT_P 64
  1044. #define DGEMM_DEFAULT_Q 128
  1045. #define CGEMM_DEFAULT_P 64
  1046. #define CGEMM_DEFAULT_Q 128
  1047. #define ZGEMM_DEFAULT_P 64
  1048. #define ZGEMM_DEFAULT_Q 64
  1049. #endif
  // EV6: P and Q only.
  1050. #ifdef EV6
  1051. #define SGEMM_DEFAULT_P 256
  1052. #define SGEMM_DEFAULT_Q 512
  1053. #define DGEMM_DEFAULT_P 256
  1054. #define DGEMM_DEFAULT_Q 256
  1055. #define CGEMM_DEFAULT_P 256
  1056. #define CGEMM_DEFAULT_Q 256
  1057. #define ZGEMM_DEFAULT_P 128
  1058. #define ZGEMM_DEFAULT_Q 256
  1059. #endif
  1060. #endif
  // Tuning parameters selected when CELL is defined.
  // All values are literals; only S/D/C/Z are configured and no
  // *_DEFAULT_R is defined in this block.
  1061. #ifdef CELL
  1062. #define SNUMOPT 2
  1063. #define DNUMOPT 2
  1064. #define GEMM_DEFAULT_OFFSET_A 0
  1065. #define GEMM_DEFAULT_OFFSET_B 8192
  1066. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1067. #define SGEMM_DEFAULT_UNROLL_M 16
  1068. #define SGEMM_DEFAULT_UNROLL_N 4
  1069. #define DGEMM_DEFAULT_UNROLL_M 4
  1070. #define DGEMM_DEFAULT_UNROLL_N 4
  1071. #define CGEMM_DEFAULT_UNROLL_M 8
  1072. #define CGEMM_DEFAULT_UNROLL_N 2
  1073. #define ZGEMM_DEFAULT_UNROLL_M 2
  1074. #define ZGEMM_DEFAULT_UNROLL_N 2
  1075. #define SGEMM_DEFAULT_P 128
  1076. #define DGEMM_DEFAULT_P 128
  1077. #define CGEMM_DEFAULT_P 128
  1078. #define ZGEMM_DEFAULT_P 128
  1079. #define SGEMM_DEFAULT_Q 512
  1080. #define DGEMM_DEFAULT_Q 256
  1081. #define CGEMM_DEFAULT_Q 256
  1082. #define ZGEMM_DEFAULT_Q 128
  1083. #define SYMV_P 4
  1084. #endif
  // Tuning parameters selected when PPCG4 is defined.
  // Unroll factors match the CELL and PPC970 blocks (S 16x4, D 4x4, C 8x2,
  // Z 2x2); P/Q are literals and no *_DEFAULT_R is defined here.
  // NOTE(review): unlike the neighbouring blocks, SNUMOPT and DNUMOPT are not
  // defined in this block -- confirm they are not required for PPCG4 or are
  // supplied elsewhere.
  1085. #ifdef PPCG4
  1086. #define GEMM_DEFAULT_OFFSET_A 0
  1087. #define GEMM_DEFAULT_OFFSET_B 1024
  1088. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1089. #define SGEMM_DEFAULT_UNROLL_M 16
  1090. #define SGEMM_DEFAULT_UNROLL_N 4
  1091. #define DGEMM_DEFAULT_UNROLL_M 4
  1092. #define DGEMM_DEFAULT_UNROLL_N 4
  1093. #define CGEMM_DEFAULT_UNROLL_M 8
  1094. #define CGEMM_DEFAULT_UNROLL_N 2
  1095. #define ZGEMM_DEFAULT_UNROLL_M 2
  1096. #define ZGEMM_DEFAULT_UNROLL_N 2
  1097. #define SGEMM_DEFAULT_P 256
  1098. #define DGEMM_DEFAULT_P 128
  1099. #define CGEMM_DEFAULT_P 128
  1100. #define ZGEMM_DEFAULT_P 64
  1101. #define SGEMM_DEFAULT_Q 256
  1102. #define DGEMM_DEFAULT_Q 256
  1103. #define CGEMM_DEFAULT_Q 256
  1104. #define ZGEMM_DEFAULT_Q 256
  1105. #define SYMV_P 4
  1106. #endif
  // Tuning parameters selected when PPC970 is defined.
  // Offsets, alignment, unroll factors, Q and SYMV_P are unconditional.
  // The P values are chosen from the L2_SIZE macro (defined outside this
  // view): the larger set when L2_SIZE equals 1024976, otherwise 176.
  // NOTE(review): 1024976 is not 1024*1024 (1048576). It presumably mirrors
  // the exact L2_SIZE value emitted by the build configuration for this
  // target -- verify against that definition before "correcting" it.
  // NOTE(review): *_DEFAULT_P is defined only under OS_LINUX in this block;
  // confirm other operating systems obtain P from somewhere else.
  1107. #ifdef PPC970
  1108. #define SNUMOPT 4
  1109. #define DNUMOPT 4
  1110. #define GEMM_DEFAULT_OFFSET_A 2688
  1111. #define GEMM_DEFAULT_OFFSET_B 3072
  1112. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1113. #define SGEMM_DEFAULT_UNROLL_M 16
  1114. #define SGEMM_DEFAULT_UNROLL_N 4
  1115. #define DGEMM_DEFAULT_UNROLL_M 4
  1116. #define DGEMM_DEFAULT_UNROLL_N 4
  1117. #define CGEMM_DEFAULT_UNROLL_M 8
  1118. #define CGEMM_DEFAULT_UNROLL_N 2
  1119. #define ZGEMM_DEFAULT_UNROLL_M 2
  1120. #define ZGEMM_DEFAULT_UNROLL_N 2
  1121. #ifdef OS_LINUX
  1122. #if L2_SIZE == 1024976
  1123. #define SGEMM_DEFAULT_P 320
  1124. #define DGEMM_DEFAULT_P 256
  1125. #define CGEMM_DEFAULT_P 256
  1126. #define ZGEMM_DEFAULT_P 256
  1127. #else
  1128. #define SGEMM_DEFAULT_P 176
  1129. #define DGEMM_DEFAULT_P 176
  1130. #define CGEMM_DEFAULT_P 176
  1131. #define ZGEMM_DEFAULT_P 176
  1132. #endif
  1133. #endif
  1134. #define SGEMM_DEFAULT_Q 512
  1135. #define DGEMM_DEFAULT_Q 256
  1136. #define CGEMM_DEFAULT_Q 256
  1137. #define ZGEMM_DEFAULT_Q 128
  1138. #define SYMV_P 4
  1139. #endif
  // Tuning parameters selected when PPC440 is defined.
  // Both offsets are written as (32 * 0), i.e. they evaluate to 0.
  // Each *_DEFAULT_R is defined as the matching *_DEFAULT_P macro, so R
  // tracks P (512) for every precision configured here.
  1140. #ifdef PPC440
  1141. #define SNUMOPT 2
  1142. #define DNUMOPT 2
  1143. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1144. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1145. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1146. #define SGEMM_DEFAULT_UNROLL_M 4
  1147. #define SGEMM_DEFAULT_UNROLL_N 4
  1148. #define DGEMM_DEFAULT_UNROLL_M 4
  1149. #define DGEMM_DEFAULT_UNROLL_N 4
  1150. #define CGEMM_DEFAULT_UNROLL_M 2
  1151. #define CGEMM_DEFAULT_UNROLL_N 2
  1152. #define ZGEMM_DEFAULT_UNROLL_M 2
  1153. #define ZGEMM_DEFAULT_UNROLL_N 2
  1154. #define SGEMM_DEFAULT_P 512
  1155. #define DGEMM_DEFAULT_P 512
  1156. #define CGEMM_DEFAULT_P 512
  1157. #define ZGEMM_DEFAULT_P 512
  1158. #define SGEMM_DEFAULT_Q 1024
  1159. #define DGEMM_DEFAULT_Q 512
  1160. #define CGEMM_DEFAULT_Q 512
  1161. #define ZGEMM_DEFAULT_Q 256
  1162. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1163. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1164. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1165. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1166. #define SYMV_P 4
  1167. #endif
  // Tuning parameters selected when PPC440FP2 is defined.
  // Both offsets are written as (32 * 0), i.e. they evaluate to 0.
  // The Q values sit behind a hard-coded "#if 1": the first (large) set is
  // always used and the #else set is currently dead, kept as an alternative.
  1168. #ifdef PPC440FP2
  1169. #define SNUMOPT 4
  1170. #define DNUMOPT 4
  1171. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1172. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1173. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1174. #define SGEMM_DEFAULT_UNROLL_M 8
  1175. #define SGEMM_DEFAULT_UNROLL_N 4
  1176. #define DGEMM_DEFAULT_UNROLL_M 8
  1177. #define DGEMM_DEFAULT_UNROLL_N 4
  1178. #define CGEMM_DEFAULT_UNROLL_M 4
  1179. #define CGEMM_DEFAULT_UNROLL_N 2
  1180. #define ZGEMM_DEFAULT_UNROLL_M 4
  1181. #define ZGEMM_DEFAULT_UNROLL_N 2
  1182. #define SGEMM_DEFAULT_P 128
  1183. #define DGEMM_DEFAULT_P 128
  1184. #define CGEMM_DEFAULT_P 128
  1185. #define ZGEMM_DEFAULT_P 128
  1186. #if 1
  1187. #define SGEMM_DEFAULT_Q 4096
  1188. #define DGEMM_DEFAULT_Q 3072
  1189. #define CGEMM_DEFAULT_Q 2048
  1190. #define ZGEMM_DEFAULT_Q 1024
  1191. #else
  1192. #define SGEMM_DEFAULT_Q 512
  1193. #define DGEMM_DEFAULT_Q 256
  1194. #define CGEMM_DEFAULT_Q 256
  1195. #define ZGEMM_DEFAULT_Q 128
  1196. #endif
  1197. #define SYMV_P 4
  1198. #endif
  // Tuning parameters shared by the POWER3, POWER4 and POWER5 targets.
  // Offsets, alignment, unroll factors and SYMV_P are common; the blocking
  // values are set per target in the nested conditionals below.
  // NOTE(review): coverage differs per target within this block --
  //   POWER3 defines SNUMOPT/DNUMOPT and P/Q/R for S, D and Z but nothing
  //   for C; POWER4 defines only P; POWER5 defines P and Q.
  // The missing values are presumably supplied by defaults outside this
  // view -- confirm before relying on them.
  1199. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1200. #define GEMM_DEFAULT_OFFSET_A 0
  1201. #define GEMM_DEFAULT_OFFSET_B 2048
  1202. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1203. #define SGEMM_DEFAULT_UNROLL_M 4
  1204. #define SGEMM_DEFAULT_UNROLL_N 4
  1205. #define DGEMM_DEFAULT_UNROLL_M 4
  1206. #define DGEMM_DEFAULT_UNROLL_N 4
  1207. #define CGEMM_DEFAULT_UNROLL_M 2
  1208. #define CGEMM_DEFAULT_UNROLL_N 2
  1209. #define ZGEMM_DEFAULT_UNROLL_M 2
  1210. #define ZGEMM_DEFAULT_UNROLL_N 2
  1211. #ifdef POWER3
  1212. #define SNUMOPT 4
  1213. #define DNUMOPT 4
  1214. #define SGEMM_DEFAULT_P 256
  1215. #define SGEMM_DEFAULT_Q 432
  1216. #define SGEMM_DEFAULT_R 1012
  1217. #define DGEMM_DEFAULT_P 256
  1218. #define DGEMM_DEFAULT_Q 216
  1219. #define DGEMM_DEFAULT_R 1012
  1220. #define ZGEMM_DEFAULT_P 256
  1221. #define ZGEMM_DEFAULT_Q 104
  1222. #define ZGEMM_DEFAULT_R 1012
  1223. #endif
  // POWER4: P is larger when ALLOC_HUGETLB is defined (184 vs 144).
  1224. #if defined(POWER4)
  1225. #ifdef ALLOC_HUGETLB
  1226. #define SGEMM_DEFAULT_P 184
  1227. #define DGEMM_DEFAULT_P 184
  1228. #define CGEMM_DEFAULT_P 184
  1229. #define ZGEMM_DEFAULT_P 184
  1230. #else
  1231. #define SGEMM_DEFAULT_P 144
  1232. #define DGEMM_DEFAULT_P 144
  1233. #define CGEMM_DEFAULT_P 144
  1234. #define ZGEMM_DEFAULT_P 144
  1235. #endif
  1236. #endif
  // POWER5: P is larger when ALLOC_HUGETLB is defined; Q is 256 either way.
  1237. #if defined(POWER5)
  1238. #ifdef ALLOC_HUGETLB
  1239. #define SGEMM_DEFAULT_P 512
  1240. #define DGEMM_DEFAULT_P 256
  1241. #define CGEMM_DEFAULT_P 256
  1242. #define ZGEMM_DEFAULT_P 128
  1243. #else
  1244. #define SGEMM_DEFAULT_P 320
  1245. #define DGEMM_DEFAULT_P 160
  1246. #define CGEMM_DEFAULT_P 160
  1247. #define ZGEMM_DEFAULT_P 80
  1248. #endif
  1249. #define SGEMM_DEFAULT_Q 256
  1250. #define CGEMM_DEFAULT_Q 256
  1251. #define DGEMM_DEFAULT_Q 256
  1252. #define ZGEMM_DEFAULT_Q 256
  1253. #endif
  1254. #define SYMV_P 8
  1255. #endif
  // Tuning parameters selected when POWER6 is defined.
  // All values are literals. The complex kernels use a non-square unroll
  // (M = 2, N = 4); no *_DEFAULT_R is defined in this block.
  1256. #if defined(POWER6)
  1257. #define SNUMOPT 4
  1258. #define DNUMOPT 4
  1259. #define GEMM_DEFAULT_OFFSET_A 384
  1260. #define GEMM_DEFAULT_OFFSET_B 1024
  1261. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1262. #define SGEMM_DEFAULT_UNROLL_M 4
  1263. #define SGEMM_DEFAULT_UNROLL_N 4
  1264. #define DGEMM_DEFAULT_UNROLL_M 4
  1265. #define DGEMM_DEFAULT_UNROLL_N 4
  1266. #define CGEMM_DEFAULT_UNROLL_M 2
  1267. #define CGEMM_DEFAULT_UNROLL_N 4
  1268. #define ZGEMM_DEFAULT_UNROLL_M 2
  1269. #define ZGEMM_DEFAULT_UNROLL_N 4
  1270. #define SGEMM_DEFAULT_P 992
  1271. #define DGEMM_DEFAULT_P 480
  1272. #define CGEMM_DEFAULT_P 488
  1273. #define ZGEMM_DEFAULT_P 248
  1274. #define SGEMM_DEFAULT_Q 504
  1275. #define DGEMM_DEFAULT_Q 504
  1276. #define CGEMM_DEFAULT_Q 400
  1277. #define ZGEMM_DEFAULT_Q 400
  1278. #define SYMV_P 8
  1279. #endif
  // Tuning parameters selected when both SPARC and V7 are defined.
  // Unroll is narrow in M and wide in N (S/D 2x8, C/Z 1x4). All blocking
  // values are literals; no *_DEFAULT_R is defined in this block.
  // GEMM_THREAD names gemm_thread_mn, which is declared outside this view.
  1280. #if defined(SPARC) && defined(V7)
  1281. #define SNUMOPT 4
  1282. #define DNUMOPT 4
  1283. #define GEMM_DEFAULT_OFFSET_A 0
  1284. #define GEMM_DEFAULT_OFFSET_B 2048
  1285. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1286. #define SGEMM_DEFAULT_UNROLL_M 2
  1287. #define SGEMM_DEFAULT_UNROLL_N 8
  1288. #define DGEMM_DEFAULT_UNROLL_M 2
  1289. #define DGEMM_DEFAULT_UNROLL_N 8
  1290. #define CGEMM_DEFAULT_UNROLL_M 1
  1291. #define CGEMM_DEFAULT_UNROLL_N 4
  1292. #define ZGEMM_DEFAULT_UNROLL_M 1
  1293. #define ZGEMM_DEFAULT_UNROLL_N 4
  1294. #define SGEMM_DEFAULT_P 256
  1295. #define DGEMM_DEFAULT_P 256
  1296. #define CGEMM_DEFAULT_P 256
  1297. #define ZGEMM_DEFAULT_P 256
  1298. #define SGEMM_DEFAULT_Q 512
  1299. #define DGEMM_DEFAULT_Q 256
  1300. #define CGEMM_DEFAULT_Q 256
  1301. #define ZGEMM_DEFAULT_Q 128
  1302. #define SYMV_P 8
  1303. #define GEMM_THREAD gemm_thread_mn
  1304. #endif
  // Tuning parameters selected when SPARC and V9 are both defined, or when
  // __sparc_v9__ is defined. Square unroll (S/D 4x4, C/Z 2x2); all
  // blocking values are literals and no *_DEFAULT_R is defined here.
  1305. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1306. #define SNUMOPT 2
  1307. #define DNUMOPT 2
  1308. #define GEMM_DEFAULT_OFFSET_A 0
  1309. #define GEMM_DEFAULT_OFFSET_B 2048
  1310. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1311. #define SGEMM_DEFAULT_UNROLL_M 4
  1312. #define SGEMM_DEFAULT_UNROLL_N 4
  1313. #define DGEMM_DEFAULT_UNROLL_M 4
  1314. #define DGEMM_DEFAULT_UNROLL_N 4
  1315. #define CGEMM_DEFAULT_UNROLL_M 2
  1316. #define CGEMM_DEFAULT_UNROLL_N 2
  1317. #define ZGEMM_DEFAULT_UNROLL_M 2
  1318. #define ZGEMM_DEFAULT_UNROLL_N 2
  1319. #define SGEMM_DEFAULT_P 512
  1320. #define DGEMM_DEFAULT_P 512
  1321. #define CGEMM_DEFAULT_P 512
  1322. #define ZGEMM_DEFAULT_P 512
  1323. #define SGEMM_DEFAULT_Q 1024
  1324. #define DGEMM_DEFAULT_Q 512
  1325. #define CGEMM_DEFAULT_Q 512
  1326. #define ZGEMM_DEFAULT_Q 256
  1327. #define SYMV_P 8
  1328. #endif
  // Tuning parameters selected when SICORTEX is defined.
  // Unroll is narrow in M and wide in N (S/D 2x8, C/Z 1x4). P, Q and R are
  // all literals; R is 2000 for every precision configured here.
  1329. #ifdef SICORTEX
  1330. #define SNUMOPT 2
  1331. #define DNUMOPT 2
  1332. #define GEMM_DEFAULT_OFFSET_A 0
  1333. #define GEMM_DEFAULT_OFFSET_B 0
  1334. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1335. #define SGEMM_DEFAULT_UNROLL_M 2
  1336. #define SGEMM_DEFAULT_UNROLL_N 8
  1337. #define DGEMM_DEFAULT_UNROLL_M 2
  1338. #define DGEMM_DEFAULT_UNROLL_N 8
  1339. #define CGEMM_DEFAULT_UNROLL_M 1
  1340. #define CGEMM_DEFAULT_UNROLL_N 4
  1341. #define ZGEMM_DEFAULT_UNROLL_M 1
  1342. #define ZGEMM_DEFAULT_UNROLL_N 4
  1343. #define SGEMM_DEFAULT_P 108
  1344. #define DGEMM_DEFAULT_P 112
  1345. #define CGEMM_DEFAULT_P 108
  1346. #define ZGEMM_DEFAULT_P 112
  1347. #define SGEMM_DEFAULT_Q 288
  1348. #define DGEMM_DEFAULT_Q 144
  1349. #define CGEMM_DEFAULT_Q 144
  1350. #define ZGEMM_DEFAULT_Q 72
  1351. #define SGEMM_DEFAULT_R 2000
  1352. #define DGEMM_DEFAULT_R 2000
  1353. #define CGEMM_DEFAULT_R 2000
  1354. #define ZGEMM_DEFAULT_R 2000
  1355. #define SYMV_P 16
  1356. #endif
  // Tuning parameters selected when LOONGSON3A is defined.
  // NOTE(review): DGEMM_DEFAULT_R expands to the identifier dgemm_r (declared
  // outside this view) while the S/C/Z R values are the literal 640 --
  // confirm the mixed form is intentional.
  // GEMM_OFFSET_A1 / GEMM_OFFSET_B1 appear only in the LOONGSON3A/3B blocks
  // within the visible part of the file; their consumer is outside this view.
  1357. #ifdef LOONGSON3A
  1358. // Block originally copied from SICORTEX; the unroll and blocking values below have since diverged from it.
  1359. #define SNUMOPT 2
  1360. #define DNUMOPT 2
  1361. #define GEMM_DEFAULT_OFFSET_A 0
  1362. #define GEMM_DEFAULT_OFFSET_B 0
  1363. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1364. #define SGEMM_DEFAULT_UNROLL_M 8
  1365. #define SGEMM_DEFAULT_UNROLL_N 4
  1366. #define DGEMM_DEFAULT_UNROLL_M 4
  1367. #define DGEMM_DEFAULT_UNROLL_N 4
  1368. #define CGEMM_DEFAULT_UNROLL_M 4
  1369. #define CGEMM_DEFAULT_UNROLL_N 2
  1370. #define ZGEMM_DEFAULT_UNROLL_M 2
  1371. #define ZGEMM_DEFAULT_UNROLL_N 2
  1372. #define SGEMM_DEFAULT_P 64
  1373. #define DGEMM_DEFAULT_P 44
  1374. #define CGEMM_DEFAULT_P 64
  1375. #define ZGEMM_DEFAULT_P 32
  1376. #define SGEMM_DEFAULT_Q 192
  1377. #define DGEMM_DEFAULT_Q 92
  1378. #define CGEMM_DEFAULT_Q 128
  1379. #define ZGEMM_DEFAULT_Q 80
  1380. #define SGEMM_DEFAULT_R 640
  1381. #define DGEMM_DEFAULT_R dgemm_r
  1382. #define CGEMM_DEFAULT_R 640
  1383. #define ZGEMM_DEFAULT_R 640
  1384. #define GEMM_OFFSET_A1 0x10000
  1385. #define GEMM_OFFSET_B1 0x100000
  1386. #define SYMV_P 16
  1387. #endif
  // Tuning parameters selected when LOONGSON3B is defined.
  // Every kernel uses a 2x2 unroll; P, Q and R are all literals (R = 512).
  // GEMM_OFFSET_A1 / GEMM_OFFSET_B1 carry the same values as in the
  // LOONGSON3A block; their consumer is outside this view.
  1388. #ifdef LOONGSON3B
  1389. #define SNUMOPT 2
  1390. #define DNUMOPT 2
  1391. #define GEMM_DEFAULT_OFFSET_A 0
  1392. #define GEMM_DEFAULT_OFFSET_B 0
  1393. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1394. #define SGEMM_DEFAULT_UNROLL_M 2
  1395. #define SGEMM_DEFAULT_UNROLL_N 2
  1396. #define DGEMM_DEFAULT_UNROLL_M 2
  1397. #define DGEMM_DEFAULT_UNROLL_N 2
  1398. #define CGEMM_DEFAULT_UNROLL_M 2
  1399. #define CGEMM_DEFAULT_UNROLL_N 2
  1400. #define ZGEMM_DEFAULT_UNROLL_M 2
  1401. #define ZGEMM_DEFAULT_UNROLL_N 2
  1402. #define SGEMM_DEFAULT_P 64
  1403. #define DGEMM_DEFAULT_P 24
  1404. #define CGEMM_DEFAULT_P 24
  1405. #define ZGEMM_DEFAULT_P 20
  1406. #define SGEMM_DEFAULT_Q 192
  1407. #define DGEMM_DEFAULT_Q 128
  1408. #define CGEMM_DEFAULT_Q 128
  1409. #define ZGEMM_DEFAULT_Q 64
  1410. #define SGEMM_DEFAULT_R 512
  1411. #define DGEMM_DEFAULT_R 512
  1412. #define CGEMM_DEFAULT_R 512
  1413. #define ZGEMM_DEFAULT_R 512
  1414. #define GEMM_OFFSET_A1 0x10000
  1415. #define GEMM_OFFSET_B1 0x100000
  1416. #define SYMV_P 16
  1417. #endif
  // Tuning parameters selected when GENERIC is defined.
  // UNROLL_N is fixed for both architectures; only UNROLL_M depends on
  // ARCH_X86 (S/D/C/Z halved there: 4/2/2/1 vs 8/4/4/2).
  // P and R expand to identifiers declared outside this view; Q is the
  // literal 128 for every precision.
  1418. #ifdef GENERIC
  1419. #define SNUMOPT 2
  1420. #define DNUMOPT 2
  1421. #define GEMM_DEFAULT_OFFSET_A 0
  1422. #define GEMM_DEFAULT_OFFSET_B 0
  1423. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1424. #define SGEMM_DEFAULT_UNROLL_N 4
  1425. #define DGEMM_DEFAULT_UNROLL_N 4
  1426. #define QGEMM_DEFAULT_UNROLL_N 2
  1427. #define CGEMM_DEFAULT_UNROLL_N 2
  1428. #define ZGEMM_DEFAULT_UNROLL_N 2
  1429. #define XGEMM_DEFAULT_UNROLL_N 1
  1430. #ifdef ARCH_X86
  1431. #define SGEMM_DEFAULT_UNROLL_M 4
  1432. #define DGEMM_DEFAULT_UNROLL_M 2
  1433. #define QGEMM_DEFAULT_UNROLL_M 2
  1434. #define CGEMM_DEFAULT_UNROLL_M 2
  1435. #define ZGEMM_DEFAULT_UNROLL_M 1
  1436. #define XGEMM_DEFAULT_UNROLL_M 1
  1437. #else
  1438. #define SGEMM_DEFAULT_UNROLL_M 8
  1439. #define DGEMM_DEFAULT_UNROLL_M 4
  1440. #define QGEMM_DEFAULT_UNROLL_M 2
  1441. #define CGEMM_DEFAULT_UNROLL_M 4
  1442. #define ZGEMM_DEFAULT_UNROLL_M 2
  1443. #define XGEMM_DEFAULT_UNROLL_M 1
  1444. #endif
  1445. #define SGEMM_DEFAULT_P sgemm_p
  1446. #define DGEMM_DEFAULT_P dgemm_p
  1447. #define QGEMM_DEFAULT_P qgemm_p
  1448. #define CGEMM_DEFAULT_P cgemm_p
  1449. #define ZGEMM_DEFAULT_P zgemm_p
  1450. #define XGEMM_DEFAULT_P xgemm_p
  1451. #define SGEMM_DEFAULT_R sgemm_r
  1452. #define DGEMM_DEFAULT_R dgemm_r
  1453. #define QGEMM_DEFAULT_R qgemm_r
  1454. #define CGEMM_DEFAULT_R cgemm_r
  1455. #define ZGEMM_DEFAULT_R zgemm_r
  1456. #define XGEMM_DEFAULT_R xgemm_r
  1457. #define SGEMM_DEFAULT_Q 128
  1458. #define DGEMM_DEFAULT_Q 128
  1459. #define QGEMM_DEFAULT_Q 128
  1460. #define CGEMM_DEFAULT_Q 128
  1461. #define ZGEMM_DEFAULT_Q 128
  1462. #define XGEMM_DEFAULT_Q 128
  1463. #define SYMV_P 16
  1464. #endif
  // Fallback unroll factors for the Q and X kernels: any target block above
  // that did not define them (the non-x86 blocks configure only S/D/C/Z)
  // gets 2 for both M and N. Each fallback is guarded by #ifndef, so values
  // set by a target block are left untouched.
  1465. #ifndef QGEMM_DEFAULT_UNROLL_M
  1466. #define QGEMM_DEFAULT_UNROLL_M 2
  1467. #endif
  1468. #ifndef QGEMM_DEFAULT_UNROLL_N
  1469. #define QGEMM_DEFAULT_UNROLL_N 2
  1470. #endif
  1471. #ifndef XGEMM_DEFAULT_UNROLL_M
  1472. #define XGEMM_DEFAULT_UNROLL_M 2
  1473. #endif
  1474. #ifndef XGEMM_DEFAULT_UNROLL_N
  1475. #define XGEMM_DEFAULT_UNROLL_N 2
  1476. #endif
  // Shuffle-instruction helper macros (AT&T operand syntax).
  // Each macro expands to a mnemonic plus its immediate and a trailing comma,
  // so the use site supplies the remaining operands after the macro name.
  //
  // Without HAVE_SSE2, SHUFPD_0..SHUFPD_3 are mapped onto shufps. Per the
  // x86 definitions of SHUFPS/SHUFPD, the immediates 0x44, 0x4e, 0xe4 and
  // 0xee pick the same 64-bit halves as shufpd $0..$3 respectively.
  1477. #ifndef HAVE_SSE2
  1478. #define SHUFPD_0 shufps $0x44,
  1479. #define SHUFPD_1 shufps $0x4e,
  1480. #define SHUFPD_2 shufps $0xe4,
  1481. #define SHUFPD_3 shufps $0xee,
  1482. #endif
  // Otherwise (no definition made above) use the native shufpd forms.
  1483. #ifndef SHUFPD_0
  1484. #define SHUFPD_0 shufpd $0,
  1485. #endif
  1486. #ifndef SHUFPD_1
  1487. #define SHUFPD_1 shufpd $1,
  1488. #endif
  1489. #ifndef SHUFPD_2
  1490. #define SHUFPD_2 shufpd $2,
  1491. #endif
  1492. #ifndef SHUFPD_3
  1493. #define SHUFPD_3 shufpd $3,
  1494. #endif
  // SHUFPS_39 is defined here unless something earlier already defined it.
  1495. #ifndef SHUFPS_39
  1496. #define SHUFPS_39 shufps $0x39,
  1497. #endif
  1498. #endif