You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

param.h 54 kB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311
  1. /*****************************************************************************
  2. Copyright (c) 2011-2014, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written
  16. permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. **********************************************************************************/
  28. /*********************************************************************/
  29. /* Copyright 2009, 2010 The University of Texas at Austin. */
  30. /* All rights reserved. */
  31. /* */
  32. /* Redistribution and use in source and binary forms, with or */
  33. /* without modification, are permitted provided that the following */
  34. /* conditions are met: */
  35. /* */
  36. /* 1. Redistributions of source code must retain the above */
  37. /* copyright notice, this list of conditions and the following */
  38. /* disclaimer. */
  39. /* */
  40. /* 2. Redistributions in binary form must reproduce the above */
  41. /* copyright notice, this list of conditions and the following */
  42. /* disclaimer in the documentation and/or other materials */
  43. /* provided with the distribution. */
  44. /* */
  45. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  46. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  47. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  48. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  49. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  50. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  51. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  52. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  53. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  54. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  55. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  56. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  57. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  58. /* POSSIBILITY OF SUCH DAMAGE. */
  59. /* */
  60. /* The views and conclusions contained in the software and */
  61. /* documentation are those of the authors and should not be */
  62. /* interpreted as representing official policies, either expressed */
  63. /* or implied, of The University of Texas at Austin. */
  64. /*********************************************************************/
  65. #ifndef PARAM_H
  66. #define PARAM_H
  67. #ifdef OPTERON /* Tuning macros that take effect only when OPTERON is defined. */
  68. #define SNUMOPT 4
  69. #define DNUMOPT 2
  70. #define GEMM_DEFAULT_OFFSET_A 64
  71. #define GEMM_DEFAULT_OFFSET_B 256
  72. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  73. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N values are set unconditionally; only UNROLL_M depends on ARCH_X86 below. */
  74. #define DGEMM_DEFAULT_UNROLL_N 4
  75. #define QGEMM_DEFAULT_UNROLL_N 2
  76. #define CGEMM_DEFAULT_UNROLL_N 2
  77. #define ZGEMM_DEFAULT_UNROLL_N 2
  78. #define XGEMM_DEFAULT_UNROLL_N 1
  79. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined. */
  80. #define SGEMM_DEFAULT_UNROLL_M 4
  81. #define DGEMM_DEFAULT_UNROLL_M 2
  82. #define QGEMM_DEFAULT_UNROLL_M 2
  83. #define CGEMM_DEFAULT_UNROLL_M 2
  84. #define ZGEMM_DEFAULT_UNROLL_M 1
  85. #define XGEMM_DEFAULT_UNROLL_M 1
  86. #else /* ARCH_X86 not defined (presumably the x86-64 build -- TODO confirm): larger S/D/C/Z UNROLL_M. */
  87. #define SGEMM_DEFAULT_UNROLL_M 8
  88. #define DGEMM_DEFAULT_UNROLL_M 4
  89. #define QGEMM_DEFAULT_UNROLL_M 2
  90. #define CGEMM_DEFAULT_UNROLL_M 4
  91. #define ZGEMM_DEFAULT_UNROLL_M 2
  92. #define XGEMM_DEFAULT_UNROLL_M 1
  93. #endif /* ARCH_X86 */
  94. #define SGEMM_DEFAULT_P sgemm_p /* P and R defaults expand to identifiers (not constants) that are declared outside this section. */
  95. #define DGEMM_DEFAULT_P dgemm_p
  96. #define QGEMM_DEFAULT_P qgemm_p
  97. #define CGEMM_DEFAULT_P cgemm_p
  98. #define ZGEMM_DEFAULT_P zgemm_p
  99. #define XGEMM_DEFAULT_P xgemm_p
  100. #define SGEMM_DEFAULT_R sgemm_r
  101. #define DGEMM_DEFAULT_R dgemm_r
  102. #define QGEMM_DEFAULT_R qgemm_r
  103. #define CGEMM_DEFAULT_R cgemm_r
  104. #define ZGEMM_DEFAULT_R zgemm_r
  105. #define XGEMM_DEFAULT_R xgemm_r
  106. #ifdef ALLOC_HUGETLB /* Q is 248 for every precision when ALLOC_HUGETLB is defined ... */
  107. #define SGEMM_DEFAULT_Q 248
  108. #define DGEMM_DEFAULT_Q 248
  109. #define QGEMM_DEFAULT_Q 248
  110. #define CGEMM_DEFAULT_Q 248
  111. #define ZGEMM_DEFAULT_Q 248
  112. #define XGEMM_DEFAULT_Q 248
  113. #else /* ... and 240 for every precision otherwise. */
  114. #define SGEMM_DEFAULT_Q 240
  115. #define DGEMM_DEFAULT_Q 240
  116. #define QGEMM_DEFAULT_Q 240
  117. #define CGEMM_DEFAULT_Q 240
  118. #define ZGEMM_DEFAULT_Q 240
  119. #define XGEMM_DEFAULT_Q 240
  120. #endif /* ALLOC_HUGETLB */
  121. #define SYMV_P 16
  122. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  123. #endif /* OPTERON */
  124. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) /* One shared parameter set for any of these three targets. */
  125. #define SNUMOPT 8
  126. #define DNUMOPT 4
  127. #define GEMM_DEFAULT_OFFSET_A 64
  128. #define GEMM_DEFAULT_OFFSET_B 832
  129. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  130. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N values are set unconditionally; only UNROLL_M depends on ARCH_X86 below. */
  131. #define DGEMM_DEFAULT_UNROLL_N 4
  132. #define QGEMM_DEFAULT_UNROLL_N 2
  133. #define CGEMM_DEFAULT_UNROLL_N 2
  134. #define ZGEMM_DEFAULT_UNROLL_N 2
  135. #define XGEMM_DEFAULT_UNROLL_N 1
  136. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined. */
  137. #define SGEMM_DEFAULT_UNROLL_M 4
  138. #define DGEMM_DEFAULT_UNROLL_M 2
  139. #define QGEMM_DEFAULT_UNROLL_M 2
  140. #define CGEMM_DEFAULT_UNROLL_M 2
  141. #define ZGEMM_DEFAULT_UNROLL_M 1
  142. #define XGEMM_DEFAULT_UNROLL_M 1
  143. #else /* ARCH_X86 not defined (presumably the x86-64 build -- TODO confirm). */
  144. #define SGEMM_DEFAULT_UNROLL_M 8
  145. #define DGEMM_DEFAULT_UNROLL_M 4
  146. #define QGEMM_DEFAULT_UNROLL_M 2
  147. #define CGEMM_DEFAULT_UNROLL_M 4
  148. #define ZGEMM_DEFAULT_UNROLL_M 2
  149. #define XGEMM_DEFAULT_UNROLL_M 1
  150. #endif /* ARCH_X86 */
  151. #if 0 /* Disabled alternative P/Q set (P 496..62, Q 248); never compiled. */
  152. #define SGEMM_DEFAULT_P 496
  153. #define DGEMM_DEFAULT_P 248
  154. #define QGEMM_DEFAULT_P 124
  155. #define CGEMM_DEFAULT_P 248
  156. #define ZGEMM_DEFAULT_P 124
  157. #define XGEMM_DEFAULT_P 62
  158. #define SGEMM_DEFAULT_Q 248
  159. #define DGEMM_DEFAULT_Q 248
  160. #define QGEMM_DEFAULT_Q 248
  161. #define CGEMM_DEFAULT_Q 248
  162. #define ZGEMM_DEFAULT_Q 248
  163. #define XGEMM_DEFAULT_Q 248
  164. #else /* Active P/Q set: these are the values actually in effect. */
  165. #define SGEMM_DEFAULT_P 448
  166. #define DGEMM_DEFAULT_P 224
  167. #define QGEMM_DEFAULT_P 112
  168. #define CGEMM_DEFAULT_P 224
  169. #define ZGEMM_DEFAULT_P 112
  170. #define XGEMM_DEFAULT_P 56
  171. #define SGEMM_DEFAULT_Q 224
  172. #define DGEMM_DEFAULT_Q 224
  173. #define QGEMM_DEFAULT_Q 224
  174. #define CGEMM_DEFAULT_Q 224
  175. #define ZGEMM_DEFAULT_Q 224
  176. #define XGEMM_DEFAULT_Q 224
  177. #endif /* #if 0 */
  178. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers declared outside this section. */
  179. #define QGEMM_DEFAULT_R qgemm_r
  180. #define DGEMM_DEFAULT_R dgemm_r
  181. #define CGEMM_DEFAULT_R cgemm_r
  182. #define ZGEMM_DEFAULT_R zgemm_r
  183. #define XGEMM_DEFAULT_R xgemm_r
  184. #define SYMV_P 16
  185. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  186. #define GEMM_THREAD gemm_thread_mn /* Expands to an identifier declared elsewhere; presumably selects the GEMM threading routine -- TODO confirm. */
  187. #endif /* BARCELONA || SHANGHAI || BOBCAT */
  188. #ifdef BULLDOZER /* Tuning macros that take effect only when BULLDOZER is defined. */
  189. #define SNUMOPT 8
  190. #define DNUMOPT 4
  191. #define GEMM_DEFAULT_OFFSET_A 64
  192. #define GEMM_DEFAULT_OFFSET_B 832
  193. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  194. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per architecture below. */
  195. #define CGEMM_DEFAULT_UNROLL_N 2
  196. #define ZGEMM_DEFAULT_UNROLL_N 2
  197. #define XGEMM_DEFAULT_UNROLL_N 1
  198. #ifdef ARCH_X86 /* S/D UNROLL_N and all UNROLL_M values used when ARCH_X86 is defined. */
  199. #define SGEMM_DEFAULT_UNROLL_N 4
  200. #define DGEMM_DEFAULT_UNROLL_N 4
  201. #define SGEMM_DEFAULT_UNROLL_M 4
  202. #define DGEMM_DEFAULT_UNROLL_M 2
  203. #define QGEMM_DEFAULT_UNROLL_M 2
  204. #define CGEMM_DEFAULT_UNROLL_M 2
  205. #define ZGEMM_DEFAULT_UNROLL_M 1
  206. #define XGEMM_DEFAULT_UNROLL_M 1
  207. #else /* ARCH_X86 not defined (presumably the x86-64 build -- TODO confirm). */
  208. #define SGEMM_DEFAULT_UNROLL_N 2
  209. #define DGEMM_DEFAULT_UNROLL_N 2
  210. #define SGEMM_DEFAULT_UNROLL_M 16
  211. #define DGEMM_DEFAULT_UNROLL_M 8
  212. #define QGEMM_DEFAULT_UNROLL_M 2
  213. #define CGEMM_DEFAULT_UNROLL_M 4
  214. #define ZGEMM_DEFAULT_UNROLL_M 2
  215. #define XGEMM_DEFAULT_UNROLL_M 1
  216. #define CGEMM3M_DEFAULT_UNROLL_N 4 /* NOTE(review): the 3M unroll values and GEMV_UNROLL are defined only in this branch, not for ARCH_X86 -- confirm intended. */
  217. #define CGEMM3M_DEFAULT_UNROLL_M 8
  218. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  219. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  220. #define GEMV_UNROLL 8
  221. #endif /* ARCH_X86 */
  222. #if defined(ARCH_X86_64) /* S/D P values differ between ARCH_X86_64 and everything else. */
  223. #define SGEMM_DEFAULT_P 768
  224. #define DGEMM_DEFAULT_P 384
  225. #else
  226. #define SGEMM_DEFAULT_P 448
  227. #define DGEMM_DEFAULT_P 224
  228. #endif /* ARCH_X86_64 */
  229. #define QGEMM_DEFAULT_P 112
  230. #define CGEMM_DEFAULT_P 224
  231. #define ZGEMM_DEFAULT_P 112
  232. #define XGEMM_DEFAULT_P 56
  233. #if defined(ARCH_X86_64) /* S/D Q values differ between ARCH_X86_64 and everything else. */
  234. #define SGEMM_DEFAULT_Q 168
  235. #define DGEMM_DEFAULT_Q 168
  236. #else
  237. #define SGEMM_DEFAULT_Q 224
  238. #define DGEMM_DEFAULT_Q 224
  239. #endif /* ARCH_X86_64 */
  240. #define QGEMM_DEFAULT_Q 224
  241. #define CGEMM_DEFAULT_Q 224
  242. #define ZGEMM_DEFAULT_Q 224
  243. #define XGEMM_DEFAULT_Q 224
  244. #define CGEMM3M_DEFAULT_P 448 /* 3M P/Q/R values are constants and are defined for both architecture branches. */
  245. #define ZGEMM3M_DEFAULT_P 224
  246. #define XGEMM3M_DEFAULT_P 112
  247. #define CGEMM3M_DEFAULT_Q 224
  248. #define ZGEMM3M_DEFAULT_Q 224
  249. #define XGEMM3M_DEFAULT_Q 224
  250. #define CGEMM3M_DEFAULT_R 12288
  251. #define ZGEMM3M_DEFAULT_R 12288
  252. #define XGEMM3M_DEFAULT_R 12288
  253. #define SGEMM_DEFAULT_R sgemm_r /* Non-3M R defaults expand to identifiers declared outside this section. */
  254. #define QGEMM_DEFAULT_R qgemm_r
  255. #define DGEMM_DEFAULT_R dgemm_r
  256. #define CGEMM_DEFAULT_R cgemm_r
  257. #define ZGEMM_DEFAULT_R zgemm_r
  258. #define XGEMM_DEFAULT_R xgemm_r
  259. #define SYMV_P 16
  260. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  261. #define GEMM_THREAD gemm_thread_mn /* Expands to an identifier declared elsewhere; presumably selects the GEMM threading routine -- TODO confirm. */
  262. #endif /* BULLDOZER */
  263. #ifdef PILEDRIVER /* Tuning macros that take effect only when PILEDRIVER is defined. */
  264. #define SNUMOPT 8
  265. #define DNUMOPT 4
  266. #define GEMM_DEFAULT_OFFSET_A 64
  267. #define GEMM_DEFAULT_OFFSET_B 832
  268. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  269. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per architecture below. */
  270. #define CGEMM_DEFAULT_UNROLL_N 2
  271. #define ZGEMM_DEFAULT_UNROLL_N 2
  272. #define XGEMM_DEFAULT_UNROLL_N 1
  273. #ifdef ARCH_X86 /* S/D UNROLL_N and all UNROLL_M values used when ARCH_X86 is defined. */
  274. #define SGEMM_DEFAULT_UNROLL_N 4
  275. #define DGEMM_DEFAULT_UNROLL_N 4
  276. #define SGEMM_DEFAULT_UNROLL_M 4
  277. #define DGEMM_DEFAULT_UNROLL_M 2
  278. #define QGEMM_DEFAULT_UNROLL_M 2
  279. #define CGEMM_DEFAULT_UNROLL_M 2
  280. #define ZGEMM_DEFAULT_UNROLL_M 1
  281. #define XGEMM_DEFAULT_UNROLL_M 1
  282. #else /* ARCH_X86 not defined (presumably the x86-64 build -- TODO confirm). */
  283. #define SGEMM_DEFAULT_UNROLL_N 2
  284. #define DGEMM_DEFAULT_UNROLL_N 2
  285. #define SGEMM_DEFAULT_UNROLL_M 16
  286. #define DGEMM_DEFAULT_UNROLL_M 8
  287. #define QGEMM_DEFAULT_UNROLL_M 2
  288. #define CGEMM_DEFAULT_UNROLL_M 4
  289. #define ZGEMM_DEFAULT_UNROLL_M 2
  290. #define XGEMM_DEFAULT_UNROLL_M 1
  291. #define CGEMM3M_DEFAULT_UNROLL_N 4 /* NOTE(review): the 3M unroll values and GEMV_UNROLL are defined only in this branch, not for ARCH_X86 -- confirm intended. */
  292. #define CGEMM3M_DEFAULT_UNROLL_M 8
  293. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  294. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  295. #define GEMV_UNROLL 8
  296. #endif /* ARCH_X86 */
  297. #if defined(ARCH_X86_64) /* S/D/Z/C P values differ between ARCH_X86_64 and everything else. */
  298. #define SGEMM_DEFAULT_P 768
  299. #define DGEMM_DEFAULT_P 768
  300. #define ZGEMM_DEFAULT_P 384
  301. #define CGEMM_DEFAULT_P 768
  302. #else
  303. #define SGEMM_DEFAULT_P 448
  304. #define DGEMM_DEFAULT_P 480 /* NOTE(review): larger than SGEMM_DEFAULT_P here, unlike the 448/224 pairing in the BULLDOZER section -- confirm intended. */
  305. #define ZGEMM_DEFAULT_P 112
  306. #define CGEMM_DEFAULT_P 224
  307. #endif /* ARCH_X86_64 */
  308. #define QGEMM_DEFAULT_P 112
  309. #define XGEMM_DEFAULT_P 56
  310. #if defined(ARCH_X86_64) /* S/D/Z/C Q values differ between ARCH_X86_64 and everything else. */
  311. #define SGEMM_DEFAULT_Q 192
  312. #define DGEMM_DEFAULT_Q 168
  313. #define ZGEMM_DEFAULT_Q 168
  314. #define CGEMM_DEFAULT_Q 168
  315. #else
  316. #define SGEMM_DEFAULT_Q 224
  317. #define DGEMM_DEFAULT_Q 224
  318. #define ZGEMM_DEFAULT_Q 224
  319. #define CGEMM_DEFAULT_Q 224
  320. #endif /* ARCH_X86_64 */
  321. #define QGEMM_DEFAULT_Q 224
  322. #define XGEMM_DEFAULT_Q 224
  323. #define CGEMM3M_DEFAULT_P 448 /* 3M P/Q/R values are constants and are defined for both architecture branches. */
  324. #define ZGEMM3M_DEFAULT_P 224
  325. #define XGEMM3M_DEFAULT_P 112
  326. #define CGEMM3M_DEFAULT_Q 224
  327. #define ZGEMM3M_DEFAULT_Q 224
  328. #define XGEMM3M_DEFAULT_Q 224
  329. #define CGEMM3M_DEFAULT_R 12288
  330. #define ZGEMM3M_DEFAULT_R 12288
  331. #define XGEMM3M_DEFAULT_R 12288
  332. #define SGEMM_DEFAULT_R 12288 /* S and D use a fixed R of 12288 here; Q/C/Z/X still expand to identifiers declared elsewhere. */
  333. #define QGEMM_DEFAULT_R qgemm_r
  334. #define DGEMM_DEFAULT_R 12288
  335. #define CGEMM_DEFAULT_R cgemm_r
  336. #define ZGEMM_DEFAULT_R zgemm_r
  337. #define XGEMM_DEFAULT_R xgemm_r
  338. #define SYMV_P 16
  339. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  340. #define GEMM_THREAD gemm_thread_mn /* Expands to an identifier declared elsewhere; presumably selects the GEMM threading routine -- TODO confirm. */
  341. #endif /* PILEDRIVER */
  342. #ifdef STEAMROLLER /* Tuning macros that take effect only when STEAMROLLER is defined. */
  343. #define SNUMOPT 8
  344. #define DNUMOPT 4
  345. #define GEMM_DEFAULT_OFFSET_A 64
  346. #define GEMM_DEFAULT_OFFSET_B 832
  347. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  348. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per architecture below. */
  349. #define CGEMM_DEFAULT_UNROLL_N 2
  350. #define ZGEMM_DEFAULT_UNROLL_N 2
  351. #define XGEMM_DEFAULT_UNROLL_N 1
  352. #ifdef ARCH_X86 /* S/D UNROLL_N and all UNROLL_M values used when ARCH_X86 is defined. */
  353. #define SGEMM_DEFAULT_UNROLL_N 4
  354. #define DGEMM_DEFAULT_UNROLL_N 4
  355. #define SGEMM_DEFAULT_UNROLL_M 4
  356. #define DGEMM_DEFAULT_UNROLL_M 2
  357. #define QGEMM_DEFAULT_UNROLL_M 2
  358. #define CGEMM_DEFAULT_UNROLL_M 2
  359. #define ZGEMM_DEFAULT_UNROLL_M 1
  360. #define XGEMM_DEFAULT_UNROLL_M 1
  361. #else /* ARCH_X86 not defined (presumably the x86-64 build -- TODO confirm). */
  362. #define SGEMM_DEFAULT_UNROLL_N 2
  363. #define DGEMM_DEFAULT_UNROLL_N 2
  364. #define SGEMM_DEFAULT_UNROLL_M 16
  365. #define DGEMM_DEFAULT_UNROLL_M 8
  366. #define QGEMM_DEFAULT_UNROLL_M 2
  367. #define CGEMM_DEFAULT_UNROLL_M 4
  368. #define ZGEMM_DEFAULT_UNROLL_M 2
  369. #define XGEMM_DEFAULT_UNROLL_M 1
  370. #define CGEMM3M_DEFAULT_UNROLL_N 4 /* NOTE(review): the 3M unroll values and GEMV_UNROLL are defined only in this branch, not for ARCH_X86 -- confirm intended. */
  371. #define CGEMM3M_DEFAULT_UNROLL_M 8
  372. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  373. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  374. #define GEMV_UNROLL 8
  375. #endif /* ARCH_X86 */
  376. #if defined(ARCH_X86_64) /* S/D/Z/C P values differ between ARCH_X86_64 and everything else. */
  377. #define SGEMM_DEFAULT_P 768
  378. #define DGEMM_DEFAULT_P 576
  379. #define ZGEMM_DEFAULT_P 288
  380. #define CGEMM_DEFAULT_P 576
  381. #else
  382. #define SGEMM_DEFAULT_P 448
  383. #define DGEMM_DEFAULT_P 480 /* NOTE(review): larger than SGEMM_DEFAULT_P here, unlike the 448/224 pairing in the BULLDOZER section -- confirm intended. */
  384. #define ZGEMM_DEFAULT_P 112
  385. #define CGEMM_DEFAULT_P 224
  386. #endif /* ARCH_X86_64 */
  387. #define QGEMM_DEFAULT_P 112
  388. #define XGEMM_DEFAULT_P 56
  389. #if defined(ARCH_X86_64) /* S/D/Z/C Q values differ between ARCH_X86_64 and everything else. */
  390. #define SGEMM_DEFAULT_Q 192
  391. #define DGEMM_DEFAULT_Q 160
  392. #define ZGEMM_DEFAULT_Q 160
  393. #define CGEMM_DEFAULT_Q 160
  394. #else
  395. #define SGEMM_DEFAULT_Q 224
  396. #define DGEMM_DEFAULT_Q 224
  397. #define ZGEMM_DEFAULT_Q 224
  398. #define CGEMM_DEFAULT_Q 224
  399. #endif /* ARCH_X86_64 */
  400. #define QGEMM_DEFAULT_Q 224
  401. #define XGEMM_DEFAULT_Q 224
  402. #define CGEMM3M_DEFAULT_P 448 /* 3M P/Q/R values are constants and are defined for both architecture branches. */
  403. #define ZGEMM3M_DEFAULT_P 224
  404. #define XGEMM3M_DEFAULT_P 112
  405. #define CGEMM3M_DEFAULT_Q 224
  406. #define ZGEMM3M_DEFAULT_Q 224
  407. #define XGEMM3M_DEFAULT_Q 224
  408. #define CGEMM3M_DEFAULT_R 12288
  409. #define ZGEMM3M_DEFAULT_R 12288
  410. #define XGEMM3M_DEFAULT_R 12288
  411. #define SGEMM_DEFAULT_R 12288 /* S and D use a fixed R of 12288 here; Q/C/Z/X still expand to identifiers declared elsewhere. */
  412. #define QGEMM_DEFAULT_R qgemm_r
  413. #define DGEMM_DEFAULT_R 12288
  414. #define CGEMM_DEFAULT_R cgemm_r
  415. #define ZGEMM_DEFAULT_R zgemm_r
  416. #define XGEMM_DEFAULT_R xgemm_r
  417. #define SYMV_P 16
  418. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  419. #define GEMM_THREAD gemm_thread_mn /* Expands to an identifier declared elsewhere; presumably selects the GEMM threading routine -- TODO confirm. */
  420. #endif /* STEAMROLLER */
  421. #ifdef ATHLON /* Tuning macros that take effect only when ATHLON is defined; no ARCH_X86 split in this section. */
  422. #define SNUMOPT 4
  423. #define DNUMOPT 2
  424. #define GEMM_DEFAULT_OFFSET_A 0
  425. #define GEMM_DEFAULT_OFFSET_B 384
  426. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  427. #define SGEMM_DEFAULT_UNROLL_N 4
  428. #define DGEMM_DEFAULT_UNROLL_N 4
  429. #define QGEMM_DEFAULT_UNROLL_N 2
  430. #define CGEMM_DEFAULT_UNROLL_N 2
  431. #define ZGEMM_DEFAULT_UNROLL_N 2
  432. #define XGEMM_DEFAULT_UNROLL_N 1
  433. #define SGEMM_DEFAULT_UNROLL_M 2
  434. #define DGEMM_DEFAULT_UNROLL_M 1
  435. #define QGEMM_DEFAULT_UNROLL_M 2
  436. #define CGEMM_DEFAULT_UNROLL_M 1
  437. #define ZGEMM_DEFAULT_UNROLL_M 1
  438. #define XGEMM_DEFAULT_UNROLL_M 1
  439. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers declared outside this section; P and Q below are constants. */
  440. #define DGEMM_DEFAULT_R dgemm_r
  441. #define QGEMM_DEFAULT_R qgemm_r
  442. #define CGEMM_DEFAULT_R cgemm_r
  443. #define ZGEMM_DEFAULT_R zgemm_r
  444. #define XGEMM_DEFAULT_R xgemm_r
  445. #define SGEMM_DEFAULT_P 208
  446. #define DGEMM_DEFAULT_P 104
  447. #define QGEMM_DEFAULT_P 56
  448. #define CGEMM_DEFAULT_P 104
  449. #define ZGEMM_DEFAULT_P 56
  450. #define XGEMM_DEFAULT_P 28
  451. #define SGEMM_DEFAULT_Q 208 /* Q is 208 for every precision. */
  452. #define DGEMM_DEFAULT_Q 208
  453. #define QGEMM_DEFAULT_Q 208
  454. #define CGEMM_DEFAULT_Q 208
  455. #define ZGEMM_DEFAULT_Q 208
  456. #define XGEMM_DEFAULT_Q 208
  457. #define SYMV_P 16
  458. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  459. #endif /* ATHLON */
  460. #ifdef VIAC3 /* Tuning macros that take effect only when VIAC3 is defined; no ARCH_X86 split in this section. */
  461. #define SNUMOPT 2
  462. #define DNUMOPT 1
  463. #define GEMM_DEFAULT_OFFSET_A 0
  464. #define GEMM_DEFAULT_OFFSET_B 256
  465. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  466. #define SGEMM_DEFAULT_UNROLL_N 4
  467. #define DGEMM_DEFAULT_UNROLL_N 4
  468. #define QGEMM_DEFAULT_UNROLL_N 2
  469. #define CGEMM_DEFAULT_UNROLL_N 2
  470. #define ZGEMM_DEFAULT_UNROLL_N 2
  471. #define XGEMM_DEFAULT_UNROLL_N 1
  472. #define SGEMM_DEFAULT_UNROLL_M 2
  473. #define DGEMM_DEFAULT_UNROLL_M 1
  474. #define QGEMM_DEFAULT_UNROLL_M 2
  475. #define CGEMM_DEFAULT_UNROLL_M 1
  476. #define ZGEMM_DEFAULT_UNROLL_M 1
  477. #define XGEMM_DEFAULT_UNROLL_M 1
  478. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers declared outside this section; P and Q below are constants. */
  479. #define DGEMM_DEFAULT_R dgemm_r
  480. #define QGEMM_DEFAULT_R qgemm_r
  481. #define CGEMM_DEFAULT_R cgemm_r
  482. #define ZGEMM_DEFAULT_R zgemm_r
  483. #define XGEMM_DEFAULT_R xgemm_r
  484. #define SGEMM_DEFAULT_P 128 /* P is 128 for every precision; Q below is what varies (512/256/256/256/128/128). */
  485. #define DGEMM_DEFAULT_P 128
  486. #define QGEMM_DEFAULT_P 128
  487. #define CGEMM_DEFAULT_P 128
  488. #define ZGEMM_DEFAULT_P 128
  489. #define XGEMM_DEFAULT_P 128
  490. #define SGEMM_DEFAULT_Q 512
  491. #define DGEMM_DEFAULT_Q 256
  492. #define QGEMM_DEFAULT_Q 256
  493. #define CGEMM_DEFAULT_Q 256
  494. #define ZGEMM_DEFAULT_Q 128
  495. #define XGEMM_DEFAULT_Q 128
  496. #define SYMV_P 16 /* Unlike the AMD sections in this file, this section does not define HAVE_EXCLUSIVE_CACHE. */
  497. #endif /* VIAC3 */
  498. #ifdef NANO /* Tuning macros that take effect only when NANO is defined. */
  499. #define SNUMOPT 4
  500. #define DNUMOPT 2
  501. #define GEMM_DEFAULT_OFFSET_A 64
  502. #define GEMM_DEFAULT_OFFSET_B 256
  503. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  504. #ifdef ARCH_X86 /* Both UNROLL_N and UNROLL_M are chosen per architecture in this section. */
  505. #define SGEMM_DEFAULT_UNROLL_N 4
  506. #define DGEMM_DEFAULT_UNROLL_N 4
  507. #define QGEMM_DEFAULT_UNROLL_N 2
  508. #define CGEMM_DEFAULT_UNROLL_N 2
  509. #define ZGEMM_DEFAULT_UNROLL_N 2
  510. #define XGEMM_DEFAULT_UNROLL_N 1
  511. #define SGEMM_DEFAULT_UNROLL_M 4
  512. #define DGEMM_DEFAULT_UNROLL_M 2
  513. #define QGEMM_DEFAULT_UNROLL_M 2
  514. #define CGEMM_DEFAULT_UNROLL_M 2
  515. #define ZGEMM_DEFAULT_UNROLL_M 1
  516. #define XGEMM_DEFAULT_UNROLL_M 1
  517. #else /* ARCH_X86 not defined (presumably the x86-64 build -- TODO confirm). */
  518. #define SGEMM_DEFAULT_UNROLL_N 8
  519. #define DGEMM_DEFAULT_UNROLL_N 4
  520. #define QGEMM_DEFAULT_UNROLL_N 2
  521. #define CGEMM_DEFAULT_UNROLL_N 4
  522. #define ZGEMM_DEFAULT_UNROLL_N 2
  523. #define XGEMM_DEFAULT_UNROLL_N 1
  524. #define SGEMM_DEFAULT_UNROLL_M 4
  525. #define DGEMM_DEFAULT_UNROLL_M 4
  526. #define QGEMM_DEFAULT_UNROLL_M 2
  527. #define CGEMM_DEFAULT_UNROLL_M 2
  528. #define ZGEMM_DEFAULT_UNROLL_M 2
  529. #define XGEMM_DEFAULT_UNROLL_M 1
  530. #endif /* ARCH_X86 */
  531. #define SGEMM_DEFAULT_P 288 /* P is 288 for every precision; Q below is what varies (256 down to 32). */
  532. #define DGEMM_DEFAULT_P 288
  533. #define QGEMM_DEFAULT_P 288
  534. #define CGEMM_DEFAULT_P 288
  535. #define ZGEMM_DEFAULT_P 288
  536. #define XGEMM_DEFAULT_P 288
  537. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers declared outside this section. */
  538. #define DGEMM_DEFAULT_R dgemm_r
  539. #define QGEMM_DEFAULT_R qgemm_r
  540. #define CGEMM_DEFAULT_R cgemm_r
  541. #define ZGEMM_DEFAULT_R zgemm_r
  542. #define XGEMM_DEFAULT_R xgemm_r
  543. #define SGEMM_DEFAULT_Q 256
  544. #define DGEMM_DEFAULT_Q 128
  545. #define QGEMM_DEFAULT_Q 64
  546. #define CGEMM_DEFAULT_Q 128
  547. #define ZGEMM_DEFAULT_Q 64
  548. #define XGEMM_DEFAULT_Q 32
  549. #define SYMV_P 16
  550. #define HAVE_EXCLUSIVE_CACHE /* Flag macro with no value; consumers can only test whether it is defined. */
  551. #endif /* NANO */
  552. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3) /* One shared parameter set for any of these three targets. */
  553. #ifdef HAVE_SSE /* SNUMOPT is 2 when HAVE_SSE is defined, 1 otherwise. */
  554. #define SNUMOPT 2
  555. #else
  556. #define SNUMOPT 1
  557. #endif /* HAVE_SSE */
  558. #define DNUMOPT 1
  559. #define GEMM_DEFAULT_OFFSET_A 0
  560. #define GEMM_DEFAULT_OFFSET_B 0
  561. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  562. #ifdef HAVE_SSE /* Only the S and C UNROLL_M values depend on HAVE_SSE (8/4 with it, 4/2 without). */
  563. #define SGEMM_DEFAULT_UNROLL_M 8
  564. #define CGEMM_DEFAULT_UNROLL_M 4
  565. #else
  566. #define SGEMM_DEFAULT_UNROLL_M 4
  567. #define CGEMM_DEFAULT_UNROLL_M 2
  568. #endif /* HAVE_SSE */
  569. #define DGEMM_DEFAULT_UNROLL_M 2
  570. #define SGEMM_DEFAULT_UNROLL_N 2
  571. #define DGEMM_DEFAULT_UNROLL_N 2
  572. #define QGEMM_DEFAULT_UNROLL_M 2
  573. #define QGEMM_DEFAULT_UNROLL_N 2
  574. #define CGEMM_DEFAULT_UNROLL_N 1
  575. #define ZGEMM_DEFAULT_UNROLL_M 1
  576. #define ZGEMM_DEFAULT_UNROLL_N 1
  577. #define XGEMM_DEFAULT_UNROLL_M 1
  578. #define XGEMM_DEFAULT_UNROLL_N 1
  579. #define SGEMM_DEFAULT_P sgemm_p /* For each precision: P and R expand to identifiers declared elsewhere; Q is the constant 256. */
  580. #define SGEMM_DEFAULT_Q 256
  581. #define SGEMM_DEFAULT_R sgemm_r
  582. #define DGEMM_DEFAULT_P dgemm_p
  583. #define DGEMM_DEFAULT_Q 256
  584. #define DGEMM_DEFAULT_R dgemm_r
  585. #define QGEMM_DEFAULT_P qgemm_p
  586. #define QGEMM_DEFAULT_Q 256
  587. #define QGEMM_DEFAULT_R qgemm_r
  588. #define CGEMM_DEFAULT_P cgemm_p
  589. #define CGEMM_DEFAULT_Q 256
  590. #define CGEMM_DEFAULT_R cgemm_r
  591. #define ZGEMM_DEFAULT_P zgemm_p
  592. #define ZGEMM_DEFAULT_Q 256
  593. #define ZGEMM_DEFAULT_R zgemm_r
  594. #define XGEMM_DEFAULT_P xgemm_p
  595. #define XGEMM_DEFAULT_Q 256
  596. #define XGEMM_DEFAULT_R xgemm_r
  597. #define SYMV_P 4
  598. #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  599. #ifdef PENTIUMM /* Tuning macros that take effect only when PENTIUMM is defined. */
  600. #define SNUMOPT 2
  601. #define DNUMOPT 1
  602. #define GEMM_DEFAULT_OFFSET_A 0
  603. #define GEMM_DEFAULT_OFFSET_B 0
  604. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  605. #ifdef CORE_YONAH /* Unroll values used when CORE_YONAH is also defined. */
  606. #define SGEMM_DEFAULT_UNROLL_M 4
  607. #define SGEMM_DEFAULT_UNROLL_N 4
  608. #define DGEMM_DEFAULT_UNROLL_M 2
  609. #define DGEMM_DEFAULT_UNROLL_N 4
  610. #define QGEMM_DEFAULT_UNROLL_M 2
  611. #define QGEMM_DEFAULT_UNROLL_N 2
  612. #define CGEMM_DEFAULT_UNROLL_M 2
  613. #define CGEMM_DEFAULT_UNROLL_N 2
  614. #define ZGEMM_DEFAULT_UNROLL_M 1
  615. #define ZGEMM_DEFAULT_UNROLL_N 2
  616. #define XGEMM_DEFAULT_UNROLL_M 1
  617. #define XGEMM_DEFAULT_UNROLL_N 1
  618. #else /* CORE_YONAH not defined: S/D/C/Z unroll values differ from the branch above; Q and X are the same. */
  619. #define SGEMM_DEFAULT_UNROLL_M 8
  620. #define SGEMM_DEFAULT_UNROLL_N 2
  621. #define DGEMM_DEFAULT_UNROLL_M 2
  622. #define DGEMM_DEFAULT_UNROLL_N 2
  623. #define QGEMM_DEFAULT_UNROLL_M 2
  624. #define QGEMM_DEFAULT_UNROLL_N 2
  625. #define CGEMM_DEFAULT_UNROLL_M 4
  626. #define CGEMM_DEFAULT_UNROLL_N 1
  627. #define ZGEMM_DEFAULT_UNROLL_M 1
  628. #define ZGEMM_DEFAULT_UNROLL_N 1
  629. #define XGEMM_DEFAULT_UNROLL_M 1
  630. #define XGEMM_DEFAULT_UNROLL_N 1
  631. #endif /* CORE_YONAH */
  632. #define SGEMM_DEFAULT_P sgemm_p /* For each precision: P and R expand to identifiers declared elsewhere; Q is the constant 256. */
  633. #define SGEMM_DEFAULT_Q 256
  634. #define SGEMM_DEFAULT_R sgemm_r
  635. #define DGEMM_DEFAULT_P dgemm_p
  636. #define DGEMM_DEFAULT_Q 256
  637. #define DGEMM_DEFAULT_R dgemm_r
  638. #define QGEMM_DEFAULT_P qgemm_p
  639. #define QGEMM_DEFAULT_Q 256
  640. #define QGEMM_DEFAULT_R qgemm_r
  641. #define CGEMM_DEFAULT_P cgemm_p
  642. #define CGEMM_DEFAULT_Q 256
  643. #define CGEMM_DEFAULT_R cgemm_r
  644. #define ZGEMM_DEFAULT_P zgemm_p
  645. #define ZGEMM_DEFAULT_Q 256
  646. #define ZGEMM_DEFAULT_R zgemm_r
  647. #define XGEMM_DEFAULT_P xgemm_p
  648. #define XGEMM_DEFAULT_Q 256
  649. #define XGEMM_DEFAULT_R xgemm_r
  650. #define SYMV_P 4
  651. #endif /* PENTIUMM */
  652. #ifdef CORE_NORTHWOOD
  653. #define SNUMOPT 4
  654. #define DNUMOPT 2
  655. #define GEMM_DEFAULT_OFFSET_A 0
  656. #define GEMM_DEFAULT_OFFSET_B 32
  657. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  658. #define SYMV_P 8
  659. #define SGEMM_DEFAULT_UNROLL_M 8
  660. #define DGEMM_DEFAULT_UNROLL_M 4
  661. #define QGEMM_DEFAULT_UNROLL_M 2
  662. #define CGEMM_DEFAULT_UNROLL_M 4
  663. #define ZGEMM_DEFAULT_UNROLL_M 2
  664. #define XGEMM_DEFAULT_UNROLL_M 1
  665. #define SGEMM_DEFAULT_UNROLL_N 2
  666. #define DGEMM_DEFAULT_UNROLL_N 2
  667. #define QGEMM_DEFAULT_UNROLL_N 2
  668. #define CGEMM_DEFAULT_UNROLL_N 1
  669. #define ZGEMM_DEFAULT_UNROLL_N 1
  670. #define XGEMM_DEFAULT_UNROLL_N 1
  671. #define SGEMM_DEFAULT_P sgemm_p
  672. #define SGEMM_DEFAULT_R sgemm_r
  673. #define DGEMM_DEFAULT_P dgemm_p
  674. #define DGEMM_DEFAULT_R dgemm_r
  675. #define QGEMM_DEFAULT_P qgemm_p
  676. #define QGEMM_DEFAULT_R qgemm_r
  677. #define CGEMM_DEFAULT_P cgemm_p
  678. #define CGEMM_DEFAULT_R cgemm_r
  679. #define ZGEMM_DEFAULT_P zgemm_p
  680. #define ZGEMM_DEFAULT_R zgemm_r
  681. #define XGEMM_DEFAULT_P xgemm_p
  682. #define XGEMM_DEFAULT_R xgemm_r
  683. #define SGEMM_DEFAULT_Q 128
  684. #define DGEMM_DEFAULT_Q 128
  685. #define QGEMM_DEFAULT_Q 128
  686. #define CGEMM_DEFAULT_Q 128
  687. #define ZGEMM_DEFAULT_Q 128
  688. #define XGEMM_DEFAULT_Q 128
  689. #endif
  690. #ifdef CORE_PRESCOTT
  691. #define SNUMOPT 4
  692. #define DNUMOPT 2
  693. #ifndef __64BIT__
  694. #define GEMM_DEFAULT_OFFSET_A 128
  695. #define GEMM_DEFAULT_OFFSET_B 192
  696. #else
  697. #define GEMM_DEFAULT_OFFSET_A 0
  698. #define GEMM_DEFAULT_OFFSET_B 256
  699. #endif
  700. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  701. #define SYMV_P 8
  702. #ifdef ARCH_X86
  703. #define SGEMM_DEFAULT_UNROLL_M 4
  704. #define DGEMM_DEFAULT_UNROLL_M 2
  705. #define QGEMM_DEFAULT_UNROLL_M 2
  706. #define CGEMM_DEFAULT_UNROLL_M 2
  707. #define ZGEMM_DEFAULT_UNROLL_M 1
  708. #define XGEMM_DEFAULT_UNROLL_M 1
  709. #else
  710. #define SGEMM_DEFAULT_UNROLL_M 8
  711. #define DGEMM_DEFAULT_UNROLL_M 4
  712. #define QGEMM_DEFAULT_UNROLL_M 2
  713. #define CGEMM_DEFAULT_UNROLL_M 4
  714. #define ZGEMM_DEFAULT_UNROLL_M 2
  715. #define XGEMM_DEFAULT_UNROLL_M 1
  716. #endif
  717. #define SGEMM_DEFAULT_UNROLL_N 4
  718. #define DGEMM_DEFAULT_UNROLL_N 4
  719. #define QGEMM_DEFAULT_UNROLL_N 2
  720. #define CGEMM_DEFAULT_UNROLL_N 2
  721. #define ZGEMM_DEFAULT_UNROLL_N 2
  722. #define XGEMM_DEFAULT_UNROLL_N 1
  723. #define SGEMM_DEFAULT_P sgemm_p
  724. #define SGEMM_DEFAULT_R sgemm_r
  725. #define DGEMM_DEFAULT_P dgemm_p
  726. #define DGEMM_DEFAULT_R dgemm_r
  727. #define QGEMM_DEFAULT_P qgemm_p
  728. #define QGEMM_DEFAULT_R qgemm_r
  729. #define CGEMM_DEFAULT_P cgemm_p
  730. #define CGEMM_DEFAULT_R cgemm_r
  731. #define ZGEMM_DEFAULT_P zgemm_p
  732. #define ZGEMM_DEFAULT_R zgemm_r
  733. #define XGEMM_DEFAULT_P xgemm_p
  734. #define XGEMM_DEFAULT_R xgemm_r
  735. #define SGEMM_DEFAULT_Q 128
  736. #define DGEMM_DEFAULT_Q 128
  737. #define QGEMM_DEFAULT_Q 128
  738. #define CGEMM_DEFAULT_Q 128
  739. #define ZGEMM_DEFAULT_Q 128
  740. #define XGEMM_DEFAULT_Q 128
  741. #endif
  742. #ifdef CORE2
  743. #define SNUMOPT 8
  744. #define DNUMOPT 4
  745. #define GEMM_DEFAULT_OFFSET_A 448
  746. #define GEMM_DEFAULT_OFFSET_B 128
  747. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  748. #define SYMV_P 8
  749. #define SWITCH_RATIO 4
  750. #ifdef ARCH_X86
  751. #define SGEMM_DEFAULT_UNROLL_M 8
  752. #define DGEMM_DEFAULT_UNROLL_M 4
  753. #define QGEMM_DEFAULT_UNROLL_M 2
  754. #define CGEMM_DEFAULT_UNROLL_M 4
  755. #define ZGEMM_DEFAULT_UNROLL_M 2
  756. #define XGEMM_DEFAULT_UNROLL_M 1
  757. #define SGEMM_DEFAULT_UNROLL_N 2
  758. #define DGEMM_DEFAULT_UNROLL_N 2
  759. #define QGEMM_DEFAULT_UNROLL_N 2
  760. #define CGEMM_DEFAULT_UNROLL_N 1
  761. #define ZGEMM_DEFAULT_UNROLL_N 1
  762. #define XGEMM_DEFAULT_UNROLL_N 1
  763. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  764. #else
  765. #define SGEMM_DEFAULT_UNROLL_M 8
  766. #define DGEMM_DEFAULT_UNROLL_M 4
  767. #define QGEMM_DEFAULT_UNROLL_M 2
  768. #define CGEMM_DEFAULT_UNROLL_M 4
  769. #define ZGEMM_DEFAULT_UNROLL_M 2
  770. #define XGEMM_DEFAULT_UNROLL_M 1
  771. #define SGEMM_DEFAULT_UNROLL_N 4
  772. #define DGEMM_DEFAULT_UNROLL_N 4
  773. #define QGEMM_DEFAULT_UNROLL_N 2
  774. #define CGEMM_DEFAULT_UNROLL_N 2
  775. #define ZGEMM_DEFAULT_UNROLL_N 2
  776. #define XGEMM_DEFAULT_UNROLL_N 1
  777. #endif
  778. #define SGEMM_DEFAULT_P sgemm_p
  779. #define SGEMM_DEFAULT_R sgemm_r
  780. #define DGEMM_DEFAULT_P dgemm_p
  781. #define DGEMM_DEFAULT_R dgemm_r
  782. #define QGEMM_DEFAULT_P qgemm_p
  783. #define QGEMM_DEFAULT_R qgemm_r
  784. #define CGEMM_DEFAULT_P cgemm_p
  785. #define CGEMM_DEFAULT_R cgemm_r
  786. #define ZGEMM_DEFAULT_P zgemm_p
  787. #define ZGEMM_DEFAULT_R zgemm_r
  788. #define XGEMM_DEFAULT_P xgemm_p
  789. #define XGEMM_DEFAULT_R xgemm_r
  790. #define SGEMM_DEFAULT_Q 256
  791. #define DGEMM_DEFAULT_Q 256
  792. #define QGEMM_DEFAULT_Q 256
  793. #define CGEMM_DEFAULT_Q 256
  794. #define ZGEMM_DEFAULT_Q 256
  795. #define XGEMM_DEFAULT_Q 256
  796. #endif
  797. #ifdef PENRYN
  798. #define SNUMOPT 8
  799. #define DNUMOPT 4
  800. #define GEMM_DEFAULT_OFFSET_A 128
  801. #define GEMM_DEFAULT_OFFSET_B 0
  802. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  803. #define SYMV_P 8
  804. #define SWITCH_RATIO 4
  805. #ifdef ARCH_X86
  806. #define SGEMM_DEFAULT_UNROLL_M 4
  807. #define DGEMM_DEFAULT_UNROLL_M 2
  808. #define QGEMM_DEFAULT_UNROLL_M 2
  809. #define CGEMM_DEFAULT_UNROLL_M 2
  810. #define ZGEMM_DEFAULT_UNROLL_M 1
  811. #define XGEMM_DEFAULT_UNROLL_M 1
  812. #define SGEMM_DEFAULT_UNROLL_N 4
  813. #define DGEMM_DEFAULT_UNROLL_N 4
  814. #define QGEMM_DEFAULT_UNROLL_N 2
  815. #define CGEMM_DEFAULT_UNROLL_N 2
  816. #define ZGEMM_DEFAULT_UNROLL_N 2
  817. #define XGEMM_DEFAULT_UNROLL_N 1
  818. #else
  819. #define SGEMM_DEFAULT_UNROLL_M 8
  820. #define DGEMM_DEFAULT_UNROLL_M 4
  821. #define QGEMM_DEFAULT_UNROLL_M 2
  822. #define CGEMM_DEFAULT_UNROLL_M 4
  823. #define ZGEMM_DEFAULT_UNROLL_M 2
  824. #define XGEMM_DEFAULT_UNROLL_M 1
  825. #define SGEMM_DEFAULT_UNROLL_N 4
  826. #define DGEMM_DEFAULT_UNROLL_N 4
  827. #define QGEMM_DEFAULT_UNROLL_N 2
  828. #define CGEMM_DEFAULT_UNROLL_N 2
  829. #define ZGEMM_DEFAULT_UNROLL_N 2
  830. #define XGEMM_DEFAULT_UNROLL_N 1
  831. #endif
  832. #define SGEMM_DEFAULT_P sgemm_p
  833. #define SGEMM_DEFAULT_R sgemm_r
  834. #define DGEMM_DEFAULT_P dgemm_p
  835. #define DGEMM_DEFAULT_R dgemm_r
  836. #define QGEMM_DEFAULT_P qgemm_p
  837. #define QGEMM_DEFAULT_R qgemm_r
  838. #define CGEMM_DEFAULT_P cgemm_p
  839. #define CGEMM_DEFAULT_R cgemm_r
  840. #define ZGEMM_DEFAULT_P zgemm_p
  841. #define ZGEMM_DEFAULT_R zgemm_r
  842. #define XGEMM_DEFAULT_P xgemm_p
  843. #define XGEMM_DEFAULT_R xgemm_r
  844. #define SGEMM_DEFAULT_Q 512
  845. #define DGEMM_DEFAULT_Q 256
  846. #define QGEMM_DEFAULT_Q 128
  847. #define CGEMM_DEFAULT_Q 512
  848. #define ZGEMM_DEFAULT_Q 256
  849. #define XGEMM_DEFAULT_Q 128
  850. #define GETRF_FACTOR 0.75
  851. #endif
  852. #ifdef DUNNINGTON
  853. #define SNUMOPT 8
  854. #define DNUMOPT 4
  855. #define GEMM_DEFAULT_OFFSET_A 128
  856. #define GEMM_DEFAULT_OFFSET_B 0
  857. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  858. #define SYMV_P 8
  859. #define SWITCH_RATIO 4
  860. #ifdef ARCH_X86
  861. #define SGEMM_DEFAULT_UNROLL_M 4
  862. #define DGEMM_DEFAULT_UNROLL_M 2
  863. #define QGEMM_DEFAULT_UNROLL_M 2
  864. #define CGEMM_DEFAULT_UNROLL_M 2
  865. #define ZGEMM_DEFAULT_UNROLL_M 1
  866. #define XGEMM_DEFAULT_UNROLL_M 1
  867. #define SGEMM_DEFAULT_UNROLL_N 4
  868. #define DGEMM_DEFAULT_UNROLL_N 4
  869. #define QGEMM_DEFAULT_UNROLL_N 2
  870. #define CGEMM_DEFAULT_UNROLL_N 2
  871. #define ZGEMM_DEFAULT_UNROLL_N 2
  872. #define XGEMM_DEFAULT_UNROLL_N 1
  873. #else
  874. #define SGEMM_DEFAULT_UNROLL_M 8
  875. #define DGEMM_DEFAULT_UNROLL_M 4
  876. #define QGEMM_DEFAULT_UNROLL_M 2
  877. #define CGEMM_DEFAULT_UNROLL_M 4
  878. #define ZGEMM_DEFAULT_UNROLL_M 2
  879. #define XGEMM_DEFAULT_UNROLL_M 1
  880. #define SGEMM_DEFAULT_UNROLL_N 4
  881. #define DGEMM_DEFAULT_UNROLL_N 4
  882. #define QGEMM_DEFAULT_UNROLL_N 2
  883. #define CGEMM_DEFAULT_UNROLL_N 2
  884. #define ZGEMM_DEFAULT_UNROLL_N 2
  885. #define XGEMM_DEFAULT_UNROLL_N 1
  886. #endif
  887. #define SGEMM_DEFAULT_P sgemm_p
  888. #define SGEMM_DEFAULT_R sgemm_r
  889. #define DGEMM_DEFAULT_P dgemm_p
  890. #define DGEMM_DEFAULT_R dgemm_r
  891. #define QGEMM_DEFAULT_P qgemm_p
  892. #define QGEMM_DEFAULT_R qgemm_r
  893. #define CGEMM_DEFAULT_P cgemm_p
  894. #define CGEMM_DEFAULT_R cgemm_r
  895. #define ZGEMM_DEFAULT_P zgemm_p
  896. #define ZGEMM_DEFAULT_R zgemm_r
  897. #define XGEMM_DEFAULT_P xgemm_p
  898. #define XGEMM_DEFAULT_R xgemm_r
  899. #define SGEMM_DEFAULT_Q 768
  900. #define DGEMM_DEFAULT_Q 384
  901. #define QGEMM_DEFAULT_Q 192
  902. #define CGEMM_DEFAULT_Q 768
  903. #define ZGEMM_DEFAULT_Q 384
  904. #define XGEMM_DEFAULT_Q 192
  905. #define GETRF_FACTOR 0.75
  906. #define GEMM_THREAD gemm_thread_mn
  907. #endif
  908. #ifdef NEHALEM
  909. #define SNUMOPT 8
  910. #define DNUMOPT 4
  911. #define GEMM_DEFAULT_OFFSET_A 32
  912. #define GEMM_DEFAULT_OFFSET_B 0
  913. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  914. #define SYMV_P 8
  915. #define SWITCH_RATIO 4
  916. #ifdef ARCH_X86
  917. #define SGEMM_DEFAULT_UNROLL_M 4
  918. #define DGEMM_DEFAULT_UNROLL_M 2
  919. #define QGEMM_DEFAULT_UNROLL_M 2
  920. #define CGEMM_DEFAULT_UNROLL_M 2
  921. #define ZGEMM_DEFAULT_UNROLL_M 1
  922. #define XGEMM_DEFAULT_UNROLL_M 1
  923. #define SGEMM_DEFAULT_UNROLL_N 4
  924. #define DGEMM_DEFAULT_UNROLL_N 4
  925. #define QGEMM_DEFAULT_UNROLL_N 2
  926. #define CGEMM_DEFAULT_UNROLL_N 2
  927. #define ZGEMM_DEFAULT_UNROLL_N 2
  928. #define XGEMM_DEFAULT_UNROLL_N 1
  929. #else
  930. #define SGEMM_DEFAULT_UNROLL_M 4
  931. #define DGEMM_DEFAULT_UNROLL_M 2
  932. #define QGEMM_DEFAULT_UNROLL_M 2
  933. #define CGEMM_DEFAULT_UNROLL_M 2
  934. #define ZGEMM_DEFAULT_UNROLL_M 1
  935. #define XGEMM_DEFAULT_UNROLL_M 1
  936. #define SGEMM_DEFAULT_UNROLL_N 8
  937. #define DGEMM_DEFAULT_UNROLL_N 8
  938. #define QGEMM_DEFAULT_UNROLL_N 2
  939. #define CGEMM_DEFAULT_UNROLL_N 4
  940. #define ZGEMM_DEFAULT_UNROLL_N 4
  941. #define XGEMM_DEFAULT_UNROLL_N 1
  942. #endif
  943. #define SGEMM_DEFAULT_P 504
  944. #define SGEMM_DEFAULT_R sgemm_r
  945. #define DGEMM_DEFAULT_P 504
  946. #define DGEMM_DEFAULT_R dgemm_r
  947. #define QGEMM_DEFAULT_P 504
  948. #define QGEMM_DEFAULT_R qgemm_r
  949. #define CGEMM_DEFAULT_P 252
  950. #define CGEMM_DEFAULT_R cgemm_r
  951. #define ZGEMM_DEFAULT_P 252
  952. #define ZGEMM_DEFAULT_R zgemm_r
  953. #define XGEMM_DEFAULT_P 252
  954. #define XGEMM_DEFAULT_R xgemm_r
  955. #define SGEMM_DEFAULT_Q 512
  956. #define DGEMM_DEFAULT_Q 256
  957. #define QGEMM_DEFAULT_Q 128
  958. #define CGEMM_DEFAULT_Q 512
  959. #define ZGEMM_DEFAULT_Q 256
  960. #define XGEMM_DEFAULT_Q 128
  961. #define GETRF_FACTOR 0.72
  962. #endif
  963. #ifdef SANDYBRIDGE
  964. #define SNUMOPT 8
  965. #define DNUMOPT 4
  966. #define GEMM_DEFAULT_OFFSET_A 0
  967. #define GEMM_DEFAULT_OFFSET_B 0
  968. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  969. #define SYMV_P 8
  970. #define SWITCH_RATIO 4
  971. #ifdef ARCH_X86
  972. #define SGEMM_DEFAULT_UNROLL_M 4
  973. #define DGEMM_DEFAULT_UNROLL_M 2
  974. #define QGEMM_DEFAULT_UNROLL_M 2
  975. #define CGEMM_DEFAULT_UNROLL_M 2
  976. #define ZGEMM_DEFAULT_UNROLL_M 1
  977. #define XGEMM_DEFAULT_UNROLL_M 1
  978. #define SGEMM_DEFAULT_UNROLL_N 4
  979. #define DGEMM_DEFAULT_UNROLL_N 4
  980. #define QGEMM_DEFAULT_UNROLL_N 2
  981. #define CGEMM_DEFAULT_UNROLL_N 2
  982. #define ZGEMM_DEFAULT_UNROLL_N 2
  983. #define XGEMM_DEFAULT_UNROLL_N 1
  984. #else
  985. #define SGEMM_DEFAULT_UNROLL_M 16
  986. #define DGEMM_DEFAULT_UNROLL_M 8
  987. #define QGEMM_DEFAULT_UNROLL_M 2
  988. #define CGEMM_DEFAULT_UNROLL_M 8
  989. #define ZGEMM_DEFAULT_UNROLL_M 1
  990. #define XGEMM_DEFAULT_UNROLL_M 1
  991. #define SGEMM_DEFAULT_UNROLL_N 4
  992. #define DGEMM_DEFAULT_UNROLL_N 4
  993. #define QGEMM_DEFAULT_UNROLL_N 2
  994. #define CGEMM_DEFAULT_UNROLL_N 2
  995. #define ZGEMM_DEFAULT_UNROLL_N 4
  996. #define XGEMM_DEFAULT_UNROLL_N 1
  997. #endif
  998. #define SGEMM_DEFAULT_P 768
  999. #define SGEMM_DEFAULT_R sgemm_r
  1000. //#define SGEMM_DEFAULT_R 1024
  1001. #define DGEMM_DEFAULT_P 512
  1002. #define DGEMM_DEFAULT_R dgemm_r
  1003. //#define DGEMM_DEFAULT_R 1024
  1004. #define QGEMM_DEFAULT_P 504
  1005. #define QGEMM_DEFAULT_R qgemm_r
  1006. #define CGEMM_DEFAULT_P 768
  1007. #define CGEMM_DEFAULT_R cgemm_r
  1008. //#define CGEMM_DEFAULT_R 1024
  1009. #define ZGEMM_DEFAULT_P 512
  1010. #define ZGEMM_DEFAULT_R zgemm_r
  1011. //#define ZGEMM_DEFAULT_R 1024
  1012. #define XGEMM_DEFAULT_P 252
  1013. #define XGEMM_DEFAULT_R xgemm_r
  1014. #define SGEMM_DEFAULT_Q 384
  1015. #define DGEMM_DEFAULT_Q 256
  1016. #define QGEMM_DEFAULT_Q 128
  1017. #define CGEMM_DEFAULT_Q 512
  1018. #define ZGEMM_DEFAULT_Q 192
  1019. #define XGEMM_DEFAULT_Q 128
  1020. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1021. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1022. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1023. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1024. #define CGEMM3M_DEFAULT_P 448
  1025. #define ZGEMM3M_DEFAULT_P 224
  1026. #define XGEMM3M_DEFAULT_P 112
  1027. #define CGEMM3M_DEFAULT_Q 224
  1028. #define ZGEMM3M_DEFAULT_Q 224
  1029. #define XGEMM3M_DEFAULT_Q 224
  1030. #define CGEMM3M_DEFAULT_R 12288
  1031. #define ZGEMM3M_DEFAULT_R 12288
  1032. #define XGEMM3M_DEFAULT_R 12288
  1033. #define GETRF_FACTOR 0.72
  1034. #endif
  1035. #ifdef HASWELL
  1036. #define SNUMOPT 16
  1037. #define DNUMOPT 8
  1038. #define GEMM_DEFAULT_OFFSET_A 0
  1039. #define GEMM_DEFAULT_OFFSET_B 0
  1040. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1041. #define SYMV_P 8
  1042. #define SWITCH_RATIO 4
  1043. #ifdef ARCH_X86
  1044. #define SGEMM_DEFAULT_UNROLL_M 4
  1045. #define DGEMM_DEFAULT_UNROLL_M 2
  1046. #define QGEMM_DEFAULT_UNROLL_M 2
  1047. #define CGEMM_DEFAULT_UNROLL_M 2
  1048. #define ZGEMM_DEFAULT_UNROLL_M 1
  1049. #define XGEMM_DEFAULT_UNROLL_M 1
  1050. #define SGEMM_DEFAULT_UNROLL_N 4
  1051. #define DGEMM_DEFAULT_UNROLL_N 4
  1052. #define QGEMM_DEFAULT_UNROLL_N 2
  1053. #define CGEMM_DEFAULT_UNROLL_N 2
  1054. #define ZGEMM_DEFAULT_UNROLL_N 2
  1055. #define XGEMM_DEFAULT_UNROLL_N 1
  1056. #else
  1057. #define SGEMM_DEFAULT_UNROLL_M 16
  1058. #define DGEMM_DEFAULT_UNROLL_M 4
  1059. #define QGEMM_DEFAULT_UNROLL_M 2
  1060. #define CGEMM_DEFAULT_UNROLL_M 8
  1061. #define ZGEMM_DEFAULT_UNROLL_M 4
  1062. #define XGEMM_DEFAULT_UNROLL_M 1
  1063. #define SGEMM_DEFAULT_UNROLL_N 4
  1064. #define DGEMM_DEFAULT_UNROLL_N 4
  1065. #define QGEMM_DEFAULT_UNROLL_N 2
  1066. #define CGEMM_DEFAULT_UNROLL_N 2
  1067. #define ZGEMM_DEFAULT_UNROLL_N 2
  1068. #define XGEMM_DEFAULT_UNROLL_N 1
  1069. #define DGEMM_DEFAULT_UNROLL_MN 16
  1070. #endif
  1071. #ifdef ARCH_X86
  1072. #define SGEMM_DEFAULT_P 512
  1073. #define SGEMM_DEFAULT_R sgemm_r
  1074. #define DGEMM_DEFAULT_P 512
  1075. #define DGEMM_DEFAULT_R dgemm_r
  1076. #define QGEMM_DEFAULT_P 504
  1077. #define QGEMM_DEFAULT_R qgemm_r
  1078. #define CGEMM_DEFAULT_P 128
  1079. #define CGEMM_DEFAULT_R 1024
  1080. #define ZGEMM_DEFAULT_P 512
  1081. #define ZGEMM_DEFAULT_R zgemm_r
  1082. #define XGEMM_DEFAULT_P 252
  1083. #define XGEMM_DEFAULT_R xgemm_r
  1084. #define SGEMM_DEFAULT_Q 256
  1085. #define DGEMM_DEFAULT_Q 256
  1086. #define QGEMM_DEFAULT_Q 128
  1087. #define CGEMM_DEFAULT_Q 256
  1088. #define ZGEMM_DEFAULT_Q 192
  1089. #define XGEMM_DEFAULT_Q 128
  1090. #else
  1091. #define SGEMM_DEFAULT_P 768
  1092. #define DGEMM_DEFAULT_P 512
  1093. #define CGEMM_DEFAULT_P 384
  1094. #define ZGEMM_DEFAULT_P 256
  1095. #ifdef WINDOWS_ABI
  1096. #define SGEMM_DEFAULT_Q 320
  1097. #define DGEMM_DEFAULT_Q 128
  1098. #else
  1099. #define SGEMM_DEFAULT_Q 384
  1100. #define DGEMM_DEFAULT_Q 256
  1101. #endif
  1102. #define CGEMM_DEFAULT_Q 192
  1103. #define ZGEMM_DEFAULT_Q 128
  1104. #define SGEMM_DEFAULT_R sgemm_r
  1105. #define DGEMM_DEFAULT_R 13824
  1106. #define CGEMM_DEFAULT_R cgemm_r
  1107. #define ZGEMM_DEFAULT_R zgemm_r
  1108. #define QGEMM_DEFAULT_Q 128
  1109. #define QGEMM_DEFAULT_P 504
  1110. #define QGEMM_DEFAULT_R qgemm_r
  1111. #define XGEMM_DEFAULT_P 252
  1112. #define XGEMM_DEFAULT_R xgemm_r
  1113. #define XGEMM_DEFAULT_Q 128
  1114. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1115. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1116. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1117. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1118. #define CGEMM3M_DEFAULT_P 448
  1119. #define ZGEMM3M_DEFAULT_P 224
  1120. #define XGEMM3M_DEFAULT_P 112
  1121. #define CGEMM3M_DEFAULT_Q 224
  1122. #define ZGEMM3M_DEFAULT_Q 224
  1123. #define XGEMM3M_DEFAULT_Q 224
  1124. #define CGEMM3M_DEFAULT_R 12288
  1125. #define ZGEMM3M_DEFAULT_R 12288
  1126. #define XGEMM3M_DEFAULT_R 12288
  1127. #endif
  1128. #endif
  1129. #ifdef ATOM
  1130. #define SNUMOPT 2
  1131. #define DNUMOPT 1
  1132. #define GEMM_DEFAULT_OFFSET_A 64
  1133. #define GEMM_DEFAULT_OFFSET_B 0
  1134. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1135. #define SYMV_P 8
  1136. #ifdef ARCH_X86
  1137. #define SGEMM_DEFAULT_UNROLL_M 4
  1138. #define DGEMM_DEFAULT_UNROLL_M 2
  1139. #define QGEMM_DEFAULT_UNROLL_M 2
  1140. #define CGEMM_DEFAULT_UNROLL_M 2
  1141. #define ZGEMM_DEFAULT_UNROLL_M 1
  1142. #define XGEMM_DEFAULT_UNROLL_M 1
  1143. #else
  1144. #define SGEMM_DEFAULT_UNROLL_M 8
  1145. #define DGEMM_DEFAULT_UNROLL_M 4
  1146. #define QGEMM_DEFAULT_UNROLL_M 2
  1147. #define CGEMM_DEFAULT_UNROLL_M 4
  1148. #define ZGEMM_DEFAULT_UNROLL_M 2
  1149. #define XGEMM_DEFAULT_UNROLL_M 1
  1150. #endif
  1151. #define SGEMM_DEFAULT_UNROLL_N 4
  1152. #define DGEMM_DEFAULT_UNROLL_N 2
  1153. #define QGEMM_DEFAULT_UNROLL_N 2
  1154. #define CGEMM_DEFAULT_UNROLL_N 2
  1155. #define ZGEMM_DEFAULT_UNROLL_N 1
  1156. #define XGEMM_DEFAULT_UNROLL_N 1
  1157. #define SGEMM_DEFAULT_P sgemm_p
  1158. #define SGEMM_DEFAULT_R sgemm_r
  1159. #define DGEMM_DEFAULT_P dgemm_p
  1160. #define DGEMM_DEFAULT_R dgemm_r
  1161. #define QGEMM_DEFAULT_P qgemm_p
  1162. #define QGEMM_DEFAULT_R qgemm_r
  1163. #define CGEMM_DEFAULT_P cgemm_p
  1164. #define CGEMM_DEFAULT_R cgemm_r
  1165. #define ZGEMM_DEFAULT_P zgemm_p
  1166. #define ZGEMM_DEFAULT_R zgemm_r
  1167. #define XGEMM_DEFAULT_P xgemm_p
  1168. #define XGEMM_DEFAULT_R xgemm_r
  1169. #define SGEMM_DEFAULT_Q 256
  1170. #define DGEMM_DEFAULT_Q 256
  1171. #define QGEMM_DEFAULT_Q 256
  1172. #define CGEMM_DEFAULT_Q 256
  1173. #define ZGEMM_DEFAULT_Q 256
  1174. #define XGEMM_DEFAULT_Q 256
  1175. #endif
  1176. #ifdef ITANIUM2
  1177. #define SNUMOPT 4
  1178. #define DNUMOPT 4
  1179. #define GEMM_DEFAULT_OFFSET_A 0
  1180. #define GEMM_DEFAULT_OFFSET_B 128
  1181. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1182. #define SGEMM_DEFAULT_UNROLL_M 8
  1183. #define SGEMM_DEFAULT_UNROLL_N 8
  1184. #define DGEMM_DEFAULT_UNROLL_M 8
  1185. #define DGEMM_DEFAULT_UNROLL_N 8
  1186. #define QGEMM_DEFAULT_UNROLL_M 8
  1187. #define QGEMM_DEFAULT_UNROLL_N 8
  1188. #define CGEMM_DEFAULT_UNROLL_M 4
  1189. #define CGEMM_DEFAULT_UNROLL_N 4
  1190. #define ZGEMM_DEFAULT_UNROLL_M 4
  1191. #define ZGEMM_DEFAULT_UNROLL_N 4
  1192. #define XGEMM_DEFAULT_UNROLL_M 4
  1193. #define XGEMM_DEFAULT_UNROLL_N 4
  1194. #define SGEMM_DEFAULT_P sgemm_p
  1195. #define DGEMM_DEFAULT_P dgemm_p
  1196. #define QGEMM_DEFAULT_P qgemm_p
  1197. #define CGEMM_DEFAULT_P cgemm_p
  1198. #define ZGEMM_DEFAULT_P zgemm_p
  1199. #define XGEMM_DEFAULT_P xgemm_p
  1200. #define SGEMM_DEFAULT_Q 1024
  1201. #define DGEMM_DEFAULT_Q 1024
  1202. #define QGEMM_DEFAULT_Q 1024
  1203. #define CGEMM_DEFAULT_Q 1024
  1204. #define ZGEMM_DEFAULT_Q 1024
  1205. #define XGEMM_DEFAULT_Q 1024
  1206. #define SGEMM_DEFAULT_R sgemm_r
  1207. #define DGEMM_DEFAULT_R dgemm_r
  1208. #define QGEMM_DEFAULT_R qgemm_r
  1209. #define CGEMM_DEFAULT_R cgemm_r
  1210. #define ZGEMM_DEFAULT_R zgemm_r
  1211. #define XGEMM_DEFAULT_R xgemm_r
  1212. #define SYMV_P 16
  1213. #define GETRF_FACTOR 0.65
  1214. #endif
  1215. #if defined(EV4) || defined(EV5) || defined(EV6)
  1216. #ifdef EV4
  1217. #define SNUMOPT 1
  1218. #define DNUMOPT 1
  1219. #else
  1220. #define SNUMOPT 2
  1221. #define DNUMOPT 2
  1222. #endif
  1223. #define GEMM_DEFAULT_OFFSET_A 512
  1224. #define GEMM_DEFAULT_OFFSET_B 512
  1225. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1226. #define SGEMM_DEFAULT_UNROLL_M 4
  1227. #define SGEMM_DEFAULT_UNROLL_N 4
  1228. #define DGEMM_DEFAULT_UNROLL_M 4
  1229. #define DGEMM_DEFAULT_UNROLL_N 4
  1230. #define CGEMM_DEFAULT_UNROLL_M 2
  1231. #define CGEMM_DEFAULT_UNROLL_N 2
  1232. #define ZGEMM_DEFAULT_UNROLL_M 2
  1233. #define ZGEMM_DEFAULT_UNROLL_N 2
  1234. #define SYMV_P 8
  1235. #ifdef EV4
  1236. #define SGEMM_DEFAULT_P 32
  1237. #define SGEMM_DEFAULT_Q 112
  1238. #define SGEMM_DEFAULT_R 256
  1239. #define DGEMM_DEFAULT_P 32
  1240. #define DGEMM_DEFAULT_Q 56
  1241. #define DGEMM_DEFAULT_R 256
  1242. #define CGEMM_DEFAULT_P 32
  1243. #define CGEMM_DEFAULT_Q 64
  1244. #define CGEMM_DEFAULT_R 240
  1245. #define ZGEMM_DEFAULT_P 32
  1246. #define ZGEMM_DEFAULT_Q 32
  1247. #define ZGEMM_DEFAULT_R 240
  1248. #endif
  1249. #ifdef EV5
  1250. #define SGEMM_DEFAULT_P 64
  1251. #define SGEMM_DEFAULT_Q 256
  1252. #define DGEMM_DEFAULT_P 64
  1253. #define DGEMM_DEFAULT_Q 128
  1254. #define CGEMM_DEFAULT_P 64
  1255. #define CGEMM_DEFAULT_Q 128
  1256. #define ZGEMM_DEFAULT_P 64
  1257. #define ZGEMM_DEFAULT_Q 64
  1258. #endif
  1259. #ifdef EV6
  1260. #define SGEMM_DEFAULT_P 256
  1261. #define SGEMM_DEFAULT_Q 512
  1262. #define DGEMM_DEFAULT_P 256
  1263. #define DGEMM_DEFAULT_Q 256
  1264. #define CGEMM_DEFAULT_P 256
  1265. #define CGEMM_DEFAULT_Q 256
  1266. #define ZGEMM_DEFAULT_P 128
  1267. #define ZGEMM_DEFAULT_Q 256
  1268. #endif
  1269. #endif
  1270. #ifdef CELL
  1271. #define SNUMOPT 2
  1272. #define DNUMOPT 2
  1273. #define GEMM_DEFAULT_OFFSET_A 0
  1274. #define GEMM_DEFAULT_OFFSET_B 8192
  1275. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1276. #define SGEMM_DEFAULT_UNROLL_M 16
  1277. #define SGEMM_DEFAULT_UNROLL_N 4
  1278. #define DGEMM_DEFAULT_UNROLL_M 4
  1279. #define DGEMM_DEFAULT_UNROLL_N 4
  1280. #define CGEMM_DEFAULT_UNROLL_M 8
  1281. #define CGEMM_DEFAULT_UNROLL_N 2
  1282. #define ZGEMM_DEFAULT_UNROLL_M 2
  1283. #define ZGEMM_DEFAULT_UNROLL_N 2
  1284. #define SGEMM_DEFAULT_P 128
  1285. #define DGEMM_DEFAULT_P 128
  1286. #define CGEMM_DEFAULT_P 128
  1287. #define ZGEMM_DEFAULT_P 128
  1288. #define SGEMM_DEFAULT_Q 512
  1289. #define DGEMM_DEFAULT_Q 256
  1290. #define CGEMM_DEFAULT_Q 256
  1291. #define ZGEMM_DEFAULT_Q 128
  1292. #define SYMV_P 4
  1293. #endif
  1294. #ifdef PPCG4
  1295. #define GEMM_DEFAULT_OFFSET_A 0
  1296. #define GEMM_DEFAULT_OFFSET_B 1024
  1297. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1298. #define SGEMM_DEFAULT_UNROLL_M 16
  1299. #define SGEMM_DEFAULT_UNROLL_N 4
  1300. #define DGEMM_DEFAULT_UNROLL_M 4
  1301. #define DGEMM_DEFAULT_UNROLL_N 4
  1302. #define CGEMM_DEFAULT_UNROLL_M 8
  1303. #define CGEMM_DEFAULT_UNROLL_N 2
  1304. #define ZGEMM_DEFAULT_UNROLL_M 2
  1305. #define ZGEMM_DEFAULT_UNROLL_N 2
  1306. #define SGEMM_DEFAULT_P 256
  1307. #define DGEMM_DEFAULT_P 128
  1308. #define CGEMM_DEFAULT_P 128
  1309. #define ZGEMM_DEFAULT_P 64
  1310. #define SGEMM_DEFAULT_Q 256
  1311. #define DGEMM_DEFAULT_Q 256
  1312. #define CGEMM_DEFAULT_Q 256
  1313. #define ZGEMM_DEFAULT_Q 256
  1314. #define SYMV_P 4
  1315. #endif
  1316. #ifdef PPC970
  1317. #define SNUMOPT 4
  1318. #define DNUMOPT 4
  1319. #define GEMM_DEFAULT_OFFSET_A 2688
  1320. #define GEMM_DEFAULT_OFFSET_B 3072
  1321. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1322. #define SGEMM_DEFAULT_UNROLL_M 16
  1323. #define SGEMM_DEFAULT_UNROLL_N 4
  1324. #define DGEMM_DEFAULT_UNROLL_M 4
  1325. #define DGEMM_DEFAULT_UNROLL_N 4
  1326. #define CGEMM_DEFAULT_UNROLL_M 8
  1327. #define CGEMM_DEFAULT_UNROLL_N 2
  1328. #define ZGEMM_DEFAULT_UNROLL_M 2
  1329. #define ZGEMM_DEFAULT_UNROLL_N 2
  1330. #ifdef OS_LINUX
  1331. #if L2_SIZE == 1024976
  1332. #define SGEMM_DEFAULT_P 320
  1333. #define DGEMM_DEFAULT_P 256
  1334. #define CGEMM_DEFAULT_P 256
  1335. #define ZGEMM_DEFAULT_P 256
  1336. #else
  1337. #define SGEMM_DEFAULT_P 176
  1338. #define DGEMM_DEFAULT_P 176
  1339. #define CGEMM_DEFAULT_P 176
  1340. #define ZGEMM_DEFAULT_P 176
  1341. #endif
  1342. #endif
  1343. #define SGEMM_DEFAULT_Q 512
  1344. #define DGEMM_DEFAULT_Q 256
  1345. #define CGEMM_DEFAULT_Q 256
  1346. #define ZGEMM_DEFAULT_Q 128
  1347. #define SYMV_P 4
  1348. #endif
  1349. #ifdef PPC440
  1350. #define SNUMOPT 2
  1351. #define DNUMOPT 2
  1352. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1353. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1354. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1355. #define SGEMM_DEFAULT_UNROLL_M 4
  1356. #define SGEMM_DEFAULT_UNROLL_N 4
  1357. #define DGEMM_DEFAULT_UNROLL_M 4
  1358. #define DGEMM_DEFAULT_UNROLL_N 4
  1359. #define CGEMM_DEFAULT_UNROLL_M 2
  1360. #define CGEMM_DEFAULT_UNROLL_N 2
  1361. #define ZGEMM_DEFAULT_UNROLL_M 2
  1362. #define ZGEMM_DEFAULT_UNROLL_N 2
  1363. #define SGEMM_DEFAULT_P 512
  1364. #define DGEMM_DEFAULT_P 512
  1365. #define CGEMM_DEFAULT_P 512
  1366. #define ZGEMM_DEFAULT_P 512
  1367. #define SGEMM_DEFAULT_Q 1024
  1368. #define DGEMM_DEFAULT_Q 512
  1369. #define CGEMM_DEFAULT_Q 512
  1370. #define ZGEMM_DEFAULT_Q 256
  1371. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1372. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1373. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1374. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1375. #define SYMV_P 4
  1376. #endif
  1377. #ifdef PPC440FP2
  1378. #define SNUMOPT 4
  1379. #define DNUMOPT 4
  1380. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1381. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1382. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1383. #define SGEMM_DEFAULT_UNROLL_M 8
  1384. #define SGEMM_DEFAULT_UNROLL_N 4
  1385. #define DGEMM_DEFAULT_UNROLL_M 8
  1386. #define DGEMM_DEFAULT_UNROLL_N 4
  1387. #define CGEMM_DEFAULT_UNROLL_M 4
  1388. #define CGEMM_DEFAULT_UNROLL_N 2
  1389. #define ZGEMM_DEFAULT_UNROLL_M 4
  1390. #define ZGEMM_DEFAULT_UNROLL_N 2
  1391. #define SGEMM_DEFAULT_P 128
  1392. #define DGEMM_DEFAULT_P 128
  1393. #define CGEMM_DEFAULT_P 128
  1394. #define ZGEMM_DEFAULT_P 128
  1395. #if 1
  1396. #define SGEMM_DEFAULT_Q 4096
  1397. #define DGEMM_DEFAULT_Q 3072
  1398. #define CGEMM_DEFAULT_Q 2048
  1399. #define ZGEMM_DEFAULT_Q 1024
  1400. #else
  1401. #define SGEMM_DEFAULT_Q 512
  1402. #define DGEMM_DEFAULT_Q 256
  1403. #define CGEMM_DEFAULT_Q 256
  1404. #define ZGEMM_DEFAULT_Q 128
  1405. #endif
  1406. #define SYMV_P 4
  1407. #endif
  1408. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1409. #define GEMM_DEFAULT_OFFSET_A 0
  1410. #define GEMM_DEFAULT_OFFSET_B 2048
  1411. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1412. #define SGEMM_DEFAULT_UNROLL_M 4
  1413. #define SGEMM_DEFAULT_UNROLL_N 4
  1414. #define DGEMM_DEFAULT_UNROLL_M 4
  1415. #define DGEMM_DEFAULT_UNROLL_N 4
  1416. #define CGEMM_DEFAULT_UNROLL_M 2
  1417. #define CGEMM_DEFAULT_UNROLL_N 2
  1418. #define ZGEMM_DEFAULT_UNROLL_M 2
  1419. #define ZGEMM_DEFAULT_UNROLL_N 2
  1420. #ifdef POWER3
  1421. #define SNUMOPT 4
  1422. #define DNUMOPT 4
  1423. #define SGEMM_DEFAULT_P 256
  1424. #define SGEMM_DEFAULT_Q 432
  1425. #define SGEMM_DEFAULT_R 1012
  1426. #define DGEMM_DEFAULT_P 256
  1427. #define DGEMM_DEFAULT_Q 216
  1428. #define DGEMM_DEFAULT_R 1012
  1429. #define ZGEMM_DEFAULT_P 256
  1430. #define ZGEMM_DEFAULT_Q 104
  1431. #define ZGEMM_DEFAULT_R 1012
  1432. #endif
  1433. #if defined(POWER4)
  1434. #ifdef ALLOC_HUGETLB
  1435. #define SGEMM_DEFAULT_P 184
  1436. #define DGEMM_DEFAULT_P 184
  1437. #define CGEMM_DEFAULT_P 184
  1438. #define ZGEMM_DEFAULT_P 184
  1439. #else
  1440. #define SGEMM_DEFAULT_P 144
  1441. #define DGEMM_DEFAULT_P 144
  1442. #define CGEMM_DEFAULT_P 144
  1443. #define ZGEMM_DEFAULT_P 144
  1444. #endif
  1445. #endif
  1446. #if defined(POWER5)
  1447. #ifdef ALLOC_HUGETLB
  1448. #define SGEMM_DEFAULT_P 512
  1449. #define DGEMM_DEFAULT_P 256
  1450. #define CGEMM_DEFAULT_P 256
  1451. #define ZGEMM_DEFAULT_P 128
  1452. #else
  1453. #define SGEMM_DEFAULT_P 320
  1454. #define DGEMM_DEFAULT_P 160
  1455. #define CGEMM_DEFAULT_P 160
  1456. #define ZGEMM_DEFAULT_P 80
  1457. #endif
  1458. #define SGEMM_DEFAULT_Q 256
  1459. #define CGEMM_DEFAULT_Q 256
  1460. #define DGEMM_DEFAULT_Q 256
  1461. #define ZGEMM_DEFAULT_Q 256
  1462. #endif
  1463. #define SYMV_P 8
  1464. #endif
  1465. #if defined(POWER6)
  1466. #define SNUMOPT 4
  1467. #define DNUMOPT 4
  1468. #define GEMM_DEFAULT_OFFSET_A 384
  1469. #define GEMM_DEFAULT_OFFSET_B 1024
  1470. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1471. #define SGEMM_DEFAULT_UNROLL_M 4
  1472. #define SGEMM_DEFAULT_UNROLL_N 4
  1473. #define DGEMM_DEFAULT_UNROLL_M 4
  1474. #define DGEMM_DEFAULT_UNROLL_N 4
  1475. #define CGEMM_DEFAULT_UNROLL_M 2
  1476. #define CGEMM_DEFAULT_UNROLL_N 4
  1477. #define ZGEMM_DEFAULT_UNROLL_M 2
  1478. #define ZGEMM_DEFAULT_UNROLL_N 4
  1479. #define SGEMM_DEFAULT_P 992
  1480. #define DGEMM_DEFAULT_P 480
  1481. #define CGEMM_DEFAULT_P 488
  1482. #define ZGEMM_DEFAULT_P 248
  1483. #define SGEMM_DEFAULT_Q 504
  1484. #define DGEMM_DEFAULT_Q 504
  1485. #define CGEMM_DEFAULT_Q 400
  1486. #define ZGEMM_DEFAULT_Q 400
  1487. #define SYMV_P 8
  1488. #endif
  1489. #if defined(SPARC) && defined(V7)
  1490. #define SNUMOPT 4
  1491. #define DNUMOPT 4
  1492. #define GEMM_DEFAULT_OFFSET_A 0
  1493. #define GEMM_DEFAULT_OFFSET_B 2048
  1494. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1495. #define SGEMM_DEFAULT_UNROLL_M 2
  1496. #define SGEMM_DEFAULT_UNROLL_N 8
  1497. #define DGEMM_DEFAULT_UNROLL_M 2
  1498. #define DGEMM_DEFAULT_UNROLL_N 8
  1499. #define CGEMM_DEFAULT_UNROLL_M 1
  1500. #define CGEMM_DEFAULT_UNROLL_N 4
  1501. #define ZGEMM_DEFAULT_UNROLL_M 1
  1502. #define ZGEMM_DEFAULT_UNROLL_N 4
  1503. #define SGEMM_DEFAULT_P 256
  1504. #define DGEMM_DEFAULT_P 256
  1505. #define CGEMM_DEFAULT_P 256
  1506. #define ZGEMM_DEFAULT_P 256
  1507. #define SGEMM_DEFAULT_Q 512
  1508. #define DGEMM_DEFAULT_Q 256
  1509. #define CGEMM_DEFAULT_Q 256
  1510. #define ZGEMM_DEFAULT_Q 128
  1511. #define SYMV_P 8
  1512. #define GEMM_THREAD gemm_thread_mn
  1513. #endif
  1514. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__) /* Taken for SPARC+V9, or whenever __sparc_v9__ is defined on its own. */
  1515. #define SNUMOPT 2
  1516. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1517. #define GEMM_DEFAULT_OFFSET_A 0
  1518. #define GEMM_DEFAULT_OFFSET_B 2048
  1519. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults for the S, D, C and Z GEMM variants. */
  1520. #define SGEMM_DEFAULT_UNROLL_M 4
  1521. #define SGEMM_DEFAULT_UNROLL_N 4
  1522. #define DGEMM_DEFAULT_UNROLL_M 4
  1523. #define DGEMM_DEFAULT_UNROLL_N 4
  1524. #define CGEMM_DEFAULT_UNROLL_M 2
  1525. #define CGEMM_DEFAULT_UNROLL_N 2
  1526. #define ZGEMM_DEFAULT_UNROLL_M 2
  1527. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P and Q defaults; this block defines no *GEMM_DEFAULT_R value. */
  1528. #define SGEMM_DEFAULT_P 512
  1529. #define DGEMM_DEFAULT_P 512
  1530. #define CGEMM_DEFAULT_P 512
  1531. #define ZGEMM_DEFAULT_P 512
  1532. #define SGEMM_DEFAULT_Q 1024
  1533. #define DGEMM_DEFAULT_Q 512
  1534. #define CGEMM_DEFAULT_Q 512
  1535. #define ZGEMM_DEFAULT_Q 256
  1536. #define SYMV_P 8
  1537. #endif /* SPARC V9 */
  1538. #ifdef SICORTEX /* Default tuning constants selected when SICORTEX is defined. */
  1539. #define SNUMOPT 2
  1540. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1541. #define GEMM_DEFAULT_OFFSET_A 0
  1542. #define GEMM_DEFAULT_OFFSET_B 0
  1543. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults for the S, D, C and Z GEMM variants. */
  1544. #define SGEMM_DEFAULT_UNROLL_M 2
  1545. #define SGEMM_DEFAULT_UNROLL_N 8
  1546. #define DGEMM_DEFAULT_UNROLL_M 2
  1547. #define DGEMM_DEFAULT_UNROLL_N 8
  1548. #define CGEMM_DEFAULT_UNROLL_M 1
  1549. #define CGEMM_DEFAULT_UNROLL_N 4
  1550. #define ZGEMM_DEFAULT_UNROLL_M 1
  1551. #define ZGEMM_DEFAULT_UNROLL_N 4
  /* P, Q and R defaults; R is the same literal (2000) for all four variants. */
  1552. #define SGEMM_DEFAULT_P 108
  1553. #define DGEMM_DEFAULT_P 112
  1554. #define CGEMM_DEFAULT_P 108
  1555. #define ZGEMM_DEFAULT_P 112
  1556. #define SGEMM_DEFAULT_Q 288
  1557. #define DGEMM_DEFAULT_Q 144
  1558. #define CGEMM_DEFAULT_Q 144
  1559. #define ZGEMM_DEFAULT_Q 72
  1560. #define SGEMM_DEFAULT_R 2000
  1561. #define DGEMM_DEFAULT_R 2000
  1562. #define CGEMM_DEFAULT_R 2000
  1563. #define ZGEMM_DEFAULT_R 2000
  1564. #define SYMV_P 16
  1565. #endif /* SICORTEX */
  1566. #ifdef LOONGSON3A /* Default tuning constants selected when LOONGSON3A is defined. */
  1567. //// Originally copied from the SICORTEX block. NOTE(review): the UNROLL, P, Q and R values below no longer match that block.
  1568. #define SNUMOPT 2
  1569. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1570. #define GEMM_DEFAULT_OFFSET_A 0
  1571. #define GEMM_DEFAULT_OFFSET_B 0
  1572. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults for the S, D, C and Z GEMM variants. */
  1573. #define SGEMM_DEFAULT_UNROLL_M 8
  1574. #define SGEMM_DEFAULT_UNROLL_N 4
  1575. #define DGEMM_DEFAULT_UNROLL_M 4
  1576. #define DGEMM_DEFAULT_UNROLL_N 4
  1577. #define CGEMM_DEFAULT_UNROLL_M 4
  1578. #define CGEMM_DEFAULT_UNROLL_N 2
  1579. #define ZGEMM_DEFAULT_UNROLL_M 2
  1580. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P, Q and R defaults. */
  1581. #define SGEMM_DEFAULT_P 64
  1582. #define DGEMM_DEFAULT_P 44
  1583. #define CGEMM_DEFAULT_P 64
  1584. #define ZGEMM_DEFAULT_P 32
  1585. #define SGEMM_DEFAULT_Q 192
  1586. #define DGEMM_DEFAULT_Q 92
  1587. #define CGEMM_DEFAULT_Q 128
  1588. #define ZGEMM_DEFAULT_Q 80
  1589. #define SGEMM_DEFAULT_R 640
  /* Unlike the S/C/Z entries, the D variant expands to the identifier dgemm_r rather than a literal. */
  /* NOTE(review): dgemm_r is not declared in this section -- confirm where it is defined. */
  1590. #define DGEMM_DEFAULT_R dgemm_r
  1591. #define CGEMM_DEFAULT_R 640
  1592. #define ZGEMM_DEFAULT_R 640
  /* Additional A/B offset constants, defined here independently of GEMM_DEFAULT_OFFSET_A/B. */
  1593. #define GEMM_OFFSET_A1 0x10000
  1594. #define GEMM_OFFSET_B1 0x100000
  1595. #define SYMV_P 16
  1596. #endif /* LOONGSON3A */
  1597. #ifdef LOONGSON3B /* Default tuning constants selected when LOONGSON3B is defined. */
  1598. #define SNUMOPT 2
  1599. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1600. #define GEMM_DEFAULT_OFFSET_A 0
  1601. #define GEMM_DEFAULT_OFFSET_B 0
  1602. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults: 2 x 2 for every variant (S, D, C, Z). */
  1603. #define SGEMM_DEFAULT_UNROLL_M 2
  1604. #define SGEMM_DEFAULT_UNROLL_N 2
  1605. #define DGEMM_DEFAULT_UNROLL_M 2
  1606. #define DGEMM_DEFAULT_UNROLL_N 2
  1607. #define CGEMM_DEFAULT_UNROLL_M 2
  1608. #define CGEMM_DEFAULT_UNROLL_N 2
  1609. #define ZGEMM_DEFAULT_UNROLL_M 2
  1610. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P, Q and R defaults; R is the same literal (512) for all four variants. */
  1611. #define SGEMM_DEFAULT_P 64
  1612. #define DGEMM_DEFAULT_P 24
  1613. #define CGEMM_DEFAULT_P 24
  1614. #define ZGEMM_DEFAULT_P 20
  1615. #define SGEMM_DEFAULT_Q 192
  1616. #define DGEMM_DEFAULT_Q 128
  1617. #define CGEMM_DEFAULT_Q 128
  1618. #define ZGEMM_DEFAULT_Q 64
  1619. #define SGEMM_DEFAULT_R 512
  1620. #define DGEMM_DEFAULT_R 512
  1621. #define CGEMM_DEFAULT_R 512
  1622. #define ZGEMM_DEFAULT_R 512
  /* Additional A/B offset constants, defined here independently of GEMM_DEFAULT_OFFSET_A/B. */
  1623. #define GEMM_OFFSET_A1 0x10000
  1624. #define GEMM_OFFSET_B1 0x100000
  1625. #define SYMV_P 16
  1626. #endif /* LOONGSON3B */
  1627. #ifdef ARMV7 /* Default tuning constants selected when ARMV7 is defined. */
  1628. #define SNUMOPT 2
  1629. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1630. #define GEMM_DEFAULT_OFFSET_A 0
  1631. #define GEMM_DEFAULT_OFFSET_B 0
  1632. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults: 4 x 4 for S and D, 2 x 2 for C and Z. */
  1633. #define SGEMM_DEFAULT_UNROLL_M 4
  1634. #define SGEMM_DEFAULT_UNROLL_N 4
  1635. #define DGEMM_DEFAULT_UNROLL_M 4
  1636. #define DGEMM_DEFAULT_UNROLL_N 4
  1637. #define CGEMM_DEFAULT_UNROLL_M 2
  1638. #define CGEMM_DEFAULT_UNROLL_N 2
  1639. #define ZGEMM_DEFAULT_UNROLL_M 2
  1640. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P, Q and R defaults for the S, D, C and Z GEMM variants. */
  1641. #define SGEMM_DEFAULT_P 128
  1642. #define DGEMM_DEFAULT_P 128
  1643. #define CGEMM_DEFAULT_P 96
  1644. #define ZGEMM_DEFAULT_P 64
  1645. #define SGEMM_DEFAULT_Q 240
  1646. #define DGEMM_DEFAULT_Q 120
  1647. #define CGEMM_DEFAULT_Q 120
  1648. #define ZGEMM_DEFAULT_Q 120
  1649. #define SGEMM_DEFAULT_R 12288
  1650. #define DGEMM_DEFAULT_R 8192
  1651. #define CGEMM_DEFAULT_R 4096
  1652. #define ZGEMM_DEFAULT_R 4096
  1653. #define SYMV_P 16
  1654. #endif /* ARMV7 */
  1655. #if defined(ARMV6) /* Default tuning constants selected when ARMV6 is defined. */
  1656. #define SNUMOPT 2
  1657. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1658. #define GEMM_DEFAULT_OFFSET_A 0
  1659. #define GEMM_DEFAULT_OFFSET_B 0
  1660. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults: 4 x 2 for S and D, 2 x 2 for C and Z. */
  1661. #define SGEMM_DEFAULT_UNROLL_M 4
  1662. #define SGEMM_DEFAULT_UNROLL_N 2
  1663. #define DGEMM_DEFAULT_UNROLL_M 4
  1664. #define DGEMM_DEFAULT_UNROLL_N 2
  1665. #define CGEMM_DEFAULT_UNROLL_M 2
  1666. #define CGEMM_DEFAULT_UNROLL_N 2
  1667. #define ZGEMM_DEFAULT_UNROLL_M 2
  1668. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P, Q and R defaults for the S, D, C and Z GEMM variants. */
  1669. #define SGEMM_DEFAULT_P 128
  1670. #define DGEMM_DEFAULT_P 128
  1671. #define CGEMM_DEFAULT_P 96
  1672. #define ZGEMM_DEFAULT_P 64
  1673. #define SGEMM_DEFAULT_Q 240
  1674. #define DGEMM_DEFAULT_Q 120
  1675. #define CGEMM_DEFAULT_Q 120
  1676. #define ZGEMM_DEFAULT_Q 120
  1677. #define SGEMM_DEFAULT_R 12288
  1678. #define DGEMM_DEFAULT_R 8192
  1679. #define CGEMM_DEFAULT_R 4096
  1680. #define ZGEMM_DEFAULT_R 4096
  1681. #define SYMV_P 16
  1682. #endif /* ARMV6 */
  1683. #if defined(ARMV8) /* Default tuning constants selected when ARMV8 is defined. */
  1684. #define SNUMOPT 2
  1685. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1686. #define GEMM_DEFAULT_OFFSET_A 0
  1687. #define GEMM_DEFAULT_OFFSET_B 0
  1688. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults: 4 x 4 for S, 2 x 2 for D, C and Z. */
  1689. #define SGEMM_DEFAULT_UNROLL_M 4
  1690. #define SGEMM_DEFAULT_UNROLL_N 4
  1691. #define DGEMM_DEFAULT_UNROLL_M 2
  1692. #define DGEMM_DEFAULT_UNROLL_N 2
  1693. #define CGEMM_DEFAULT_UNROLL_M 2
  1694. #define CGEMM_DEFAULT_UNROLL_N 2
  1695. #define ZGEMM_DEFAULT_UNROLL_M 2
  1696. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P, Q and R defaults for the S, D, C and Z GEMM variants. */
  1697. #define SGEMM_DEFAULT_P 128
  1698. #define DGEMM_DEFAULT_P 128
  1699. #define CGEMM_DEFAULT_P 96
  1700. #define ZGEMM_DEFAULT_P 64
  1701. #define SGEMM_DEFAULT_Q 240
  1702. #define DGEMM_DEFAULT_Q 120
  1703. #define CGEMM_DEFAULT_Q 120
  1704. #define ZGEMM_DEFAULT_Q 120
  1705. #define SGEMM_DEFAULT_R 12288
  1706. #define DGEMM_DEFAULT_R 8192
  1707. #define CGEMM_DEFAULT_R 4096
  1708. #define ZGEMM_DEFAULT_R 4096
  1709. #define SYMV_P 16
  1710. #endif /* ARMV8 */
  1711. #if defined(ARMV5) /* Default tuning constants selected when ARMV5 is defined. */
  1712. #define SNUMOPT 2
  1713. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1714. #define GEMM_DEFAULT_OFFSET_A 0
  1715. #define GEMM_DEFAULT_OFFSET_B 0
  1716. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* UNROLL_M / UNROLL_N defaults: 2 x 2 for every variant (S, D, C, Z). */
  1717. #define SGEMM_DEFAULT_UNROLL_M 2
  1718. #define SGEMM_DEFAULT_UNROLL_N 2
  1719. #define DGEMM_DEFAULT_UNROLL_M 2
  1720. #define DGEMM_DEFAULT_UNROLL_N 2
  1721. #define CGEMM_DEFAULT_UNROLL_M 2
  1722. #define CGEMM_DEFAULT_UNROLL_N 2
  1723. #define ZGEMM_DEFAULT_UNROLL_M 2
  1724. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P, Q and R defaults for the S, D, C and Z GEMM variants. */
  1725. #define SGEMM_DEFAULT_P 128
  1726. #define DGEMM_DEFAULT_P 128
  1727. #define CGEMM_DEFAULT_P 96
  1728. #define ZGEMM_DEFAULT_P 64
  1729. #define SGEMM_DEFAULT_Q 240
  1730. #define DGEMM_DEFAULT_Q 120
  1731. #define CGEMM_DEFAULT_Q 120
  1732. #define ZGEMM_DEFAULT_Q 120
  1733. #define SGEMM_DEFAULT_R 12288
  1734. #define DGEMM_DEFAULT_R 8192
  1735. #define CGEMM_DEFAULT_R 4096
  1736. #define ZGEMM_DEFAULT_R 4096
  1737. #define SYMV_P 16
  1738. #endif /* ARMV5 */
  1739. #ifdef GENERIC /* Default tuning constants selected when GENERIC is defined. */
  1740. #define SNUMOPT 2
  1741. #define DNUMOPT 2
  /* Defaults for the GEMM buffer offsets (A, B) and the alignment mask. */
  1742. #define GEMM_DEFAULT_OFFSET_A 0
  1743. #define GEMM_DEFAULT_OFFSET_B 0
  1744. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* UNROLL_N defaults; this block also covers the Q and X variants explicitly. */
  1745. #define SGEMM_DEFAULT_UNROLL_N 2
  1746. #define DGEMM_DEFAULT_UNROLL_N 2
  1747. #define QGEMM_DEFAULT_UNROLL_N 2
  1748. #define CGEMM_DEFAULT_UNROLL_N 2
  1749. #define ZGEMM_DEFAULT_UNROLL_N 2
  1750. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M defaults, split on ARCH_X86. */
  /* NOTE(review): both branches currently define exactly the same values, so the conditional has no effect as written. */
  1751. #ifdef ARCH_X86
  1752. #define SGEMM_DEFAULT_UNROLL_M 2
  1753. #define DGEMM_DEFAULT_UNROLL_M 2
  1754. #define QGEMM_DEFAULT_UNROLL_M 2
  1755. #define CGEMM_DEFAULT_UNROLL_M 2
  1756. #define ZGEMM_DEFAULT_UNROLL_M 2
  1757. #define XGEMM_DEFAULT_UNROLL_M 1
  1758. #else
  1759. #define SGEMM_DEFAULT_UNROLL_M 2
  1760. #define DGEMM_DEFAULT_UNROLL_M 2
  1761. #define QGEMM_DEFAULT_UNROLL_M 2
  1762. #define CGEMM_DEFAULT_UNROLL_M 2
  1763. #define ZGEMM_DEFAULT_UNROLL_M 2
  1764. #define XGEMM_DEFAULT_UNROLL_M 1
  1765. #endif
  /* P defaults expand to identifiers (sgemm_p, ...) instead of literals. */
  /* NOTE(review): these identifiers are not declared in this section -- confirm where they are defined. */
  1766. #define SGEMM_DEFAULT_P sgemm_p
  1767. #define DGEMM_DEFAULT_P dgemm_p
  1768. #define QGEMM_DEFAULT_P qgemm_p
  1769. #define CGEMM_DEFAULT_P cgemm_p
  1770. #define ZGEMM_DEFAULT_P zgemm_p
  1771. #define XGEMM_DEFAULT_P xgemm_p
  /* R defaults likewise expand to identifiers (sgemm_r, ...). */
  1772. #define SGEMM_DEFAULT_R sgemm_r
  1773. #define DGEMM_DEFAULT_R dgemm_r
  1774. #define QGEMM_DEFAULT_R qgemm_r
  1775. #define CGEMM_DEFAULT_R cgemm_r
  1776. #define ZGEMM_DEFAULT_R zgemm_r
  1777. #define XGEMM_DEFAULT_R xgemm_r
  /* Q defaults are the literal 128 for every variant. */
  1778. #define SGEMM_DEFAULT_Q 128
  1779. #define DGEMM_DEFAULT_Q 128
  1780. #define QGEMM_DEFAULT_Q 128
  1781. #define CGEMM_DEFAULT_Q 128
  1782. #define ZGEMM_DEFAULT_Q 128
  1783. #define XGEMM_DEFAULT_Q 128
  1784. #define SYMV_P 16
  1785. #endif /* GENERIC */
  1786. #ifndef QGEMM_DEFAULT_UNROLL_M /* Fallbacks: give the Q and X unroll macros a value of 2 if no earlier block defined them. */
  1787. #define QGEMM_DEFAULT_UNROLL_M 2
  1788. #endif
  1789. #ifndef QGEMM_DEFAULT_UNROLL_N
  1790. #define QGEMM_DEFAULT_UNROLL_N 2
  1791. #endif
  1792. #ifndef XGEMM_DEFAULT_UNROLL_M
  1793. #define XGEMM_DEFAULT_UNROLL_M 2
  1794. #endif
  1795. #ifndef XGEMM_DEFAULT_UNROLL_N
  1796. #define XGEMM_DEFAULT_UNROLL_N 2
  1797. #endif
  1798. #ifndef HAVE_SSE2 /* Without HAVE_SSE2, SHUFPD_0..3 are spelled with shufps instead of shufpd. */
  /* The immediates 0x44/0x4e/0xe4/0xee select 64-bit halves in the same pattern as shufpd $0/$1/$2/$3. */
  /* Each macro ends in a comma, so the use site supplies the remaining operands. */
  1799. #define SHUFPD_0 shufps $0x44,
  1800. #define SHUFPD_1 shufps $0x4e,
  1801. #define SHUFPD_2 shufps $0xe4,
  1802. #define SHUFPD_3 shufps $0xee,
  1803. #endif
  /* Otherwise (nothing defined SHUFPD_n above or earlier), fall back to the native shufpd form. */
  1804. #ifndef SHUFPD_0
  1805. #define SHUFPD_0 shufpd $0,
  1806. #endif
  1807. #ifndef SHUFPD_1
  1808. #define SHUFPD_1 shufpd $1,
  1809. #endif
  1810. #ifndef SHUFPD_2
  1811. #define SHUFPD_2 shufpd $2,
  1812. #endif
  1813. #ifndef SHUFPD_3
  1814. #define SHUFPD_3 shufpd $3,
  1815. #endif
  /* SHUFPS_39 defaults to shufps with immediate 0x39 unless already defined. */
  1816. #ifndef SHUFPS_39
  1817. #define SHUFPS_39 shufps $0x39,
  1818. #endif
  1819. #endif