You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485
  1. /*****************************************************************************
  2. Copyright (c) 2011-2014, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written
  16. permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. **********************************************************************************/
  28. /*********************************************************************/
  29. /* Copyright 2009, 2010 The University of Texas at Austin. */
  30. /* All rights reserved. */
  31. /* */
  32. /* Redistribution and use in source and binary forms, with or */
  33. /* without modification, are permitted provided that the following */
  34. /* conditions are met: */
  35. /* */
  36. /* 1. Redistributions of source code must retain the above */
  37. /* copyright notice, this list of conditions and the following */
  38. /* disclaimer. */
  39. /* */
  40. /* 2. Redistributions in binary form must reproduce the above */
  41. /* copyright notice, this list of conditions and the following */
  42. /* disclaimer in the documentation and/or other materials */
  43. /* provided with the distribution. */
  44. /* */
  45. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  46. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  47. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  48. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  49. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  50. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  51. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  52. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  53. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  54. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  55. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  56. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  57. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  58. /* POSSIBILITY OF SUCH DAMAGE. */
  59. /* */
  60. /* The views and conclusions contained in the software and */
  61. /* documentation are those of the authors and should not be */
  62. /* interpreted as representing official policies, either expressed */
  63. /* or implied, of The University of Texas at Austin. */
  64. /*********************************************************************/
  65. #ifndef PARAM_H
  66. #define PARAM_H
  67. #ifdef OPTERON /* Default tuning macros selected when the build defines OPTERON. */
  68. #define SNUMOPT 4
  69. #define DNUMOPT 2
  70. #define GEMM_DEFAULT_OFFSET_A 64
  71. #define GEMM_DEFAULT_OFFSET_B 256
  72. #define GEMM_DEFAULT_ALIGN 0x01ffffUL /* low 17 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  73. #define SGEMM_DEFAULT_UNROLL_N 4 /* the UNROLL_N values do not depend on ARCH_X86 in this section */
  74. #define DGEMM_DEFAULT_UNROLL_N 4
  75. #define QGEMM_DEFAULT_UNROLL_N 2
  76. #define CGEMM_DEFAULT_UNROLL_N 2
  77. #define ZGEMM_DEFAULT_UNROLL_N 2
  78. #define XGEMM_DEFAULT_UNROLL_N 1
  79. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined */
  80. #define SGEMM_DEFAULT_UNROLL_M 4
  81. #define DGEMM_DEFAULT_UNROLL_M 2
  82. #define QGEMM_DEFAULT_UNROLL_M 2
  83. #define CGEMM_DEFAULT_UNROLL_M 2
  84. #define ZGEMM_DEFAULT_UNROLL_M 1
  85. #define XGEMM_DEFAULT_UNROLL_M 1
  86. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_M are twice the values above; Q and X are unchanged */
  87. #define SGEMM_DEFAULT_UNROLL_M 8
  88. #define DGEMM_DEFAULT_UNROLL_M 4
  89. #define QGEMM_DEFAULT_UNROLL_M 2
  90. #define CGEMM_DEFAULT_UNROLL_M 4
  91. #define ZGEMM_DEFAULT_UNROLL_M 2
  92. #define XGEMM_DEFAULT_UNROLL_M 1
  93. #endif /* ARCH_X86 */
  94. #define SGEMM_DEFAULT_P sgemm_p /* P defaults expand to identifiers (sgemm_p, ...) rather than constants; those identifiers are not defined in this section */
  95. #define DGEMM_DEFAULT_P dgemm_p
  96. #define QGEMM_DEFAULT_P qgemm_p
  97. #define CGEMM_DEFAULT_P cgemm_p
  98. #define ZGEMM_DEFAULT_P zgemm_p
  99. #define XGEMM_DEFAULT_P xgemm_p
  100. #define SGEMM_DEFAULT_R sgemm_r /* R defaults likewise expand to identifiers (sgemm_r, ...) not defined in this section */
  101. #define DGEMM_DEFAULT_R dgemm_r
  102. #define QGEMM_DEFAULT_R qgemm_r
  103. #define CGEMM_DEFAULT_R cgemm_r
  104. #define ZGEMM_DEFAULT_R zgemm_r
  105. #define XGEMM_DEFAULT_R xgemm_r
  106. #ifdef ALLOC_HUGETLB /* every Q default is 248 when ALLOC_HUGETLB is defined */
  107. #define SGEMM_DEFAULT_Q 248
  108. #define DGEMM_DEFAULT_Q 248
  109. #define QGEMM_DEFAULT_Q 248
  110. #define CGEMM_DEFAULT_Q 248
  111. #define ZGEMM_DEFAULT_Q 248
  112. #define XGEMM_DEFAULT_Q 248
  113. #else /* ALLOC_HUGETLB not defined: every Q default is 240 */
  114. #define SGEMM_DEFAULT_Q 240
  115. #define DGEMM_DEFAULT_Q 240
  116. #define QGEMM_DEFAULT_Q 240
  117. #define CGEMM_DEFAULT_Q 240
  118. #define ZGEMM_DEFAULT_Q 240
  119. #define XGEMM_DEFAULT_Q 240
  120. #endif /* ALLOC_HUGETLB */
  121. #define SYMV_P 16
  122. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  123. #endif /* OPTERON */
  124. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) /* Default tuning macros shared by BARCELONA, SHANGHAI and BOBCAT builds. */
  125. #define SNUMOPT 8
  126. #define DNUMOPT 4
  127. #define GEMM_DEFAULT_OFFSET_A 64
  128. #define GEMM_DEFAULT_OFFSET_B 832
  129. #define GEMM_DEFAULT_ALIGN 0x0fffUL /* low 12 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  130. #define SGEMM_DEFAULT_UNROLL_N 4 /* the UNROLL_N values do not depend on ARCH_X86 in this section */
  131. #define DGEMM_DEFAULT_UNROLL_N 4
  132. #define QGEMM_DEFAULT_UNROLL_N 2
  133. #define CGEMM_DEFAULT_UNROLL_N 2
  134. #define ZGEMM_DEFAULT_UNROLL_N 2
  135. #define XGEMM_DEFAULT_UNROLL_N 1
  136. #ifdef ARCH_X86 /* UNROLL_M values used when ARCH_X86 is defined */
  137. #define SGEMM_DEFAULT_UNROLL_M 4
  138. #define DGEMM_DEFAULT_UNROLL_M 2
  139. #define QGEMM_DEFAULT_UNROLL_M 2
  140. #define CGEMM_DEFAULT_UNROLL_M 2
  141. #define ZGEMM_DEFAULT_UNROLL_M 1
  142. #define XGEMM_DEFAULT_UNROLL_M 1
  143. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_M are twice the values above; Q and X are unchanged */
  144. #define SGEMM_DEFAULT_UNROLL_M 8
  145. #define DGEMM_DEFAULT_UNROLL_M 4
  146. #define QGEMM_DEFAULT_UNROLL_M 2
  147. #define CGEMM_DEFAULT_UNROLL_M 4
  148. #define ZGEMM_DEFAULT_UNROLL_M 2
  149. #define XGEMM_DEFAULT_UNROLL_M 1
  150. #endif /* ARCH_X86 */
  151. #if 0 /* disabled alternative P/Q set (Q = 248); never compiled, kept for reference only */
  152. #define SGEMM_DEFAULT_P 496
  153. #define DGEMM_DEFAULT_P 248
  154. #define QGEMM_DEFAULT_P 124
  155. #define CGEMM_DEFAULT_P 248
  156. #define ZGEMM_DEFAULT_P 124
  157. #define XGEMM_DEFAULT_P 62
  158. #define SGEMM_DEFAULT_Q 248
  159. #define DGEMM_DEFAULT_Q 248
  160. #define QGEMM_DEFAULT_Q 248
  161. #define CGEMM_DEFAULT_Q 248
  162. #define ZGEMM_DEFAULT_Q 248
  163. #define XGEMM_DEFAULT_Q 248
  164. #else /* the active P/Q set: this branch is always the one compiled */
  165. #define SGEMM_DEFAULT_P 448
  166. #define DGEMM_DEFAULT_P 224
  167. #define QGEMM_DEFAULT_P 112
  168. #define CGEMM_DEFAULT_P 224
  169. #define ZGEMM_DEFAULT_P 112
  170. #define XGEMM_DEFAULT_P 56
  171. #define SGEMM_DEFAULT_Q 224
  172. #define DGEMM_DEFAULT_Q 224
  173. #define QGEMM_DEFAULT_Q 224
  174. #define CGEMM_DEFAULT_Q 224
  175. #define ZGEMM_DEFAULT_Q 224
  176. #define XGEMM_DEFAULT_Q 224
  177. #endif /* 0 */
  178. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers (sgemm_r, ...) not defined in this section */
  179. #define QGEMM_DEFAULT_R qgemm_r
  180. #define DGEMM_DEFAULT_R dgemm_r
  181. #define CGEMM_DEFAULT_R cgemm_r
  182. #define ZGEMM_DEFAULT_R zgemm_r
  183. #define XGEMM_DEFAULT_R xgemm_r
  184. #define SYMV_P 16
  185. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  186. #define GEMM_THREAD gemm_thread_mn /* expands to the identifier gemm_thread_mn, which is not defined in this section */
  187. #endif /* BARCELONA || SHANGHAI || BOBCAT */
  188. #ifdef BULLDOZER /* Default tuning macros selected when the build defines BULLDOZER. */
  189. #define SNUMOPT 8
  190. #define DNUMOPT 4
  191. #define GEMM_DEFAULT_OFFSET_A 64
  192. #define GEMM_DEFAULT_OFFSET_B 832
  193. #define GEMM_DEFAULT_ALIGN 0x0fffUL /* low 12 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  194. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are common to both ARCH_X86 branches; S/D UNROLL_N are set per branch below */
  195. #define CGEMM_DEFAULT_UNROLL_N 2
  196. #define ZGEMM_DEFAULT_UNROLL_N 2
  197. #define XGEMM_DEFAULT_UNROLL_N 1
  198. #ifdef ARCH_X86 /* unroll values used when ARCH_X86 is defined */
  199. #define SGEMM_DEFAULT_UNROLL_N 4
  200. #define DGEMM_DEFAULT_UNROLL_N 4
  201. #define SGEMM_DEFAULT_UNROLL_M 4
  202. #define DGEMM_DEFAULT_UNROLL_M 2
  203. #define QGEMM_DEFAULT_UNROLL_M 2
  204. #define CGEMM_DEFAULT_UNROLL_M 2
  205. #define ZGEMM_DEFAULT_UNROLL_M 1
  206. #define XGEMM_DEFAULT_UNROLL_M 1
  207. #else /* ARCH_X86 not defined; NOTE: the *GEMM3M unroll macros and GEMV_UNROLL are defined only in this branch */
  208. #define SGEMM_DEFAULT_UNROLL_N 2
  209. #define DGEMM_DEFAULT_UNROLL_N 2
  210. #define SGEMM_DEFAULT_UNROLL_M 16
  211. #define DGEMM_DEFAULT_UNROLL_M 8
  212. #define QGEMM_DEFAULT_UNROLL_M 2
  213. #define CGEMM_DEFAULT_UNROLL_M 4
  214. #define ZGEMM_DEFAULT_UNROLL_M 2
  215. #define XGEMM_DEFAULT_UNROLL_M 1
  216. #define CGEMM3M_DEFAULT_UNROLL_N 4
  217. #define CGEMM3M_DEFAULT_UNROLL_M 8
  218. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  219. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  220. #define GEMV_UNROLL 8
  221. #endif /* ARCH_X86 */
  222. #if defined(ARCH_X86_64) /* only the S and D P defaults depend on ARCH_X86_64 in this section */
  223. #define SGEMM_DEFAULT_P 768
  224. #define DGEMM_DEFAULT_P 384
  225. #else
  226. #define SGEMM_DEFAULT_P 448
  227. #define DGEMM_DEFAULT_P 224
  228. #endif /* ARCH_X86_64 */
  229. #define QGEMM_DEFAULT_P 112
  230. #define CGEMM_DEFAULT_P 224
  231. #define ZGEMM_DEFAULT_P 112
  232. #define XGEMM_DEFAULT_P 56
  233. #if defined(ARCH_X86_64) /* only the S and D Q defaults depend on ARCH_X86_64 in this section */
  234. #define SGEMM_DEFAULT_Q 168
  235. #define DGEMM_DEFAULT_Q 168
  236. #else
  237. #define SGEMM_DEFAULT_Q 224
  238. #define DGEMM_DEFAULT_Q 224
  239. #endif /* ARCH_X86_64 */
  240. #define QGEMM_DEFAULT_Q 224
  241. #define CGEMM_DEFAULT_Q 224
  242. #define ZGEMM_DEFAULT_Q 224
  243. #define XGEMM_DEFAULT_Q 224
  244. #define CGEMM3M_DEFAULT_P 448 /* the *GEMM3M P/Q/R defaults are fixed constants, independent of the architecture macros */
  245. #define ZGEMM3M_DEFAULT_P 224
  246. #define XGEMM3M_DEFAULT_P 112
  247. #define CGEMM3M_DEFAULT_Q 224
  248. #define ZGEMM3M_DEFAULT_Q 224
  249. #define XGEMM3M_DEFAULT_Q 224
  250. #define CGEMM3M_DEFAULT_R 12288
  251. #define ZGEMM3M_DEFAULT_R 12288
  252. #define XGEMM3M_DEFAULT_R 12288
  253. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers (sgemm_r, ...) not defined in this section */
  254. #define QGEMM_DEFAULT_R qgemm_r
  255. #define DGEMM_DEFAULT_R dgemm_r
  256. #define CGEMM_DEFAULT_R cgemm_r
  257. #define ZGEMM_DEFAULT_R zgemm_r
  258. #define XGEMM_DEFAULT_R xgemm_r
  259. #define SYMV_P 16
  260. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  261. #define GEMM_THREAD gemm_thread_mn /* expands to the identifier gemm_thread_mn, which is not defined in this section */
  262. #endif /* BULLDOZER */
  263. #ifdef PILEDRIVER /* Default tuning macros selected when the build defines PILEDRIVER. */
  264. #define SNUMOPT 8
  265. #define DNUMOPT 4
  266. #define GEMM_DEFAULT_OFFSET_A 64
  267. #define GEMM_DEFAULT_OFFSET_B 832
  268. #define GEMM_DEFAULT_ALIGN 0x0fffUL /* low 12 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  269. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are common to both ARCH_X86 branches; S/D UNROLL_N are set per branch below */
  270. #define CGEMM_DEFAULT_UNROLL_N 2
  271. #define ZGEMM_DEFAULT_UNROLL_N 2
  272. #define XGEMM_DEFAULT_UNROLL_N 1
  273. #ifdef ARCH_X86 /* unroll values used when ARCH_X86 is defined */
  274. #define SGEMM_DEFAULT_UNROLL_N 4
  275. #define DGEMM_DEFAULT_UNROLL_N 4
  276. #define SGEMM_DEFAULT_UNROLL_M 4
  277. #define DGEMM_DEFAULT_UNROLL_M 2
  278. #define QGEMM_DEFAULT_UNROLL_M 2
  279. #define CGEMM_DEFAULT_UNROLL_M 2
  280. #define ZGEMM_DEFAULT_UNROLL_M 1
  281. #define XGEMM_DEFAULT_UNROLL_M 1
  282. #else /* ARCH_X86 not defined; NOTE: the *GEMM3M unroll macros and GEMV_UNROLL are defined only in this branch */
  283. #define SGEMM_DEFAULT_UNROLL_N 2
  284. #define DGEMM_DEFAULT_UNROLL_N 2
  285. #define SGEMM_DEFAULT_UNROLL_M 16
  286. #define DGEMM_DEFAULT_UNROLL_M 8
  287. #define QGEMM_DEFAULT_UNROLL_M 2
  288. #define CGEMM_DEFAULT_UNROLL_M 4
  289. #define ZGEMM_DEFAULT_UNROLL_M 2
  290. #define XGEMM_DEFAULT_UNROLL_M 1
  291. #define CGEMM3M_DEFAULT_UNROLL_N 4
  292. #define CGEMM3M_DEFAULT_UNROLL_M 8
  293. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  294. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  295. #define GEMV_UNROLL 8
  296. #endif /* ARCH_X86 */
  297. #if defined(ARCH_X86_64) /* S/D/Z/C P defaults depend on ARCH_X86_64; Q and X P are set unconditionally below */
  298. #define SGEMM_DEFAULT_P 768
  299. #define DGEMM_DEFAULT_P 768
  300. #define ZGEMM_DEFAULT_P 384
  301. #define CGEMM_DEFAULT_P 768
  302. #else
  303. #define SGEMM_DEFAULT_P 448
  304. #define DGEMM_DEFAULT_P 480
  305. #define ZGEMM_DEFAULT_P 112
  306. #define CGEMM_DEFAULT_P 224
  307. #endif /* ARCH_X86_64 */
  308. #define QGEMM_DEFAULT_P 112
  309. #define XGEMM_DEFAULT_P 56
  310. #if defined(ARCH_X86_64) /* S/D/Z/C Q defaults depend on ARCH_X86_64; Q and X Q are set unconditionally below */
  311. #define SGEMM_DEFAULT_Q 192
  312. #define DGEMM_DEFAULT_Q 168
  313. #define ZGEMM_DEFAULT_Q 168
  314. #define CGEMM_DEFAULT_Q 168
  315. #else
  316. #define SGEMM_DEFAULT_Q 224
  317. #define DGEMM_DEFAULT_Q 224
  318. #define ZGEMM_DEFAULT_Q 224
  319. #define CGEMM_DEFAULT_Q 224
  320. #endif /* ARCH_X86_64 */
  321. #define QGEMM_DEFAULT_Q 224
  322. #define XGEMM_DEFAULT_Q 224
  323. #define CGEMM3M_DEFAULT_P 448 /* the *GEMM3M P/Q/R defaults are fixed constants, independent of the architecture macros */
  324. #define ZGEMM3M_DEFAULT_P 224
  325. #define XGEMM3M_DEFAULT_P 112
  326. #define CGEMM3M_DEFAULT_Q 224
  327. #define ZGEMM3M_DEFAULT_Q 224
  328. #define XGEMM3M_DEFAULT_Q 224
  329. #define CGEMM3M_DEFAULT_R 12288
  330. #define ZGEMM3M_DEFAULT_R 12288
  331. #define XGEMM3M_DEFAULT_R 12288
  332. #define SGEMM_DEFAULT_R 12288 /* S and D R defaults are the constant 12288 here; Q/C/Z/X R expand to identifiers not defined in this section */
  333. #define QGEMM_DEFAULT_R qgemm_r
  334. #define DGEMM_DEFAULT_R 12288
  335. #define CGEMM_DEFAULT_R cgemm_r
  336. #define ZGEMM_DEFAULT_R zgemm_r
  337. #define XGEMM_DEFAULT_R xgemm_r
  338. #define SYMV_P 16
  339. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  340. #define GEMM_THREAD gemm_thread_mn /* expands to the identifier gemm_thread_mn, which is not defined in this section */
  341. #endif /* PILEDRIVER */
  342. #ifdef STEAMROLLER /* Default tuning macros selected when the build defines STEAMROLLER. */
  343. #define SNUMOPT 8
  344. #define DNUMOPT 4
  345. #define GEMM_DEFAULT_OFFSET_A 64
  346. #define GEMM_DEFAULT_OFFSET_B 832
  347. #define GEMM_DEFAULT_ALIGN 0x0fffUL /* low 12 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  348. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are common to both ARCH_X86 branches; S/D UNROLL_N are set per branch below */
  349. #define CGEMM_DEFAULT_UNROLL_N 2
  350. #define ZGEMM_DEFAULT_UNROLL_N 2
  351. #define XGEMM_DEFAULT_UNROLL_N 1
  352. #ifdef ARCH_X86 /* unroll values used when ARCH_X86 is defined */
  353. #define SGEMM_DEFAULT_UNROLL_N 4
  354. #define DGEMM_DEFAULT_UNROLL_N 4
  355. #define SGEMM_DEFAULT_UNROLL_M 4
  356. #define DGEMM_DEFAULT_UNROLL_M 2
  357. #define QGEMM_DEFAULT_UNROLL_M 2
  358. #define CGEMM_DEFAULT_UNROLL_M 2
  359. #define ZGEMM_DEFAULT_UNROLL_M 1
  360. #define XGEMM_DEFAULT_UNROLL_M 1
  361. #else /* ARCH_X86 not defined; NOTE: the *GEMM3M unroll macros and GEMV_UNROLL are defined only in this branch */
  362. #define SGEMM_DEFAULT_UNROLL_N 2
  363. #define DGEMM_DEFAULT_UNROLL_N 2
  364. #define SGEMM_DEFAULT_UNROLL_M 16
  365. #define DGEMM_DEFAULT_UNROLL_M 8
  366. #define QGEMM_DEFAULT_UNROLL_M 2
  367. #define CGEMM_DEFAULT_UNROLL_M 4
  368. #define ZGEMM_DEFAULT_UNROLL_M 2
  369. #define XGEMM_DEFAULT_UNROLL_M 1
  370. #define CGEMM3M_DEFAULT_UNROLL_N 4
  371. #define CGEMM3M_DEFAULT_UNROLL_M 8
  372. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  373. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  374. #define GEMV_UNROLL 8
  375. #endif /* ARCH_X86 */
  376. #if defined(ARCH_X86_64) /* S/D/Z/C P defaults depend on ARCH_X86_64; Q and X P are set unconditionally below */
  377. #define SGEMM_DEFAULT_P 768
  378. #define DGEMM_DEFAULT_P 576
  379. #define ZGEMM_DEFAULT_P 288
  380. #define CGEMM_DEFAULT_P 576
  381. #else
  382. #define SGEMM_DEFAULT_P 448
  383. #define DGEMM_DEFAULT_P 480
  384. #define ZGEMM_DEFAULT_P 112
  385. #define CGEMM_DEFAULT_P 224
  386. #endif /* ARCH_X86_64 */
  387. #define QGEMM_DEFAULT_P 112
  388. #define XGEMM_DEFAULT_P 56
  389. #if defined(ARCH_X86_64) /* S/D/Z/C Q defaults depend on ARCH_X86_64; Q and X Q are set unconditionally below */
  390. #define SGEMM_DEFAULT_Q 192
  391. #define DGEMM_DEFAULT_Q 160
  392. #define ZGEMM_DEFAULT_Q 160
  393. #define CGEMM_DEFAULT_Q 160
  394. #else
  395. #define SGEMM_DEFAULT_Q 224
  396. #define DGEMM_DEFAULT_Q 224
  397. #define ZGEMM_DEFAULT_Q 224
  398. #define CGEMM_DEFAULT_Q 224
  399. #endif /* ARCH_X86_64 */
  400. #define QGEMM_DEFAULT_Q 224
  401. #define XGEMM_DEFAULT_Q 224
  402. #define CGEMM3M_DEFAULT_P 448 /* the *GEMM3M P/Q/R defaults are fixed constants, independent of the architecture macros */
  403. #define ZGEMM3M_DEFAULT_P 224
  404. #define XGEMM3M_DEFAULT_P 112
  405. #define CGEMM3M_DEFAULT_Q 224
  406. #define ZGEMM3M_DEFAULT_Q 224
  407. #define XGEMM3M_DEFAULT_Q 224
  408. #define CGEMM3M_DEFAULT_R 12288
  409. #define ZGEMM3M_DEFAULT_R 12288
  410. #define XGEMM3M_DEFAULT_R 12288
  411. #define SGEMM_DEFAULT_R 12288 /* S and D R defaults are the constant 12288 here; Q/C/Z/X R expand to identifiers not defined in this section */
  412. #define QGEMM_DEFAULT_R qgemm_r
  413. #define DGEMM_DEFAULT_R 12288
  414. #define CGEMM_DEFAULT_R cgemm_r
  415. #define ZGEMM_DEFAULT_R zgemm_r
  416. #define XGEMM_DEFAULT_R xgemm_r
  417. #define SYMV_P 16
  418. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  419. #define GEMM_THREAD gemm_thread_mn /* expands to the identifier gemm_thread_mn, which is not defined in this section */
  420. #endif /* STEAMROLLER */
  421. #ifdef EXCAVATOR /* Default tuning macros selected when the build defines EXCAVATOR; the values below are the same as in the STEAMROLLER section of this file. */
  422. #define SNUMOPT 8
  423. #define DNUMOPT 4
  424. #define GEMM_DEFAULT_OFFSET_A 64
  425. #define GEMM_DEFAULT_OFFSET_B 832
  426. #define GEMM_DEFAULT_ALIGN 0x0fffUL /* low 12 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  427. #define QGEMM_DEFAULT_UNROLL_N 2 /* Q/C/Z/X UNROLL_N are common to both ARCH_X86 branches; S/D UNROLL_N are set per branch below */
  428. #define CGEMM_DEFAULT_UNROLL_N 2
  429. #define ZGEMM_DEFAULT_UNROLL_N 2
  430. #define XGEMM_DEFAULT_UNROLL_N 1
  431. #ifdef ARCH_X86 /* unroll values used when ARCH_X86 is defined */
  432. #define SGEMM_DEFAULT_UNROLL_N 4
  433. #define DGEMM_DEFAULT_UNROLL_N 4
  434. #define SGEMM_DEFAULT_UNROLL_M 4
  435. #define DGEMM_DEFAULT_UNROLL_M 2
  436. #define QGEMM_DEFAULT_UNROLL_M 2
  437. #define CGEMM_DEFAULT_UNROLL_M 2
  438. #define ZGEMM_DEFAULT_UNROLL_M 1
  439. #define XGEMM_DEFAULT_UNROLL_M 1
  440. #else /* ARCH_X86 not defined; NOTE: the *GEMM3M unroll macros and GEMV_UNROLL are defined only in this branch */
  441. #define SGEMM_DEFAULT_UNROLL_N 2
  442. #define DGEMM_DEFAULT_UNROLL_N 2
  443. #define SGEMM_DEFAULT_UNROLL_M 16
  444. #define DGEMM_DEFAULT_UNROLL_M 8
  445. #define QGEMM_DEFAULT_UNROLL_M 2
  446. #define CGEMM_DEFAULT_UNROLL_M 4
  447. #define ZGEMM_DEFAULT_UNROLL_M 2
  448. #define XGEMM_DEFAULT_UNROLL_M 1
  449. #define CGEMM3M_DEFAULT_UNROLL_N 4
  450. #define CGEMM3M_DEFAULT_UNROLL_M 8
  451. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  452. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  453. #define GEMV_UNROLL 8
  454. #endif /* ARCH_X86 */
  455. #if defined(ARCH_X86_64) /* S/D/Z/C P defaults depend on ARCH_X86_64; Q and X P are set unconditionally below */
  456. #define SGEMM_DEFAULT_P 768
  457. #define DGEMM_DEFAULT_P 576
  458. #define ZGEMM_DEFAULT_P 288
  459. #define CGEMM_DEFAULT_P 576
  460. #else
  461. #define SGEMM_DEFAULT_P 448
  462. #define DGEMM_DEFAULT_P 480
  463. #define ZGEMM_DEFAULT_P 112
  464. #define CGEMM_DEFAULT_P 224
  465. #endif /* ARCH_X86_64 */
  466. #define QGEMM_DEFAULT_P 112
  467. #define XGEMM_DEFAULT_P 56
  468. #if defined(ARCH_X86_64) /* S/D/Z/C Q defaults depend on ARCH_X86_64; Q and X Q are set unconditionally below */
  469. #define SGEMM_DEFAULT_Q 192
  470. #define DGEMM_DEFAULT_Q 160
  471. #define ZGEMM_DEFAULT_Q 160
  472. #define CGEMM_DEFAULT_Q 160
  473. #else
  474. #define SGEMM_DEFAULT_Q 224
  475. #define DGEMM_DEFAULT_Q 224
  476. #define ZGEMM_DEFAULT_Q 224
  477. #define CGEMM_DEFAULT_Q 224
  478. #endif /* ARCH_X86_64 */
  479. #define QGEMM_DEFAULT_Q 224
  480. #define XGEMM_DEFAULT_Q 224
  481. #define CGEMM3M_DEFAULT_P 448 /* the *GEMM3M P/Q/R defaults are fixed constants, independent of the architecture macros */
  482. #define ZGEMM3M_DEFAULT_P 224
  483. #define XGEMM3M_DEFAULT_P 112
  484. #define CGEMM3M_DEFAULT_Q 224
  485. #define ZGEMM3M_DEFAULT_Q 224
  486. #define XGEMM3M_DEFAULT_Q 224
  487. #define CGEMM3M_DEFAULT_R 12288
  488. #define ZGEMM3M_DEFAULT_R 12288
  489. #define XGEMM3M_DEFAULT_R 12288
  490. #define SGEMM_DEFAULT_R 12288 /* S and D R defaults are the constant 12288 here; Q/C/Z/X R expand to identifiers not defined in this section */
  491. #define QGEMM_DEFAULT_R qgemm_r
  492. #define DGEMM_DEFAULT_R 12288
  493. #define CGEMM_DEFAULT_R cgemm_r
  494. #define ZGEMM_DEFAULT_R zgemm_r
  495. #define XGEMM_DEFAULT_R xgemm_r
  496. #define SYMV_P 16
  497. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  498. #define GEMM_THREAD gemm_thread_mn /* expands to the identifier gemm_thread_mn, which is not defined in this section */
  499. #endif /* EXCAVATOR */
  500. #ifdef ATHLON /* Default tuning macros selected when the build defines ATHLON; nothing in this section depends on ARCH_X86 / ARCH_X86_64. */
  501. #define SNUMOPT 4
  502. #define DNUMOPT 2
  503. #define GEMM_DEFAULT_OFFSET_A 0
  504. #define GEMM_DEFAULT_OFFSET_B 384
  505. #define GEMM_DEFAULT_ALIGN 0x0ffffUL /* low 16 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  506. #define SGEMM_DEFAULT_UNROLL_N 4
  507. #define DGEMM_DEFAULT_UNROLL_N 4
  508. #define QGEMM_DEFAULT_UNROLL_N 2
  509. #define CGEMM_DEFAULT_UNROLL_N 2
  510. #define ZGEMM_DEFAULT_UNROLL_N 2
  511. #define XGEMM_DEFAULT_UNROLL_N 1
  512. #define SGEMM_DEFAULT_UNROLL_M 2
  513. #define DGEMM_DEFAULT_UNROLL_M 1
  514. #define QGEMM_DEFAULT_UNROLL_M 2
  515. #define CGEMM_DEFAULT_UNROLL_M 1
  516. #define ZGEMM_DEFAULT_UNROLL_M 1
  517. #define XGEMM_DEFAULT_UNROLL_M 1
  518. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers (sgemm_r, ...) not defined in this section */
  519. #define DGEMM_DEFAULT_R dgemm_r
  520. #define QGEMM_DEFAULT_R qgemm_r
  521. #define CGEMM_DEFAULT_R cgemm_r
  522. #define ZGEMM_DEFAULT_R zgemm_r
  523. #define XGEMM_DEFAULT_R xgemm_r
  524. #define SGEMM_DEFAULT_P 208 /* P defaults are fixed constants that differ per prefix */
  525. #define DGEMM_DEFAULT_P 104
  526. #define QGEMM_DEFAULT_P 56
  527. #define CGEMM_DEFAULT_P 104
  528. #define ZGEMM_DEFAULT_P 56
  529. #define XGEMM_DEFAULT_P 28
  530. #define SGEMM_DEFAULT_Q 208 /* every Q default is 208 */
  531. #define DGEMM_DEFAULT_Q 208
  532. #define QGEMM_DEFAULT_Q 208
  533. #define CGEMM_DEFAULT_Q 208
  534. #define ZGEMM_DEFAULT_Q 208
  535. #define XGEMM_DEFAULT_Q 208
  536. #define SYMV_P 16
  537. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  538. #endif /* ATHLON */
  539. #ifdef VIAC3 /* Default tuning macros selected when the build defines VIAC3; this section does not define HAVE_EXCLUSIVE_CACHE. */
  540. #define SNUMOPT 2
  541. #define DNUMOPT 1
  542. #define GEMM_DEFAULT_OFFSET_A 0
  543. #define GEMM_DEFAULT_OFFSET_B 256
  544. #define GEMM_DEFAULT_ALIGN 0x0ffffUL /* low 16 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  545. #define SGEMM_DEFAULT_UNROLL_N 4
  546. #define DGEMM_DEFAULT_UNROLL_N 4
  547. #define QGEMM_DEFAULT_UNROLL_N 2
  548. #define CGEMM_DEFAULT_UNROLL_N 2
  549. #define ZGEMM_DEFAULT_UNROLL_N 2
  550. #define XGEMM_DEFAULT_UNROLL_N 1
  551. #define SGEMM_DEFAULT_UNROLL_M 2
  552. #define DGEMM_DEFAULT_UNROLL_M 1
  553. #define QGEMM_DEFAULT_UNROLL_M 2
  554. #define CGEMM_DEFAULT_UNROLL_M 1
  555. #define ZGEMM_DEFAULT_UNROLL_M 1
  556. #define XGEMM_DEFAULT_UNROLL_M 1
  557. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers (sgemm_r, ...) not defined in this section */
  558. #define DGEMM_DEFAULT_R dgemm_r
  559. #define QGEMM_DEFAULT_R qgemm_r
  560. #define CGEMM_DEFAULT_R cgemm_r
  561. #define ZGEMM_DEFAULT_R zgemm_r
  562. #define XGEMM_DEFAULT_R xgemm_r
  563. #define SGEMM_DEFAULT_P 128 /* every P default is 128 */
  564. #define DGEMM_DEFAULT_P 128
  565. #define QGEMM_DEFAULT_P 128
  566. #define CGEMM_DEFAULT_P 128
  567. #define ZGEMM_DEFAULT_P 128
  568. #define XGEMM_DEFAULT_P 128
  569. #define SGEMM_DEFAULT_Q 512 /* Q defaults differ per prefix: 512 for S, 256 for D/Q/C, 128 for Z/X */
  570. #define DGEMM_DEFAULT_Q 256
  571. #define QGEMM_DEFAULT_Q 256
  572. #define CGEMM_DEFAULT_Q 256
  573. #define ZGEMM_DEFAULT_Q 128
  574. #define XGEMM_DEFAULT_Q 128
  575. #define SYMV_P 16
  576. #endif /* VIAC3 */
  577. #ifdef NANO /* Default tuning macros selected when the build defines NANO. */
  578. #define SNUMOPT 4
  579. #define DNUMOPT 2
  580. #define GEMM_DEFAULT_OFFSET_A 64
  581. #define GEMM_DEFAULT_OFFSET_B 256
  582. #define GEMM_DEFAULT_ALIGN 0x01ffffUL /* low 17 bits set; presumably used as an alignment mask -- TODO confirm at the use site */
  583. #ifdef ARCH_X86 /* both UNROLL_N and UNROLL_M are chosen per ARCH_X86 branch in this section */
  584. #define SGEMM_DEFAULT_UNROLL_N 4
  585. #define DGEMM_DEFAULT_UNROLL_N 4
  586. #define QGEMM_DEFAULT_UNROLL_N 2
  587. #define CGEMM_DEFAULT_UNROLL_N 2
  588. #define ZGEMM_DEFAULT_UNROLL_N 2
  589. #define XGEMM_DEFAULT_UNROLL_N 1
  590. #define SGEMM_DEFAULT_UNROLL_M 4
  591. #define DGEMM_DEFAULT_UNROLL_M 2
  592. #define QGEMM_DEFAULT_UNROLL_M 2
  593. #define CGEMM_DEFAULT_UNROLL_M 2
  594. #define ZGEMM_DEFAULT_UNROLL_M 1
  595. #define XGEMM_DEFAULT_UNROLL_M 1
  596. #else /* ARCH_X86 not defined: S and C UNROLL_N, and D and Z UNROLL_M, are twice the values above; the rest are unchanged */
  597. #define SGEMM_DEFAULT_UNROLL_N 8
  598. #define DGEMM_DEFAULT_UNROLL_N 4
  599. #define QGEMM_DEFAULT_UNROLL_N 2
  600. #define CGEMM_DEFAULT_UNROLL_N 4
  601. #define ZGEMM_DEFAULT_UNROLL_N 2
  602. #define XGEMM_DEFAULT_UNROLL_N 1
  603. #define SGEMM_DEFAULT_UNROLL_M 4
  604. #define DGEMM_DEFAULT_UNROLL_M 4
  605. #define QGEMM_DEFAULT_UNROLL_M 2
  606. #define CGEMM_DEFAULT_UNROLL_M 2
  607. #define ZGEMM_DEFAULT_UNROLL_M 2
  608. #define XGEMM_DEFAULT_UNROLL_M 1
  609. #endif /* ARCH_X86 */
  610. #define SGEMM_DEFAULT_P 288 /* every P default is 288 */
  611. #define DGEMM_DEFAULT_P 288
  612. #define QGEMM_DEFAULT_P 288
  613. #define CGEMM_DEFAULT_P 288
  614. #define ZGEMM_DEFAULT_P 288
  615. #define XGEMM_DEFAULT_P 288
  616. #define SGEMM_DEFAULT_R sgemm_r /* R defaults expand to identifiers (sgemm_r, ...) not defined in this section */
  617. #define DGEMM_DEFAULT_R dgemm_r
  618. #define QGEMM_DEFAULT_R qgemm_r
  619. #define CGEMM_DEFAULT_R cgemm_r
  620. #define ZGEMM_DEFAULT_R zgemm_r
  621. #define XGEMM_DEFAULT_R xgemm_r
  622. #define SGEMM_DEFAULT_Q 256 /* Q defaults differ per prefix: 256/128/64/128/64/32 for S/D/Q/C/Z/X */
  623. #define DGEMM_DEFAULT_Q 128
  624. #define QGEMM_DEFAULT_Q 64
  625. #define CGEMM_DEFAULT_Q 128
  626. #define ZGEMM_DEFAULT_Q 64
  627. #define XGEMM_DEFAULT_Q 32
  628. #define SYMV_P 16
  629. #define HAVE_EXCLUSIVE_CACHE /* defined with no value; consumers can only test it with #ifdef / defined() */
  630. #endif /* NANO */
  /*
   * Parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3 (any one of the
   * three macros selects it).
   *  - SNUMOPT and the SGEMM/CGEMM UNROLL_M values depend on HAVE_SSE.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) that are not
   *    defined in this part of the file; Q is the literal 256 throughout.
   */
  631. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
  632. #ifdef HAVE_SSE
  633. #define SNUMOPT 2
  634. #else
  635. #define SNUMOPT 1
  636. #endif
  637. #define DNUMOPT 1
  638. #define GEMM_DEFAULT_OFFSET_A 0
  639. #define GEMM_DEFAULT_OFFSET_B 0
  640. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* Only the S and C UNROLL_M values change with HAVE_SSE. */
  641. #ifdef HAVE_SSE
  642. #define SGEMM_DEFAULT_UNROLL_M 8
  643. #define CGEMM_DEFAULT_UNROLL_M 4
  644. #else
  645. #define SGEMM_DEFAULT_UNROLL_M 4
  646. #define CGEMM_DEFAULT_UNROLL_M 2
  647. #endif
  648. #define DGEMM_DEFAULT_UNROLL_M 2
  649. #define SGEMM_DEFAULT_UNROLL_N 2
  650. #define DGEMM_DEFAULT_UNROLL_N 2
  651. #define QGEMM_DEFAULT_UNROLL_M 2
  652. #define QGEMM_DEFAULT_UNROLL_N 2
  653. #define CGEMM_DEFAULT_UNROLL_N 1
  654. #define ZGEMM_DEFAULT_UNROLL_M 1
  655. #define ZGEMM_DEFAULT_UNROLL_N 1
  656. #define XGEMM_DEFAULT_UNROLL_M 1
  657. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P/Q/R triples, one per prefix; only Q is a compile-time literal. */
  658. #define SGEMM_DEFAULT_P sgemm_p
  659. #define SGEMM_DEFAULT_Q 256
  660. #define SGEMM_DEFAULT_R sgemm_r
  661. #define DGEMM_DEFAULT_P dgemm_p
  662. #define DGEMM_DEFAULT_Q 256
  663. #define DGEMM_DEFAULT_R dgemm_r
  664. #define QGEMM_DEFAULT_P qgemm_p
  665. #define QGEMM_DEFAULT_Q 256
  666. #define QGEMM_DEFAULT_R qgemm_r
  667. #define CGEMM_DEFAULT_P cgemm_p
  668. #define CGEMM_DEFAULT_Q 256
  669. #define CGEMM_DEFAULT_R cgemm_r
  670. #define ZGEMM_DEFAULT_P zgemm_p
  671. #define ZGEMM_DEFAULT_Q 256
  672. #define ZGEMM_DEFAULT_R zgemm_r
  673. #define XGEMM_DEFAULT_P xgemm_p
  674. #define XGEMM_DEFAULT_Q 256
  675. #define XGEMM_DEFAULT_R xgemm_r
  676. #define SYMV_P 4
  677. #endif
  /*
   * Parameter set compiled in when PENTIUMM is defined.
   *  - The UNROLL_M / UNROLL_N table has a CORE_YONAH variant and a default
   *    variant.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) not defined in
   *    this part of the file; Q is the literal 256 for every prefix.
   */
  678. #ifdef PENTIUMM
  679. #define SNUMOPT 2
  680. #define DNUMOPT 1
  681. #define GEMM_DEFAULT_OFFSET_A 0
  682. #define GEMM_DEFAULT_OFFSET_B 0
  683. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* Unroll factors used when CORE_YONAH is also defined. */
  684. #ifdef CORE_YONAH
  685. #define SGEMM_DEFAULT_UNROLL_M 4
  686. #define SGEMM_DEFAULT_UNROLL_N 4
  687. #define DGEMM_DEFAULT_UNROLL_M 2
  688. #define DGEMM_DEFAULT_UNROLL_N 4
  689. #define QGEMM_DEFAULT_UNROLL_M 2
  690. #define QGEMM_DEFAULT_UNROLL_N 2
  691. #define CGEMM_DEFAULT_UNROLL_M 2
  692. #define CGEMM_DEFAULT_UNROLL_N 2
  693. #define ZGEMM_DEFAULT_UNROLL_M 1
  694. #define ZGEMM_DEFAULT_UNROLL_N 2
  695. #define XGEMM_DEFAULT_UNROLL_M 1
  696. #define XGEMM_DEFAULT_UNROLL_N 1
  697. #else
  /* Unroll factors used when CORE_YONAH is not defined. */
  698. #define SGEMM_DEFAULT_UNROLL_M 8
  699. #define SGEMM_DEFAULT_UNROLL_N 2
  700. #define DGEMM_DEFAULT_UNROLL_M 2
  701. #define DGEMM_DEFAULT_UNROLL_N 2
  702. #define QGEMM_DEFAULT_UNROLL_M 2
  703. #define QGEMM_DEFAULT_UNROLL_N 2
  704. #define CGEMM_DEFAULT_UNROLL_M 4
  705. #define CGEMM_DEFAULT_UNROLL_N 1
  706. #define ZGEMM_DEFAULT_UNROLL_M 1
  707. #define ZGEMM_DEFAULT_UNROLL_N 1
  708. #define XGEMM_DEFAULT_UNROLL_M 1
  709. #define XGEMM_DEFAULT_UNROLL_N 1
  710. #endif
  /* P/Q/R triples, common to both unroll variants. */
  711. #define SGEMM_DEFAULT_P sgemm_p
  712. #define SGEMM_DEFAULT_Q 256
  713. #define SGEMM_DEFAULT_R sgemm_r
  714. #define DGEMM_DEFAULT_P dgemm_p
  715. #define DGEMM_DEFAULT_Q 256
  716. #define DGEMM_DEFAULT_R dgemm_r
  717. #define QGEMM_DEFAULT_P qgemm_p
  718. #define QGEMM_DEFAULT_Q 256
  719. #define QGEMM_DEFAULT_R qgemm_r
  720. #define CGEMM_DEFAULT_P cgemm_p
  721. #define CGEMM_DEFAULT_Q 256
  722. #define CGEMM_DEFAULT_R cgemm_r
  723. #define ZGEMM_DEFAULT_P zgemm_p
  724. #define ZGEMM_DEFAULT_Q 256
  725. #define ZGEMM_DEFAULT_R zgemm_r
  726. #define XGEMM_DEFAULT_P xgemm_p
  727. #define XGEMM_DEFAULT_Q 256
  728. #define XGEMM_DEFAULT_R xgemm_r
  729. #define SYMV_P 4
  730. #endif
  /*
   * Parameter set compiled in when CORE_NORTHWOOD is defined.
   * There are no nested conditionals: one unroll table, P and R forwarding
   * to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part
   * of the file, and Q fixed at the literal 128 for every prefix.
   */
  731. #ifdef CORE_NORTHWOOD
  732. #define SNUMOPT 4
  733. #define DNUMOPT 2
  734. #define GEMM_DEFAULT_OFFSET_A 0
  735. #define GEMM_DEFAULT_OFFSET_B 32
  736. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  737. #define SYMV_P 8
  738. #define SGEMM_DEFAULT_UNROLL_M 8
  739. #define DGEMM_DEFAULT_UNROLL_M 4
  740. #define QGEMM_DEFAULT_UNROLL_M 2
  741. #define CGEMM_DEFAULT_UNROLL_M 4
  742. #define ZGEMM_DEFAULT_UNROLL_M 2
  743. #define XGEMM_DEFAULT_UNROLL_M 1
  744. #define SGEMM_DEFAULT_UNROLL_N 2
  745. #define DGEMM_DEFAULT_UNROLL_N 2
  746. #define QGEMM_DEFAULT_UNROLL_N 2
  747. #define CGEMM_DEFAULT_UNROLL_N 1
  748. #define ZGEMM_DEFAULT_UNROLL_N 1
  749. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P and R are identifiers, not literals. */
  750. #define SGEMM_DEFAULT_P sgemm_p
  751. #define SGEMM_DEFAULT_R sgemm_r
  752. #define DGEMM_DEFAULT_P dgemm_p
  753. #define DGEMM_DEFAULT_R dgemm_r
  754. #define QGEMM_DEFAULT_P qgemm_p
  755. #define QGEMM_DEFAULT_R qgemm_r
  756. #define CGEMM_DEFAULT_P cgemm_p
  757. #define CGEMM_DEFAULT_R cgemm_r
  758. #define ZGEMM_DEFAULT_P zgemm_p
  759. #define ZGEMM_DEFAULT_R zgemm_r
  760. #define XGEMM_DEFAULT_P xgemm_p
  761. #define XGEMM_DEFAULT_R xgemm_r
  762. #define SGEMM_DEFAULT_Q 128
  763. #define DGEMM_DEFAULT_Q 128
  764. #define QGEMM_DEFAULT_Q 128
  765. #define CGEMM_DEFAULT_Q 128
  766. #define ZGEMM_DEFAULT_Q 128
  767. #define XGEMM_DEFAULT_Q 128
  768. #endif
  /*
   * Parameter set compiled in when CORE_PRESCOTT is defined.
   *  - GEMM_DEFAULT_OFFSET_A/B depend on whether __64BIT__ is defined.
   *  - UNROLL_M depends on ARCH_X86; UNROLL_N is common to both cases.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) not defined in
   *    this part of the file; Q is the literal 128 for every prefix.
   */
  769. #ifdef CORE_PRESCOTT
  770. #define SNUMOPT 4
  771. #define DNUMOPT 2
  /* Note the #ifndef: the first pair applies when __64BIT__ is NOT defined. */
  772. #ifndef __64BIT__
  773. #define GEMM_DEFAULT_OFFSET_A 128
  774. #define GEMM_DEFAULT_OFFSET_B 192
  775. #else
  776. #define GEMM_DEFAULT_OFFSET_A 0
  777. #define GEMM_DEFAULT_OFFSET_B 256
  778. #endif
  779. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  780. #define SYMV_P 8
  /* UNROLL_M: ARCH_X86 values, then the fallback values. */
  781. #ifdef ARCH_X86
  782. #define SGEMM_DEFAULT_UNROLL_M 4
  783. #define DGEMM_DEFAULT_UNROLL_M 2
  784. #define QGEMM_DEFAULT_UNROLL_M 2
  785. #define CGEMM_DEFAULT_UNROLL_M 2
  786. #define ZGEMM_DEFAULT_UNROLL_M 1
  787. #define XGEMM_DEFAULT_UNROLL_M 1
  788. #else
  789. #define SGEMM_DEFAULT_UNROLL_M 8
  790. #define DGEMM_DEFAULT_UNROLL_M 4
  791. #define QGEMM_DEFAULT_UNROLL_M 2
  792. #define CGEMM_DEFAULT_UNROLL_M 4
  793. #define ZGEMM_DEFAULT_UNROLL_M 2
  794. #define XGEMM_DEFAULT_UNROLL_M 1
  795. #endif
  /* UNROLL_N is independent of ARCH_X86 in this block. */
  796. #define SGEMM_DEFAULT_UNROLL_N 4
  797. #define DGEMM_DEFAULT_UNROLL_N 4
  798. #define QGEMM_DEFAULT_UNROLL_N 2
  799. #define CGEMM_DEFAULT_UNROLL_N 2
  800. #define ZGEMM_DEFAULT_UNROLL_N 2
  801. #define XGEMM_DEFAULT_UNROLL_N 1
  802. #define SGEMM_DEFAULT_P sgemm_p
  803. #define SGEMM_DEFAULT_R sgemm_r
  804. #define DGEMM_DEFAULT_P dgemm_p
  805. #define DGEMM_DEFAULT_R dgemm_r
  806. #define QGEMM_DEFAULT_P qgemm_p
  807. #define QGEMM_DEFAULT_R qgemm_r
  808. #define CGEMM_DEFAULT_P cgemm_p
  809. #define CGEMM_DEFAULT_R cgemm_r
  810. #define ZGEMM_DEFAULT_P zgemm_p
  811. #define ZGEMM_DEFAULT_R zgemm_r
  812. #define XGEMM_DEFAULT_P xgemm_p
  813. #define XGEMM_DEFAULT_R xgemm_r
  814. #define SGEMM_DEFAULT_Q 128
  815. #define DGEMM_DEFAULT_Q 128
  816. #define QGEMM_DEFAULT_Q 128
  817. #define CGEMM_DEFAULT_Q 128
  818. #define ZGEMM_DEFAULT_Q 128
  819. #define XGEMM_DEFAULT_Q 128
  820. #endif
  /*
   * Parameter set compiled in when CORE2 is defined.
   *  - UNROLL_M is identical in both ARCH_X86 branches; UNROLL_N differs.
   *  - The function-like macro MASK(a, b) is defined only in the ARCH_X86
   *    branch.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) not defined in
   *    this part of the file; Q is the literal 256 for every prefix.
   */
  821. #ifdef CORE2
  822. #define SNUMOPT 8
  823. #define DNUMOPT 4
  824. #define GEMM_DEFAULT_OFFSET_A 448
  825. #define GEMM_DEFAULT_OFFSET_B 128
  826. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  827. #define SYMV_P 8
  828. #define SWITCH_RATIO 4
  829. #ifdef ARCH_X86
  830. #define SGEMM_DEFAULT_UNROLL_M 8
  831. #define DGEMM_DEFAULT_UNROLL_M 4
  832. #define QGEMM_DEFAULT_UNROLL_M 2
  833. #define CGEMM_DEFAULT_UNROLL_M 4
  834. #define ZGEMM_DEFAULT_UNROLL_M 2
  835. #define XGEMM_DEFAULT_UNROLL_M 1
  836. #define SGEMM_DEFAULT_UNROLL_N 2
  837. #define DGEMM_DEFAULT_UNROLL_N 2
  838. #define QGEMM_DEFAULT_UNROLL_N 2
  839. #define CGEMM_DEFAULT_UNROLL_N 1
  840. #define ZGEMM_DEFAULT_UNROLL_N 1
  841. #define XGEMM_DEFAULT_UNROLL_N 1
  /*
   * MASK(a, b): with integer division this yields a rounded up to the next
   * multiple of b (for non-negative a and positive b). Both arguments are
   * evaluated more than once, so avoid side effects in them.
   */
  842. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  843. #else
  844. #define SGEMM_DEFAULT_UNROLL_M 8
  845. #define DGEMM_DEFAULT_UNROLL_M 4
  846. #define QGEMM_DEFAULT_UNROLL_M 2
  847. #define CGEMM_DEFAULT_UNROLL_M 4
  848. #define ZGEMM_DEFAULT_UNROLL_M 2
  849. #define XGEMM_DEFAULT_UNROLL_M 1
  850. #define SGEMM_DEFAULT_UNROLL_N 4
  851. #define DGEMM_DEFAULT_UNROLL_N 4
  852. #define QGEMM_DEFAULT_UNROLL_N 2
  853. #define CGEMM_DEFAULT_UNROLL_N 2
  854. #define ZGEMM_DEFAULT_UNROLL_N 2
  855. #define XGEMM_DEFAULT_UNROLL_N 1
  856. #endif
  /* P/R/Q below are shared by both branches above. */
  857. #define SGEMM_DEFAULT_P sgemm_p
  858. #define SGEMM_DEFAULT_R sgemm_r
  859. #define DGEMM_DEFAULT_P dgemm_p
  860. #define DGEMM_DEFAULT_R dgemm_r
  861. #define QGEMM_DEFAULT_P qgemm_p
  862. #define QGEMM_DEFAULT_R qgemm_r
  863. #define CGEMM_DEFAULT_P cgemm_p
  864. #define CGEMM_DEFAULT_R cgemm_r
  865. #define ZGEMM_DEFAULT_P zgemm_p
  866. #define ZGEMM_DEFAULT_R zgemm_r
  867. #define XGEMM_DEFAULT_P xgemm_p
  868. #define XGEMM_DEFAULT_R xgemm_r
  869. #define SGEMM_DEFAULT_Q 256
  870. #define DGEMM_DEFAULT_Q 256
  871. #define QGEMM_DEFAULT_Q 256
  872. #define CGEMM_DEFAULT_Q 256
  873. #define ZGEMM_DEFAULT_Q 256
  874. #define XGEMM_DEFAULT_Q 256
  875. #endif
  /*
   * Parameter set compiled in when PENRYN is defined.
   *  - UNROLL_M differs between the ARCH_X86 branch and the fallback branch;
   *    UNROLL_N is the same in both.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) not defined in
   *    this part of the file; Q is a per-prefix literal.
   *  - GETRF_FACTOR is a floating-point literal (0.75).
   */
  876. #ifdef PENRYN
  877. #define SNUMOPT 8
  878. #define DNUMOPT 4
  879. #define GEMM_DEFAULT_OFFSET_A 128
  880. #define GEMM_DEFAULT_OFFSET_B 0
  881. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  882. #define SYMV_P 8
  883. #define SWITCH_RATIO 4
  884. #ifdef ARCH_X86
  885. #define SGEMM_DEFAULT_UNROLL_M 4
  886. #define DGEMM_DEFAULT_UNROLL_M 2
  887. #define QGEMM_DEFAULT_UNROLL_M 2
  888. #define CGEMM_DEFAULT_UNROLL_M 2
  889. #define ZGEMM_DEFAULT_UNROLL_M 1
  890. #define XGEMM_DEFAULT_UNROLL_M 1
  891. #define SGEMM_DEFAULT_UNROLL_N 4
  892. #define DGEMM_DEFAULT_UNROLL_N 4
  893. #define QGEMM_DEFAULT_UNROLL_N 2
  894. #define CGEMM_DEFAULT_UNROLL_N 2
  895. #define ZGEMM_DEFAULT_UNROLL_N 2
  896. #define XGEMM_DEFAULT_UNROLL_N 1
  897. #else
  898. #define SGEMM_DEFAULT_UNROLL_M 8
  899. #define DGEMM_DEFAULT_UNROLL_M 4
  900. #define QGEMM_DEFAULT_UNROLL_M 2
  901. #define CGEMM_DEFAULT_UNROLL_M 4
  902. #define ZGEMM_DEFAULT_UNROLL_M 2
  903. #define XGEMM_DEFAULT_UNROLL_M 1
  904. #define SGEMM_DEFAULT_UNROLL_N 4
  905. #define DGEMM_DEFAULT_UNROLL_N 4
  906. #define QGEMM_DEFAULT_UNROLL_N 2
  907. #define CGEMM_DEFAULT_UNROLL_N 2
  908. #define ZGEMM_DEFAULT_UNROLL_N 2
  909. #define XGEMM_DEFAULT_UNROLL_N 1
  910. #endif
  /* P/R/Q below are shared by both branches above. */
  911. #define SGEMM_DEFAULT_P sgemm_p
  912. #define SGEMM_DEFAULT_R sgemm_r
  913. #define DGEMM_DEFAULT_P dgemm_p
  914. #define DGEMM_DEFAULT_R dgemm_r
  915. #define QGEMM_DEFAULT_P qgemm_p
  916. #define QGEMM_DEFAULT_R qgemm_r
  917. #define CGEMM_DEFAULT_P cgemm_p
  918. #define CGEMM_DEFAULT_R cgemm_r
  919. #define ZGEMM_DEFAULT_P zgemm_p
  920. #define ZGEMM_DEFAULT_R zgemm_r
  921. #define XGEMM_DEFAULT_P xgemm_p
  922. #define XGEMM_DEFAULT_R xgemm_r
  923. #define SGEMM_DEFAULT_Q 512
  924. #define DGEMM_DEFAULT_Q 256
  925. #define QGEMM_DEFAULT_Q 128
  926. #define CGEMM_DEFAULT_Q 512
  927. #define ZGEMM_DEFAULT_Q 256
  928. #define XGEMM_DEFAULT_Q 128
  929. #define GETRF_FACTOR 0.75
  930. #endif
  /*
   * Parameter set compiled in when DUNNINGTON is defined.
   *  - UNROLL_M differs between the ARCH_X86 branch and the fallback branch;
   *    UNROLL_N is the same in both.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) not defined in
   *    this part of the file; Q is a per-prefix literal.
   *  - GEMM_THREAD is mapped to the identifier gemm_thread_mn, which is not
   *    defined in this part of the file. Within this chunk, this is the only
   *    block that defines GEMM_THREAD.
   */
  931. #ifdef DUNNINGTON
  932. #define SNUMOPT 8
  933. #define DNUMOPT 4
  934. #define GEMM_DEFAULT_OFFSET_A 128
  935. #define GEMM_DEFAULT_OFFSET_B 0
  936. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  937. #define SYMV_P 8
  938. #define SWITCH_RATIO 4
  939. #ifdef ARCH_X86
  940. #define SGEMM_DEFAULT_UNROLL_M 4
  941. #define DGEMM_DEFAULT_UNROLL_M 2
  942. #define QGEMM_DEFAULT_UNROLL_M 2
  943. #define CGEMM_DEFAULT_UNROLL_M 2
  944. #define ZGEMM_DEFAULT_UNROLL_M 1
  945. #define XGEMM_DEFAULT_UNROLL_M 1
  946. #define SGEMM_DEFAULT_UNROLL_N 4
  947. #define DGEMM_DEFAULT_UNROLL_N 4
  948. #define QGEMM_DEFAULT_UNROLL_N 2
  949. #define CGEMM_DEFAULT_UNROLL_N 2
  950. #define ZGEMM_DEFAULT_UNROLL_N 2
  951. #define XGEMM_DEFAULT_UNROLL_N 1
  952. #else
  953. #define SGEMM_DEFAULT_UNROLL_M 8
  954. #define DGEMM_DEFAULT_UNROLL_M 4
  955. #define QGEMM_DEFAULT_UNROLL_M 2
  956. #define CGEMM_DEFAULT_UNROLL_M 4
  957. #define ZGEMM_DEFAULT_UNROLL_M 2
  958. #define XGEMM_DEFAULT_UNROLL_M 1
  959. #define SGEMM_DEFAULT_UNROLL_N 4
  960. #define DGEMM_DEFAULT_UNROLL_N 4
  961. #define QGEMM_DEFAULT_UNROLL_N 2
  962. #define CGEMM_DEFAULT_UNROLL_N 2
  963. #define ZGEMM_DEFAULT_UNROLL_N 2
  964. #define XGEMM_DEFAULT_UNROLL_N 1
  965. #endif
  /* P/R/Q below are shared by both branches above. */
  966. #define SGEMM_DEFAULT_P sgemm_p
  967. #define SGEMM_DEFAULT_R sgemm_r
  968. #define DGEMM_DEFAULT_P dgemm_p
  969. #define DGEMM_DEFAULT_R dgemm_r
  970. #define QGEMM_DEFAULT_P qgemm_p
  971. #define QGEMM_DEFAULT_R qgemm_r
  972. #define CGEMM_DEFAULT_P cgemm_p
  973. #define CGEMM_DEFAULT_R cgemm_r
  974. #define ZGEMM_DEFAULT_P zgemm_p
  975. #define ZGEMM_DEFAULT_R zgemm_r
  976. #define XGEMM_DEFAULT_P xgemm_p
  977. #define XGEMM_DEFAULT_R xgemm_r
  978. #define SGEMM_DEFAULT_Q 768
  979. #define DGEMM_DEFAULT_Q 384
  980. #define QGEMM_DEFAULT_Q 192
  981. #define CGEMM_DEFAULT_Q 768
  982. #define ZGEMM_DEFAULT_Q 384
  983. #define XGEMM_DEFAULT_Q 192
  984. #define GETRF_FACTOR 0.75
  985. #define GEMM_THREAD gemm_thread_mn
  986. #endif
  /*
   * Parameter set compiled in when NEHALEM is defined.
   *  - UNROLL_M is identical in the ARCH_X86 branch and the fallback branch;
   *    only UNROLL_N differs (S, D, C and Z are larger in the fallback).
   *  - P and Q are numeric literals; R forwards to the identifiers
   *    sgemm_r ... xgemm_r, which are not defined in this part of the file.
   */
  987. #ifdef NEHALEM
  988. #define SNUMOPT 8
  989. #define DNUMOPT 4
  990. #define GEMM_DEFAULT_OFFSET_A 32
  991. #define GEMM_DEFAULT_OFFSET_B 0
  992. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  993. #define SYMV_P 8
  994. #define SWITCH_RATIO 4
  995. #ifdef ARCH_X86
  996. #define SGEMM_DEFAULT_UNROLL_M 4
  997. #define DGEMM_DEFAULT_UNROLL_M 2
  998. #define QGEMM_DEFAULT_UNROLL_M 2
  999. #define CGEMM_DEFAULT_UNROLL_M 2
  1000. #define ZGEMM_DEFAULT_UNROLL_M 1
  1001. #define XGEMM_DEFAULT_UNROLL_M 1
  1002. #define SGEMM_DEFAULT_UNROLL_N 4
  1003. #define DGEMM_DEFAULT_UNROLL_N 4
  1004. #define QGEMM_DEFAULT_UNROLL_N 2
  1005. #define CGEMM_DEFAULT_UNROLL_N 2
  1006. #define ZGEMM_DEFAULT_UNROLL_N 2
  1007. #define XGEMM_DEFAULT_UNROLL_N 1
  1008. #else
  1009. #define SGEMM_DEFAULT_UNROLL_M 4
  1010. #define DGEMM_DEFAULT_UNROLL_M 2
  1011. #define QGEMM_DEFAULT_UNROLL_M 2
  1012. #define CGEMM_DEFAULT_UNROLL_M 2
  1013. #define ZGEMM_DEFAULT_UNROLL_M 1
  1014. #define XGEMM_DEFAULT_UNROLL_M 1
  1015. #define SGEMM_DEFAULT_UNROLL_N 8
  1016. #define DGEMM_DEFAULT_UNROLL_N 8
  1017. #define QGEMM_DEFAULT_UNROLL_N 2
  1018. #define CGEMM_DEFAULT_UNROLL_N 4
  1019. #define ZGEMM_DEFAULT_UNROLL_N 4
  1020. #define XGEMM_DEFAULT_UNROLL_N 1
  1021. #endif
  /* P is 504 for S/D/Q and 252 for C/Z/X; R is an identifier per prefix. */
  1022. #define SGEMM_DEFAULT_P 504
  1023. #define SGEMM_DEFAULT_R sgemm_r
  1024. #define DGEMM_DEFAULT_P 504
  1025. #define DGEMM_DEFAULT_R dgemm_r
  1026. #define QGEMM_DEFAULT_P 504
  1027. #define QGEMM_DEFAULT_R qgemm_r
  1028. #define CGEMM_DEFAULT_P 252
  1029. #define CGEMM_DEFAULT_R cgemm_r
  1030. #define ZGEMM_DEFAULT_P 252
  1031. #define ZGEMM_DEFAULT_R zgemm_r
  1032. #define XGEMM_DEFAULT_P 252
  1033. #define XGEMM_DEFAULT_R xgemm_r
  1034. #define SGEMM_DEFAULT_Q 512
  1035. #define DGEMM_DEFAULT_Q 256
  1036. #define QGEMM_DEFAULT_Q 128
  1037. #define CGEMM_DEFAULT_Q 512
  1038. #define ZGEMM_DEFAULT_Q 256
  1039. #define XGEMM_DEFAULT_Q 128
  1040. #define GETRF_FACTOR 0.72
  1041. #endif
  /*
   * Parameter set compiled in when SANDYBRIDGE is defined.
   *  - UNROLL_M / UNROLL_N: one table for ARCH_X86, a different one otherwise.
   *  - P and Q are numeric literals; R forwards to the identifiers
   *    sgemm_r ... xgemm_r (not defined in this part of the file). The
   *    commented-out "R 1024" lines are disabled literal alternatives.
   *  - The CGEMM3M / ZGEMM3M / XGEMM3M parameters are defined unconditionally
   *    inside this block, i.e. for both ARCH_X86 and non-ARCH_X86 builds.
   */
  1042. #ifdef SANDYBRIDGE
  1043. #define SNUMOPT 8
  1044. #define DNUMOPT 4
  1045. #define GEMM_DEFAULT_OFFSET_A 0
  1046. #define GEMM_DEFAULT_OFFSET_B 0
  1047. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1048. #define SYMV_P 8
  1049. #define SWITCH_RATIO 4
  1050. #ifdef ARCH_X86
  1051. #define SGEMM_DEFAULT_UNROLL_M 4
  1052. #define DGEMM_DEFAULT_UNROLL_M 2
  1053. #define QGEMM_DEFAULT_UNROLL_M 2
  1054. #define CGEMM_DEFAULT_UNROLL_M 2
  1055. #define ZGEMM_DEFAULT_UNROLL_M 1
  1056. #define XGEMM_DEFAULT_UNROLL_M 1
  1057. #define SGEMM_DEFAULT_UNROLL_N 4
  1058. #define DGEMM_DEFAULT_UNROLL_N 4
  1059. #define QGEMM_DEFAULT_UNROLL_N 2
  1060. #define CGEMM_DEFAULT_UNROLL_N 2
  1061. #define ZGEMM_DEFAULT_UNROLL_N 2
  1062. #define XGEMM_DEFAULT_UNROLL_N 1
  1063. #else
  1064. #define SGEMM_DEFAULT_UNROLL_M 16
  1065. #define DGEMM_DEFAULT_UNROLL_M 8
  1066. #define QGEMM_DEFAULT_UNROLL_M 2
  1067. #define CGEMM_DEFAULT_UNROLL_M 8
  1068. #define ZGEMM_DEFAULT_UNROLL_M 1
  1069. #define XGEMM_DEFAULT_UNROLL_M 1
  1070. #define SGEMM_DEFAULT_UNROLL_N 4
  1071. #define DGEMM_DEFAULT_UNROLL_N 4
  1072. #define QGEMM_DEFAULT_UNROLL_N 2
  1073. #define CGEMM_DEFAULT_UNROLL_N 2
  1074. #define ZGEMM_DEFAULT_UNROLL_N 4
  1075. #define XGEMM_DEFAULT_UNROLL_N 1
  1076. #endif
  /* P/R pairs; the "//" lines are inactive fixed-R alternatives. */
  1077. #define SGEMM_DEFAULT_P 768
  1078. #define SGEMM_DEFAULT_R sgemm_r
  1079. //#define SGEMM_DEFAULT_R 1024
  1080. #define DGEMM_DEFAULT_P 512
  1081. #define DGEMM_DEFAULT_R dgemm_r
  1082. //#define DGEMM_DEFAULT_R 1024
  1083. #define QGEMM_DEFAULT_P 504
  1084. #define QGEMM_DEFAULT_R qgemm_r
  1085. #define CGEMM_DEFAULT_P 768
  1086. #define CGEMM_DEFAULT_R cgemm_r
  1087. //#define CGEMM_DEFAULT_R 1024
  1088. #define ZGEMM_DEFAULT_P 512
  1089. #define ZGEMM_DEFAULT_R zgemm_r
  1090. //#define ZGEMM_DEFAULT_R 1024
  1091. #define XGEMM_DEFAULT_P 252
  1092. #define XGEMM_DEFAULT_R xgemm_r
  1093. #define SGEMM_DEFAULT_Q 384
  1094. #define DGEMM_DEFAULT_Q 256
  1095. #define QGEMM_DEFAULT_Q 128
  1096. #define CGEMM_DEFAULT_Q 512
  1097. #define ZGEMM_DEFAULT_Q 192
  1098. #define XGEMM_DEFAULT_Q 128
  /* 3M variants: all literals, including R (12288). */
  1099. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1100. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1101. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1102. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1103. #define CGEMM3M_DEFAULT_P 448
  1104. #define ZGEMM3M_DEFAULT_P 224
  1105. #define XGEMM3M_DEFAULT_P 112
  1106. #define CGEMM3M_DEFAULT_Q 224
  1107. #define ZGEMM3M_DEFAULT_Q 224
  1108. #define XGEMM3M_DEFAULT_Q 224
  1109. #define CGEMM3M_DEFAULT_R 12288
  1110. #define ZGEMM3M_DEFAULT_R 12288
  1111. #define XGEMM3M_DEFAULT_R 12288
  1112. #define GETRF_FACTOR 0.72
  1113. #endif
  /*
   * Parameter set compiled in when HASWELL is defined.
   * The block tests ARCH_X86 twice: first for the unroll table, then for the
   * P/Q/R table.
   *  - Non-ARCH_X86 unroll branch additionally defines
   *    DGEMM_DEFAULT_UNROLL_MN.
   *  - ARCH_X86 P/Q/R branch: CGEMM_DEFAULT_R is the literal 1024 while the
   *    other R macros forward to identifiers.
   *  - Non-ARCH_X86 P/Q/R branch: SGEMM/DGEMM Q depend on WINDOWS_ABI,
   *    DGEMM_DEFAULT_R is the literal 13824, and the *GEMM3M parameters are
   *    defined (they are NOT defined in the ARCH_X86 branch).
   *  - No GETRF_FACTOR is defined in this block.
   */
  1114. #ifdef HASWELL
  1115. #define SNUMOPT 16
  1116. #define DNUMOPT 8
  1117. #define GEMM_DEFAULT_OFFSET_A 0
  1118. #define GEMM_DEFAULT_OFFSET_B 0
  1119. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1120. #define SYMV_P 8
  1121. #define SWITCH_RATIO 4
  /* First ARCH_X86 test: unroll factors. */
  1122. #ifdef ARCH_X86
  1123. #define SGEMM_DEFAULT_UNROLL_M 4
  1124. #define DGEMM_DEFAULT_UNROLL_M 2
  1125. #define QGEMM_DEFAULT_UNROLL_M 2
  1126. #define CGEMM_DEFAULT_UNROLL_M 2
  1127. #define ZGEMM_DEFAULT_UNROLL_M 1
  1128. #define XGEMM_DEFAULT_UNROLL_M 1
  1129. #define SGEMM_DEFAULT_UNROLL_N 4
  1130. #define DGEMM_DEFAULT_UNROLL_N 4
  1131. #define QGEMM_DEFAULT_UNROLL_N 2
  1132. #define CGEMM_DEFAULT_UNROLL_N 2
  1133. #define ZGEMM_DEFAULT_UNROLL_N 2
  1134. #define XGEMM_DEFAULT_UNROLL_N 1
  1135. #else
  1136. #define SGEMM_DEFAULT_UNROLL_M 16
  1137. #define DGEMM_DEFAULT_UNROLL_M 4
  1138. #define QGEMM_DEFAULT_UNROLL_M 2
  1139. #define CGEMM_DEFAULT_UNROLL_M 8
  1140. #define ZGEMM_DEFAULT_UNROLL_M 4
  1141. #define XGEMM_DEFAULT_UNROLL_M 1
  1142. #define SGEMM_DEFAULT_UNROLL_N 4
  1143. #define DGEMM_DEFAULT_UNROLL_N 8
  1144. #define QGEMM_DEFAULT_UNROLL_N 2
  1145. #define CGEMM_DEFAULT_UNROLL_N 2
  1146. #define ZGEMM_DEFAULT_UNROLL_N 2
  1147. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Only defined for D and only in this branch. */
  1148. #define DGEMM_DEFAULT_UNROLL_MN 16
  1149. #endif
  /* Second ARCH_X86 test: P/Q/R values. */
  1150. #ifdef ARCH_X86
  1151. #define SGEMM_DEFAULT_P 512
  1152. #define SGEMM_DEFAULT_R sgemm_r
  1153. #define DGEMM_DEFAULT_P 512
  1154. #define DGEMM_DEFAULT_R dgemm_r
  1155. #define QGEMM_DEFAULT_P 504
  1156. #define QGEMM_DEFAULT_R qgemm_r
  1157. #define CGEMM_DEFAULT_P 128
  /* Literal R here, unlike the other prefixes in this branch. */
  1158. #define CGEMM_DEFAULT_R 1024
  1159. #define ZGEMM_DEFAULT_P 512
  1160. #define ZGEMM_DEFAULT_R zgemm_r
  1161. #define XGEMM_DEFAULT_P 252
  1162. #define XGEMM_DEFAULT_R xgemm_r
  1163. #define SGEMM_DEFAULT_Q 256
  1164. #define DGEMM_DEFAULT_Q 256
  1165. #define QGEMM_DEFAULT_Q 128
  1166. #define CGEMM_DEFAULT_Q 256
  1167. #define ZGEMM_DEFAULT_Q 192
  1168. #define XGEMM_DEFAULT_Q 128
  1169. #else
  1170. #define SGEMM_DEFAULT_P 768
  1171. #define DGEMM_DEFAULT_P 512
  1172. #define CGEMM_DEFAULT_P 384
  1173. #define ZGEMM_DEFAULT_P 256
  /* S and D Q values are smaller when WINDOWS_ABI is defined. */
  1174. #ifdef WINDOWS_ABI
  1175. #define SGEMM_DEFAULT_Q 320
  1176. #define DGEMM_DEFAULT_Q 128
  1177. #else
  1178. #define SGEMM_DEFAULT_Q 384
  1179. #define DGEMM_DEFAULT_Q 256
  1180. #endif
  1181. #define CGEMM_DEFAULT_Q 192
  1182. #define ZGEMM_DEFAULT_Q 128
  1183. #define SGEMM_DEFAULT_R sgemm_r
  /* Literal R for D in this branch; S, C and Z forward to identifiers. */
  1184. #define DGEMM_DEFAULT_R 13824
  1185. #define CGEMM_DEFAULT_R cgemm_r
  1186. #define ZGEMM_DEFAULT_R zgemm_r
  1187. #define QGEMM_DEFAULT_Q 128
  1188. #define QGEMM_DEFAULT_P 504
  1189. #define QGEMM_DEFAULT_R qgemm_r
  1190. #define XGEMM_DEFAULT_P 252
  1191. #define XGEMM_DEFAULT_R xgemm_r
  1192. #define XGEMM_DEFAULT_Q 128
  /* 3M variants: present only in this (non-ARCH_X86) branch. */
  1193. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1194. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1195. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1196. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1197. #define CGEMM3M_DEFAULT_P 448
  1198. #define ZGEMM3M_DEFAULT_P 224
  1199. #define XGEMM3M_DEFAULT_P 112
  1200. #define CGEMM3M_DEFAULT_Q 224
  1201. #define ZGEMM3M_DEFAULT_Q 224
  1202. #define XGEMM3M_DEFAULT_Q 224
  1203. #define CGEMM3M_DEFAULT_R 12288
  1204. #define ZGEMM3M_DEFAULT_R 12288
  1205. #define XGEMM3M_DEFAULT_R 12288
  1206. #endif
  1207. #endif
  /*
   * Parameter set compiled in when ATOM is defined.
   *  - UNROLL_M depends on ARCH_X86; UNROLL_N is common to both cases.
   *  - P and R forward to identifiers (sgemm_p, sgemm_r, ...) not defined in
   *    this part of the file; Q is the literal 256 for every prefix.
   */
  1208. #ifdef ATOM
  1209. #define SNUMOPT 2
  1210. #define DNUMOPT 1
  1211. #define GEMM_DEFAULT_OFFSET_A 64
  1212. #define GEMM_DEFAULT_OFFSET_B 0
  1213. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1214. #define SYMV_P 8
  1215. #ifdef ARCH_X86
  1216. #define SGEMM_DEFAULT_UNROLL_M 4
  1217. #define DGEMM_DEFAULT_UNROLL_M 2
  1218. #define QGEMM_DEFAULT_UNROLL_M 2
  1219. #define CGEMM_DEFAULT_UNROLL_M 2
  1220. #define ZGEMM_DEFAULT_UNROLL_M 1
  1221. #define XGEMM_DEFAULT_UNROLL_M 1
  1222. #else
  1223. #define SGEMM_DEFAULT_UNROLL_M 8
  1224. #define DGEMM_DEFAULT_UNROLL_M 4
  1225. #define QGEMM_DEFAULT_UNROLL_M 2
  1226. #define CGEMM_DEFAULT_UNROLL_M 4
  1227. #define ZGEMM_DEFAULT_UNROLL_M 2
  1228. #define XGEMM_DEFAULT_UNROLL_M 1
  1229. #endif
  /* UNROLL_N is independent of ARCH_X86 in this block. */
  1230. #define SGEMM_DEFAULT_UNROLL_N 4
  1231. #define DGEMM_DEFAULT_UNROLL_N 2
  1232. #define QGEMM_DEFAULT_UNROLL_N 2
  1233. #define CGEMM_DEFAULT_UNROLL_N 2
  1234. #define ZGEMM_DEFAULT_UNROLL_N 1
  1235. #define XGEMM_DEFAULT_UNROLL_N 1
  1236. #define SGEMM_DEFAULT_P sgemm_p
  1237. #define SGEMM_DEFAULT_R sgemm_r
  1238. #define DGEMM_DEFAULT_P dgemm_p
  1239. #define DGEMM_DEFAULT_R dgemm_r
  1240. #define QGEMM_DEFAULT_P qgemm_p
  1241. #define QGEMM_DEFAULT_R qgemm_r
  1242. #define CGEMM_DEFAULT_P cgemm_p
  1243. #define CGEMM_DEFAULT_R cgemm_r
  1244. #define ZGEMM_DEFAULT_P zgemm_p
  1245. #define ZGEMM_DEFAULT_R zgemm_r
  1246. #define XGEMM_DEFAULT_P xgemm_p
  1247. #define XGEMM_DEFAULT_R xgemm_r
  1248. #define SGEMM_DEFAULT_Q 256
  1249. #define DGEMM_DEFAULT_Q 256
  1250. #define QGEMM_DEFAULT_Q 256
  1251. #define CGEMM_DEFAULT_Q 256
  1252. #define ZGEMM_DEFAULT_Q 256
  1253. #define XGEMM_DEFAULT_Q 256
  1254. #endif
  /*
   * Parameter set compiled in when ITANIUM2 is defined.
   * No nested conditionals. Unroll factors are 8x8 for S/D/Q and 4x4 for
   * C/Z/X. P and R forward to identifiers (sgemm_p, sgemm_r, ...) that are
   * not defined in this part of the file; Q is the literal 1024 throughout.
   */
  1255. #ifdef ITANIUM2
  1256. #define SNUMOPT 4
  1257. #define DNUMOPT 4
  1258. #define GEMM_DEFAULT_OFFSET_A 0
  1259. #define GEMM_DEFAULT_OFFSET_B 128
  1260. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1261. #define SGEMM_DEFAULT_UNROLL_M 8
  1262. #define SGEMM_DEFAULT_UNROLL_N 8
  1263. #define DGEMM_DEFAULT_UNROLL_M 8
  1264. #define DGEMM_DEFAULT_UNROLL_N 8
  1265. #define QGEMM_DEFAULT_UNROLL_M 8
  1266. #define QGEMM_DEFAULT_UNROLL_N 8
  1267. #define CGEMM_DEFAULT_UNROLL_M 4
  1268. #define CGEMM_DEFAULT_UNROLL_N 4
  1269. #define ZGEMM_DEFAULT_UNROLL_M 4
  1270. #define ZGEMM_DEFAULT_UNROLL_N 4
  1271. #define XGEMM_DEFAULT_UNROLL_M 4
  1272. #define XGEMM_DEFAULT_UNROLL_N 4
  /* P and R are identifiers, Q is a literal. */
  1273. #define SGEMM_DEFAULT_P sgemm_p
  1274. #define DGEMM_DEFAULT_P dgemm_p
  1275. #define QGEMM_DEFAULT_P qgemm_p
  1276. #define CGEMM_DEFAULT_P cgemm_p
  1277. #define ZGEMM_DEFAULT_P zgemm_p
  1278. #define XGEMM_DEFAULT_P xgemm_p
  1279. #define SGEMM_DEFAULT_Q 1024
  1280. #define DGEMM_DEFAULT_Q 1024
  1281. #define QGEMM_DEFAULT_Q 1024
  1282. #define CGEMM_DEFAULT_Q 1024
  1283. #define ZGEMM_DEFAULT_Q 1024
  1284. #define XGEMM_DEFAULT_Q 1024
  1285. #define SGEMM_DEFAULT_R sgemm_r
  1286. #define DGEMM_DEFAULT_R dgemm_r
  1287. #define QGEMM_DEFAULT_R qgemm_r
  1288. #define CGEMM_DEFAULT_R cgemm_r
  1289. #define ZGEMM_DEFAULT_R zgemm_r
  1290. #define XGEMM_DEFAULT_R xgemm_r
  1291. #define SYMV_P 16
  1292. #define GETRF_FACTOR 0.65
  1293. #endif
  /*
   * Parameter set shared by EV4, EV5 and EV6 (any one selects the block).
   *  - SNUMOPT/DNUMOPT are 1 for EV4 and 2 otherwise.
   *  - Offsets, alignment, unroll factors and SYMV_P are common.
   *  - Each of EV4 / EV5 / EV6 then supplies its own P and Q literals.
   *    Only the EV4 section defines *_DEFAULT_R; the EV5 and EV6 sections
   *    do not. NOTE(review): presumably R is supplied elsewhere for those
   *    targets -- verify.
   *  - Only the S/D/C/Z prefixes appear here; no QGEMM_* or XGEMM_* macros
   *    are defined in this block.
   */
  1294. #if defined(EV4) || defined(EV5) || defined(EV6)
  1295. #ifdef EV4
  1296. #define SNUMOPT 1
  1297. #define DNUMOPT 1
  1298. #else
  1299. #define SNUMOPT 2
  1300. #define DNUMOPT 2
  1301. #endif
  1302. #define GEMM_DEFAULT_OFFSET_A 512
  1303. #define GEMM_DEFAULT_OFFSET_B 512
  1304. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1305. #define SGEMM_DEFAULT_UNROLL_M 4
  1306. #define SGEMM_DEFAULT_UNROLL_N 4
  1307. #define DGEMM_DEFAULT_UNROLL_M 4
  1308. #define DGEMM_DEFAULT_UNROLL_N 4
  1309. #define CGEMM_DEFAULT_UNROLL_M 2
  1310. #define CGEMM_DEFAULT_UNROLL_N 2
  1311. #define ZGEMM_DEFAULT_UNROLL_M 2
  1312. #define ZGEMM_DEFAULT_UNROLL_N 2
  1313. #define SYMV_P 8
  /* EV4: P, Q and R are all literals. */
  1314. #ifdef EV4
  1315. #define SGEMM_DEFAULT_P 32
  1316. #define SGEMM_DEFAULT_Q 112
  1317. #define SGEMM_DEFAULT_R 256
  1318. #define DGEMM_DEFAULT_P 32
  1319. #define DGEMM_DEFAULT_Q 56
  1320. #define DGEMM_DEFAULT_R 256
  1321. #define CGEMM_DEFAULT_P 32
  1322. #define CGEMM_DEFAULT_Q 64
  1323. #define CGEMM_DEFAULT_R 240
  1324. #define ZGEMM_DEFAULT_P 32
  1325. #define ZGEMM_DEFAULT_Q 32
  1326. #define ZGEMM_DEFAULT_R 240
  1327. #endif
  /* EV5: P and Q only. */
  1328. #ifdef EV5
  1329. #define SGEMM_DEFAULT_P 64
  1330. #define SGEMM_DEFAULT_Q 256
  1331. #define DGEMM_DEFAULT_P 64
  1332. #define DGEMM_DEFAULT_Q 128
  1333. #define CGEMM_DEFAULT_P 64
  1334. #define CGEMM_DEFAULT_Q 128
  1335. #define ZGEMM_DEFAULT_P 64
  1336. #define ZGEMM_DEFAULT_Q 64
  1337. #endif
  /* EV6: P and Q only. */
  1338. #ifdef EV6
  1339. #define SGEMM_DEFAULT_P 256
  1340. #define SGEMM_DEFAULT_Q 512
  1341. #define DGEMM_DEFAULT_P 256
  1342. #define DGEMM_DEFAULT_Q 256
  1343. #define CGEMM_DEFAULT_P 256
  1344. #define CGEMM_DEFAULT_Q 256
  1345. #define ZGEMM_DEFAULT_P 128
  1346. #define ZGEMM_DEFAULT_Q 256
  1347. #endif
  1348. #endif
  /*
   * Parameter set compiled in when CELL is defined.
   * Only the S/D/C/Z prefixes are covered, with literal P and Q values.
   * No *_DEFAULT_R macro is defined in this block.
   * NOTE(review): presumably R comes from elsewhere for this target -- verify.
   */
  1349. #ifdef CELL
  1350. #define SNUMOPT 2
  1351. #define DNUMOPT 2
  1352. #define GEMM_DEFAULT_OFFSET_A 0
  1353. #define GEMM_DEFAULT_OFFSET_B 8192
  1354. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1355. #define SGEMM_DEFAULT_UNROLL_M 16
  1356. #define SGEMM_DEFAULT_UNROLL_N 4
  1357. #define DGEMM_DEFAULT_UNROLL_M 4
  1358. #define DGEMM_DEFAULT_UNROLL_N 4
  1359. #define CGEMM_DEFAULT_UNROLL_M 8
  1360. #define CGEMM_DEFAULT_UNROLL_N 2
  1361. #define ZGEMM_DEFAULT_UNROLL_M 2
  1362. #define ZGEMM_DEFAULT_UNROLL_N 2
  1363. #define SGEMM_DEFAULT_P 128
  1364. #define DGEMM_DEFAULT_P 128
  1365. #define CGEMM_DEFAULT_P 128
  1366. #define ZGEMM_DEFAULT_P 128
  1367. #define SGEMM_DEFAULT_Q 512
  1368. #define DGEMM_DEFAULT_Q 256
  1369. #define CGEMM_DEFAULT_Q 256
  1370. #define ZGEMM_DEFAULT_Q 128
  1371. #define SYMV_P 4
  1372. #endif
  /*
   * Parameter set compiled in when PPCG4 is defined.
   * Only the S/D/C/Z prefixes are covered, with literal P and Q values.
   * Unlike the neighbouring blocks, this one defines neither
   * SNUMOPT/DNUMOPT nor any *_DEFAULT_R macro.
   * NOTE(review): confirm whether those omissions are intentional.
   */
  1373. #ifdef PPCG4
  1374. #define GEMM_DEFAULT_OFFSET_A 0
  1375. #define GEMM_DEFAULT_OFFSET_B 1024
  1376. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1377. #define SGEMM_DEFAULT_UNROLL_M 16
  1378. #define SGEMM_DEFAULT_UNROLL_N 4
  1379. #define DGEMM_DEFAULT_UNROLL_M 4
  1380. #define DGEMM_DEFAULT_UNROLL_N 4
  1381. #define CGEMM_DEFAULT_UNROLL_M 8
  1382. #define CGEMM_DEFAULT_UNROLL_N 2
  1383. #define ZGEMM_DEFAULT_UNROLL_M 2
  1384. #define ZGEMM_DEFAULT_UNROLL_N 2
  1385. #define SGEMM_DEFAULT_P 256
  1386. #define DGEMM_DEFAULT_P 128
  1387. #define CGEMM_DEFAULT_P 128
  1388. #define ZGEMM_DEFAULT_P 64
  1389. #define SGEMM_DEFAULT_Q 256
  1390. #define DGEMM_DEFAULT_Q 256
  1391. #define CGEMM_DEFAULT_Q 256
  1392. #define ZGEMM_DEFAULT_Q 256
  1393. #define SYMV_P 4
  1394. #endif
  /*
   * Parameter set compiled in when PPC970 is defined.
   *  - Only the S/D/C/Z prefixes are covered; no *_DEFAULT_R is defined.
   *  - The *_DEFAULT_P macros are defined ONLY inside "#ifdef OS_LINUX";
   *    with OS_LINUX undefined this block leaves P undefined.
   *    NOTE(review): presumably P is provided elsewhere for other OSes --
   *    verify.
   *  - Within OS_LINUX, P is selected by comparing L2_SIZE to 1024976.
   */
  1395. #ifdef PPC970
  1396. #define SNUMOPT 4
  1397. #define DNUMOPT 4
  1398. #define GEMM_DEFAULT_OFFSET_A 2688
  1399. #define GEMM_DEFAULT_OFFSET_B 3072
  1400. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1401. #define SGEMM_DEFAULT_UNROLL_M 16
  1402. #define SGEMM_DEFAULT_UNROLL_N 4
  1403. #define DGEMM_DEFAULT_UNROLL_M 4
  1404. #define DGEMM_DEFAULT_UNROLL_N 4
  1405. #define CGEMM_DEFAULT_UNROLL_M 8
  1406. #define CGEMM_DEFAULT_UNROLL_N 2
  1407. #define ZGEMM_DEFAULT_UNROLL_M 2
  1408. #define ZGEMM_DEFAULT_UNROLL_N 2
  1409. #ifdef OS_LINUX
  /*
   * NOTE(review): 1024976 is not 1 MiB (1048576). It may deliberately match
   * the exact L2_SIZE value emitted by the build configuration -- check the
   * producer of L2_SIZE before "correcting" this literal.
   */
  1410. #if L2_SIZE == 1024976
  1411. #define SGEMM_DEFAULT_P 320
  1412. #define DGEMM_DEFAULT_P 256
  1413. #define CGEMM_DEFAULT_P 256
  1414. #define ZGEMM_DEFAULT_P 256
  1415. #else
  1416. #define SGEMM_DEFAULT_P 176
  1417. #define DGEMM_DEFAULT_P 176
  1418. #define CGEMM_DEFAULT_P 176
  1419. #define ZGEMM_DEFAULT_P 176
  1420. #endif
  1421. #endif
  /* Q and SYMV_P are defined regardless of OS_LINUX. */
  1422. #define SGEMM_DEFAULT_Q 512
  1423. #define DGEMM_DEFAULT_Q 256
  1424. #define CGEMM_DEFAULT_Q 256
  1425. #define ZGEMM_DEFAULT_Q 128
  1426. #define SYMV_P 4
  1427. #endif
  /*
   * Parameter set compiled in when PPC440 is defined.
   *  - Only the S/D/C/Z prefixes are covered.
   *  - Both GEMM offsets are written as (32 * 0), i.e. they evaluate to 0.
   *  - Each *_DEFAULT_R is an alias for the matching *_DEFAULT_P, so R
   *    equals P (512) for every prefix in this block.
   */
  1428. #ifdef PPC440
  1429. #define SNUMOPT 2
  1430. #define DNUMOPT 2
  1431. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1432. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1433. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1434. #define SGEMM_DEFAULT_UNROLL_M 4
  1435. #define SGEMM_DEFAULT_UNROLL_N 4
  1436. #define DGEMM_DEFAULT_UNROLL_M 4
  1437. #define DGEMM_DEFAULT_UNROLL_N 4
  1438. #define CGEMM_DEFAULT_UNROLL_M 2
  1439. #define CGEMM_DEFAULT_UNROLL_N 2
  1440. #define ZGEMM_DEFAULT_UNROLL_M 2
  1441. #define ZGEMM_DEFAULT_UNROLL_N 2
  1442. #define SGEMM_DEFAULT_P 512
  1443. #define DGEMM_DEFAULT_P 512
  1444. #define CGEMM_DEFAULT_P 512
  1445. #define ZGEMM_DEFAULT_P 512
  1446. #define SGEMM_DEFAULT_Q 1024
  1447. #define DGEMM_DEFAULT_Q 512
  1448. #define CGEMM_DEFAULT_Q 512
  1449. #define ZGEMM_DEFAULT_Q 256
  /* R tracks P: changing a P value above changes the matching R too. */
  1450. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1451. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1452. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1453. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1454. #define SYMV_P 4
  1455. #endif
  /*
   * Parameter set compiled in when PPC440FP2 is defined.
   *  - Only the S/D/C/Z prefixes are covered; no *_DEFAULT_R is defined.
   *  - Both GEMM offsets are written as (32 * 0), i.e. they evaluate to 0.
   *  - Q is chosen by a hard-wired "#if 1": the first (larger) set is always
   *    active and the #else set is never compiled.
   */
  1456. #ifdef PPC440FP2
  1457. #define SNUMOPT 4
  1458. #define DNUMOPT 4
  1459. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1460. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1461. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1462. #define SGEMM_DEFAULT_UNROLL_M 8
  1463. #define SGEMM_DEFAULT_UNROLL_N 4
  1464. #define DGEMM_DEFAULT_UNROLL_M 8
  1465. #define DGEMM_DEFAULT_UNROLL_N 4
  1466. #define CGEMM_DEFAULT_UNROLL_M 4
  1467. #define CGEMM_DEFAULT_UNROLL_N 2
  1468. #define ZGEMM_DEFAULT_UNROLL_M 4
  1469. #define ZGEMM_DEFAULT_UNROLL_N 2
  1470. #define SGEMM_DEFAULT_P 128
  1471. #define DGEMM_DEFAULT_P 128
  1472. #define CGEMM_DEFAULT_P 128
  1473. #define ZGEMM_DEFAULT_P 128
  /* Always-true switch: flip to 0 to select the smaller Q set below. */
  1474. #if 1
  1475. #define SGEMM_DEFAULT_Q 4096
  1476. #define DGEMM_DEFAULT_Q 3072
  1477. #define CGEMM_DEFAULT_Q 2048
  1478. #define ZGEMM_DEFAULT_Q 1024
  1479. #else
  1480. #define SGEMM_DEFAULT_Q 512
  1481. #define DGEMM_DEFAULT_Q 256
  1482. #define CGEMM_DEFAULT_Q 256
  1483. #define ZGEMM_DEFAULT_Q 128
  1484. #endif
  1485. #define SYMV_P 4
  1486. #endif
  /*
   * Parameter set shared by POWER3, POWER4 and POWER5 (any one selects it).
   * Offsets, alignment, S/D/C/Z unroll factors and SYMV_P are common; the
   * per-CPU sections below are uneven in what they define:
   *  - POWER3: SNUMOPT/DNUMOPT plus literal P/Q/R for S, D and Z only
   *    (no CGEMM_DEFAULT_P/Q/R).
   *  - POWER4: P only, chosen by ALLOC_HUGETLB (no Q, R, SNUMOPT, DNUMOPT).
   *  - POWER5: P chosen by ALLOC_HUGETLB, plus Q = 256 (no R, SNUMOPT,
   *    DNUMOPT).
   * NOTE(review): presumably the missing macros receive defaults elsewhere
   * in the file -- verify before relying on them for these targets.
   */
  1487. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1488. #define GEMM_DEFAULT_OFFSET_A 0
  1489. #define GEMM_DEFAULT_OFFSET_B 2048
  1490. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1491. #define SGEMM_DEFAULT_UNROLL_M 4
  1492. #define SGEMM_DEFAULT_UNROLL_N 4
  1493. #define DGEMM_DEFAULT_UNROLL_M 4
  1494. #define DGEMM_DEFAULT_UNROLL_N 4
  1495. #define CGEMM_DEFAULT_UNROLL_M 2
  1496. #define CGEMM_DEFAULT_UNROLL_N 2
  1497. #define ZGEMM_DEFAULT_UNROLL_M 2
  1498. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* POWER3: S, D and Z triples only. */
  1499. #ifdef POWER3
  1500. #define SNUMOPT 4
  1501. #define DNUMOPT 4
  1502. #define SGEMM_DEFAULT_P 256
  1503. #define SGEMM_DEFAULT_Q 432
  1504. #define SGEMM_DEFAULT_R 1012
  1505. #define DGEMM_DEFAULT_P 256
  1506. #define DGEMM_DEFAULT_Q 216
  1507. #define DGEMM_DEFAULT_R 1012
  1508. #define ZGEMM_DEFAULT_P 256
  1509. #define ZGEMM_DEFAULT_Q 104
  1510. #define ZGEMM_DEFAULT_R 1012
  1511. #endif
  /* POWER4: P is larger when ALLOC_HUGETLB is defined. */
  1512. #if defined(POWER4)
  1513. #ifdef ALLOC_HUGETLB
  1514. #define SGEMM_DEFAULT_P 184
  1515. #define DGEMM_DEFAULT_P 184
  1516. #define CGEMM_DEFAULT_P 184
  1517. #define ZGEMM_DEFAULT_P 184
  1518. #else
  1519. #define SGEMM_DEFAULT_P 144
  1520. #define DGEMM_DEFAULT_P 144
  1521. #define CGEMM_DEFAULT_P 144
  1522. #define ZGEMM_DEFAULT_P 144
  1523. #endif
  1524. #endif
  /* POWER5: P is larger when ALLOC_HUGETLB is defined; Q is fixed at 256. */
  1525. #if defined(POWER5)
  1526. #ifdef ALLOC_HUGETLB
  1527. #define SGEMM_DEFAULT_P 512
  1528. #define DGEMM_DEFAULT_P 256
  1529. #define CGEMM_DEFAULT_P 256
  1530. #define ZGEMM_DEFAULT_P 128
  1531. #else
  1532. #define SGEMM_DEFAULT_P 320
  1533. #define DGEMM_DEFAULT_P 160
  1534. #define CGEMM_DEFAULT_P 160
  1535. #define ZGEMM_DEFAULT_P 80
  1536. #endif
  1537. #define SGEMM_DEFAULT_Q 256
  1538. #define CGEMM_DEFAULT_Q 256
  1539. #define DGEMM_DEFAULT_Q 256
  1540. #define ZGEMM_DEFAULT_Q 256
  1541. #endif
  1542. #define SYMV_P 8
  1543. #endif
  1544. #if defined(POWER6)
  1545. #define SNUMOPT 4
  1546. #define DNUMOPT 4
  1547. #define GEMM_DEFAULT_OFFSET_A 384
  1548. #define GEMM_DEFAULT_OFFSET_B 1024
  1549. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1550. #define SGEMM_DEFAULT_UNROLL_M 4
  1551. #define SGEMM_DEFAULT_UNROLL_N 4
  1552. #define DGEMM_DEFAULT_UNROLL_M 4
  1553. #define DGEMM_DEFAULT_UNROLL_N 4
  1554. #define CGEMM_DEFAULT_UNROLL_M 2
  1555. #define CGEMM_DEFAULT_UNROLL_N 4
  1556. #define ZGEMM_DEFAULT_UNROLL_M 2
  1557. #define ZGEMM_DEFAULT_UNROLL_N 4
  1558. #define SGEMM_DEFAULT_P 992
  1559. #define DGEMM_DEFAULT_P 480
  1560. #define CGEMM_DEFAULT_P 488
  1561. #define ZGEMM_DEFAULT_P 248
  1562. #define SGEMM_DEFAULT_Q 504
  1563. #define DGEMM_DEFAULT_Q 504
  1564. #define CGEMM_DEFAULT_Q 400
  1565. #define ZGEMM_DEFAULT_Q 400
  1566. #define SYMV_P 8
  1567. #endif
  1568. #if defined(SPARC) && defined(V7)
  1569. #define SNUMOPT 4
  1570. #define DNUMOPT 4
  1571. #define GEMM_DEFAULT_OFFSET_A 0
  1572. #define GEMM_DEFAULT_OFFSET_B 2048
  1573. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1574. #define SGEMM_DEFAULT_UNROLL_M 2
  1575. #define SGEMM_DEFAULT_UNROLL_N 8
  1576. #define DGEMM_DEFAULT_UNROLL_M 2
  1577. #define DGEMM_DEFAULT_UNROLL_N 8
  1578. #define CGEMM_DEFAULT_UNROLL_M 1
  1579. #define CGEMM_DEFAULT_UNROLL_N 4
  1580. #define ZGEMM_DEFAULT_UNROLL_M 1
  1581. #define ZGEMM_DEFAULT_UNROLL_N 4
  1582. #define SGEMM_DEFAULT_P 256
  1583. #define DGEMM_DEFAULT_P 256
  1584. #define CGEMM_DEFAULT_P 256
  1585. #define ZGEMM_DEFAULT_P 256
  1586. #define SGEMM_DEFAULT_Q 512
  1587. #define DGEMM_DEFAULT_Q 256
  1588. #define CGEMM_DEFAULT_Q 256
  1589. #define ZGEMM_DEFAULT_Q 128
  1590. #define SYMV_P 8
  1591. #define GEMM_THREAD gemm_thread_mn
  1592. #endif
  1593. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1594. #define SNUMOPT 2
  1595. #define DNUMOPT 2
  1596. #define GEMM_DEFAULT_OFFSET_A 0
  1597. #define GEMM_DEFAULT_OFFSET_B 2048
  1598. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1599. #define SGEMM_DEFAULT_UNROLL_M 4
  1600. #define SGEMM_DEFAULT_UNROLL_N 4
  1601. #define DGEMM_DEFAULT_UNROLL_M 4
  1602. #define DGEMM_DEFAULT_UNROLL_N 4
  1603. #define CGEMM_DEFAULT_UNROLL_M 2
  1604. #define CGEMM_DEFAULT_UNROLL_N 2
  1605. #define ZGEMM_DEFAULT_UNROLL_M 2
  1606. #define ZGEMM_DEFAULT_UNROLL_N 2
  1607. #define SGEMM_DEFAULT_P 512
  1608. #define DGEMM_DEFAULT_P 512
  1609. #define CGEMM_DEFAULT_P 512
  1610. #define ZGEMM_DEFAULT_P 512
  1611. #define SGEMM_DEFAULT_Q 1024
  1612. #define DGEMM_DEFAULT_Q 512
  1613. #define CGEMM_DEFAULT_Q 512
  1614. #define ZGEMM_DEFAULT_Q 256
  1615. #define SYMV_P 8
  1616. #endif
  1617. #ifdef SICORTEX
  1618. #define SNUMOPT 2
  1619. #define DNUMOPT 2
  1620. #define GEMM_DEFAULT_OFFSET_A 0
  1621. #define GEMM_DEFAULT_OFFSET_B 0
  1622. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1623. #define SGEMM_DEFAULT_UNROLL_M 2
  1624. #define SGEMM_DEFAULT_UNROLL_N 8
  1625. #define DGEMM_DEFAULT_UNROLL_M 2
  1626. #define DGEMM_DEFAULT_UNROLL_N 8
  1627. #define CGEMM_DEFAULT_UNROLL_M 1
  1628. #define CGEMM_DEFAULT_UNROLL_N 4
  1629. #define ZGEMM_DEFAULT_UNROLL_M 1
  1630. #define ZGEMM_DEFAULT_UNROLL_N 4
  1631. #define SGEMM_DEFAULT_P 108
  1632. #define DGEMM_DEFAULT_P 112
  1633. #define CGEMM_DEFAULT_P 108
  1634. #define ZGEMM_DEFAULT_P 112
  1635. #define SGEMM_DEFAULT_Q 288
  1636. #define DGEMM_DEFAULT_Q 144
  1637. #define CGEMM_DEFAULT_Q 144
  1638. #define ZGEMM_DEFAULT_Q 72
  1639. #define SGEMM_DEFAULT_R 2000
  1640. #define DGEMM_DEFAULT_R 2000
  1641. #define CGEMM_DEFAULT_R 2000
  1642. #define ZGEMM_DEFAULT_R 2000
  1643. #define SYMV_P 16
  1644. #endif
  1645. #ifdef LOONGSON3A
  1646. ////Copy from SICORTEX
  1647. #define SNUMOPT 2
  1648. #define DNUMOPT 2
  1649. #define GEMM_DEFAULT_OFFSET_A 0
  1650. #define GEMM_DEFAULT_OFFSET_B 0
  1651. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1652. #define SGEMM_DEFAULT_UNROLL_M 8
  1653. #define SGEMM_DEFAULT_UNROLL_N 4
  1654. #define DGEMM_DEFAULT_UNROLL_M 4
  1655. #define DGEMM_DEFAULT_UNROLL_N 4
  1656. #define CGEMM_DEFAULT_UNROLL_M 4
  1657. #define CGEMM_DEFAULT_UNROLL_N 2
  1658. #define ZGEMM_DEFAULT_UNROLL_M 2
  1659. #define ZGEMM_DEFAULT_UNROLL_N 2
  1660. #define SGEMM_DEFAULT_P 64
  1661. #define DGEMM_DEFAULT_P 44
  1662. #define CGEMM_DEFAULT_P 64
  1663. #define ZGEMM_DEFAULT_P 32
  1664. #define SGEMM_DEFAULT_Q 192
  1665. #define DGEMM_DEFAULT_Q 92
  1666. #define CGEMM_DEFAULT_Q 128
  1667. #define ZGEMM_DEFAULT_Q 80
  1668. #define SGEMM_DEFAULT_R 640
  1669. #define DGEMM_DEFAULT_R dgemm_r
  1670. #define CGEMM_DEFAULT_R 640
  1671. #define ZGEMM_DEFAULT_R 640
  1672. #define GEMM_OFFSET_A1 0x10000
  1673. #define GEMM_OFFSET_B1 0x100000
  1674. #define SYMV_P 16
  1675. #endif
  1676. #ifdef LOONGSON3B
  1677. #define SNUMOPT 2
  1678. #define DNUMOPT 2
  1679. #define GEMM_DEFAULT_OFFSET_A 0
  1680. #define GEMM_DEFAULT_OFFSET_B 0
  1681. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1682. #define SGEMM_DEFAULT_UNROLL_M 2
  1683. #define SGEMM_DEFAULT_UNROLL_N 2
  1684. #define DGEMM_DEFAULT_UNROLL_M 2
  1685. #define DGEMM_DEFAULT_UNROLL_N 2
  1686. #define CGEMM_DEFAULT_UNROLL_M 2
  1687. #define CGEMM_DEFAULT_UNROLL_N 2
  1688. #define ZGEMM_DEFAULT_UNROLL_M 2
  1689. #define ZGEMM_DEFAULT_UNROLL_N 2
  1690. #define SGEMM_DEFAULT_P 64
  1691. #define DGEMM_DEFAULT_P 24
  1692. #define CGEMM_DEFAULT_P 24
  1693. #define ZGEMM_DEFAULT_P 20
  1694. #define SGEMM_DEFAULT_Q 192
  1695. #define DGEMM_DEFAULT_Q 128
  1696. #define CGEMM_DEFAULT_Q 128
  1697. #define ZGEMM_DEFAULT_Q 64
  1698. #define SGEMM_DEFAULT_R 512
  1699. #define DGEMM_DEFAULT_R 512
  1700. #define CGEMM_DEFAULT_R 512
  1701. #define ZGEMM_DEFAULT_R 512
  1702. #define GEMM_OFFSET_A1 0x10000
  1703. #define GEMM_OFFSET_B1 0x100000
  1704. #define SYMV_P 16
  1705. #endif
  1706. #ifdef ARMV7
  1707. #define SNUMOPT 2
  1708. #define DNUMOPT 2
  1709. #define GEMM_DEFAULT_OFFSET_A 0
  1710. #define GEMM_DEFAULT_OFFSET_B 0
  1711. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1712. #define SGEMM_DEFAULT_UNROLL_M 4
  1713. #define SGEMM_DEFAULT_UNROLL_N 4
  1714. #define DGEMM_DEFAULT_UNROLL_M 4
  1715. #define DGEMM_DEFAULT_UNROLL_N 4
  1716. #define CGEMM_DEFAULT_UNROLL_M 2
  1717. #define CGEMM_DEFAULT_UNROLL_N 2
  1718. #define ZGEMM_DEFAULT_UNROLL_M 2
  1719. #define ZGEMM_DEFAULT_UNROLL_N 2
  1720. #define SGEMM_DEFAULT_P 128
  1721. #define DGEMM_DEFAULT_P 128
  1722. #define CGEMM_DEFAULT_P 96
  1723. #define ZGEMM_DEFAULT_P 64
  1724. #define SGEMM_DEFAULT_Q 240
  1725. #define DGEMM_DEFAULT_Q 120
  1726. #define CGEMM_DEFAULT_Q 120
  1727. #define ZGEMM_DEFAULT_Q 120
  1728. #define SGEMM_DEFAULT_R 12288
  1729. #define DGEMM_DEFAULT_R 8192
  1730. #define CGEMM_DEFAULT_R 4096
  1731. #define ZGEMM_DEFAULT_R 4096
  1732. #define SYMV_P 16
  1733. #endif
  1734. #if defined(ARMV6)
  1735. #define SNUMOPT 2
  1736. #define DNUMOPT 2
  1737. #define GEMM_DEFAULT_OFFSET_A 0
  1738. #define GEMM_DEFAULT_OFFSET_B 0
  1739. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1740. #define SGEMM_DEFAULT_UNROLL_M 4
  1741. #define SGEMM_DEFAULT_UNROLL_N 2
  1742. #define DGEMM_DEFAULT_UNROLL_M 4
  1743. #define DGEMM_DEFAULT_UNROLL_N 2
  1744. #define CGEMM_DEFAULT_UNROLL_M 2
  1745. #define CGEMM_DEFAULT_UNROLL_N 2
  1746. #define ZGEMM_DEFAULT_UNROLL_M 2
  1747. #define ZGEMM_DEFAULT_UNROLL_N 2
  1748. #define SGEMM_DEFAULT_P 128
  1749. #define DGEMM_DEFAULT_P 128
  1750. #define CGEMM_DEFAULT_P 96
  1751. #define ZGEMM_DEFAULT_P 64
  1752. #define SGEMM_DEFAULT_Q 240
  1753. #define DGEMM_DEFAULT_Q 120
  1754. #define CGEMM_DEFAULT_Q 120
  1755. #define ZGEMM_DEFAULT_Q 120
  1756. #define SGEMM_DEFAULT_R 12288
  1757. #define DGEMM_DEFAULT_R 8192
  1758. #define CGEMM_DEFAULT_R 4096
  1759. #define ZGEMM_DEFAULT_R 4096
  1760. #define SYMV_P 16
  1761. #endif
  1762. #if defined(ARMV8)
  1763. #define SNUMOPT 2
  1764. #define DNUMOPT 2
  1765. #define GEMM_DEFAULT_OFFSET_A 0
  1766. #define GEMM_DEFAULT_OFFSET_B 0
  1767. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1768. #define SGEMM_DEFAULT_UNROLL_M 4
  1769. #define SGEMM_DEFAULT_UNROLL_N 4
  1770. #define DGEMM_DEFAULT_UNROLL_M 2
  1771. #define DGEMM_DEFAULT_UNROLL_N 2
  1772. #define CGEMM_DEFAULT_UNROLL_M 2
  1773. #define CGEMM_DEFAULT_UNROLL_N 2
  1774. #define ZGEMM_DEFAULT_UNROLL_M 2
  1775. #define ZGEMM_DEFAULT_UNROLL_N 2
  1776. #define SGEMM_DEFAULT_P 128
  1777. #define DGEMM_DEFAULT_P 128
  1778. #define CGEMM_DEFAULT_P 96
  1779. #define ZGEMM_DEFAULT_P 64
  1780. #define SGEMM_DEFAULT_Q 240
  1781. #define DGEMM_DEFAULT_Q 120
  1782. #define CGEMM_DEFAULT_Q 120
  1783. #define ZGEMM_DEFAULT_Q 120
  1784. #define SGEMM_DEFAULT_R 12288
  1785. #define DGEMM_DEFAULT_R 8192
  1786. #define CGEMM_DEFAULT_R 4096
  1787. #define ZGEMM_DEFAULT_R 4096
  1788. #define SYMV_P 16
  1789. #endif
  1790. #if defined(ARMV5)
  1791. #define SNUMOPT 2
  1792. #define DNUMOPT 2
  1793. #define GEMM_DEFAULT_OFFSET_A 0
  1794. #define GEMM_DEFAULT_OFFSET_B 0
  1795. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1796. #define SGEMM_DEFAULT_UNROLL_M 2
  1797. #define SGEMM_DEFAULT_UNROLL_N 2
  1798. #define DGEMM_DEFAULT_UNROLL_M 2
  1799. #define DGEMM_DEFAULT_UNROLL_N 2
  1800. #define CGEMM_DEFAULT_UNROLL_M 2
  1801. #define CGEMM_DEFAULT_UNROLL_N 2
  1802. #define ZGEMM_DEFAULT_UNROLL_M 2
  1803. #define ZGEMM_DEFAULT_UNROLL_N 2
  1804. #define SGEMM_DEFAULT_P 128
  1805. #define DGEMM_DEFAULT_P 128
  1806. #define CGEMM_DEFAULT_P 96
  1807. #define ZGEMM_DEFAULT_P 64
  1808. #define SGEMM_DEFAULT_Q 240
  1809. #define DGEMM_DEFAULT_Q 120
  1810. #define CGEMM_DEFAULT_Q 120
  1811. #define ZGEMM_DEFAULT_Q 120
  1812. #define SGEMM_DEFAULT_R 12288
  1813. #define DGEMM_DEFAULT_R 8192
  1814. #define CGEMM_DEFAULT_R 4096
  1815. #define ZGEMM_DEFAULT_R 4096
  1816. #define SYMV_P 16
  1817. #endif
  1818. #ifdef CORTEXA9
  1819. #define SNUMOPT 2
  1820. #define DNUMOPT 2
  1821. #define GEMM_DEFAULT_OFFSET_A 0
  1822. #define GEMM_DEFAULT_OFFSET_B 0
  1823. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1824. #define SGEMM_DEFAULT_UNROLL_M 4
  1825. #define SGEMM_DEFAULT_UNROLL_N 4
  1826. #define DGEMM_DEFAULT_UNROLL_M 4
  1827. #define DGEMM_DEFAULT_UNROLL_N 4
  1828. #define CGEMM_DEFAULT_UNROLL_M 2
  1829. #define CGEMM_DEFAULT_UNROLL_N 2
  1830. #define ZGEMM_DEFAULT_UNROLL_M 2
  1831. #define ZGEMM_DEFAULT_UNROLL_N 2
  1832. #define SGEMM_DEFAULT_P 128
  1833. #define DGEMM_DEFAULT_P 128
  1834. #define CGEMM_DEFAULT_P 96
  1835. #define ZGEMM_DEFAULT_P 64
  1836. #define SGEMM_DEFAULT_Q 240
  1837. #define DGEMM_DEFAULT_Q 120
  1838. #define CGEMM_DEFAULT_Q 120
  1839. #define ZGEMM_DEFAULT_Q 120
  1840. #define SGEMM_DEFAULT_R 12288
  1841. #define DGEMM_DEFAULT_R 8192
  1842. #define CGEMM_DEFAULT_R 4096
  1843. #define ZGEMM_DEFAULT_R 4096
  1844. #define SYMV_P 16
  1845. #endif
  1846. #ifdef CORTEXA15
  1847. #define SNUMOPT 2
  1848. #define DNUMOPT 2
  1849. #define GEMM_DEFAULT_OFFSET_A 0
  1850. #define GEMM_DEFAULT_OFFSET_B 0
  1851. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1852. #define SGEMM_DEFAULT_UNROLL_M 4
  1853. #define SGEMM_DEFAULT_UNROLL_N 4
  1854. #define DGEMM_DEFAULT_UNROLL_M 4
  1855. #define DGEMM_DEFAULT_UNROLL_N 4
  1856. #define CGEMM_DEFAULT_UNROLL_M 2
  1857. #define CGEMM_DEFAULT_UNROLL_N 2
  1858. #define ZGEMM_DEFAULT_UNROLL_M 2
  1859. #define ZGEMM_DEFAULT_UNROLL_N 2
  1860. #define SGEMM_DEFAULT_P 128
  1861. #define DGEMM_DEFAULT_P 128
  1862. #define CGEMM_DEFAULT_P 96
  1863. #define ZGEMM_DEFAULT_P 64
  1864. #define SGEMM_DEFAULT_Q 240
  1865. #define DGEMM_DEFAULT_Q 120
  1866. #define CGEMM_DEFAULT_Q 120
  1867. #define ZGEMM_DEFAULT_Q 120
  1868. #define SGEMM_DEFAULT_R 12288
  1869. #define DGEMM_DEFAULT_R 8192
  1870. #define CGEMM_DEFAULT_R 4096
  1871. #define ZGEMM_DEFAULT_R 4096
  1872. #define SYMV_P 16
  1873. #endif
  1874. #ifdef GENERIC
  1875. #define SNUMOPT 2
  1876. #define DNUMOPT 2
  1877. #define GEMM_DEFAULT_OFFSET_A 0
  1878. #define GEMM_DEFAULT_OFFSET_B 0
  1879. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1880. #define SGEMM_DEFAULT_UNROLL_N 2
  1881. #define DGEMM_DEFAULT_UNROLL_N 2
  1882. #define QGEMM_DEFAULT_UNROLL_N 2
  1883. #define CGEMM_DEFAULT_UNROLL_N 2
  1884. #define ZGEMM_DEFAULT_UNROLL_N 2
  1885. #define XGEMM_DEFAULT_UNROLL_N 1
  1886. #ifdef ARCH_X86
  1887. #define SGEMM_DEFAULT_UNROLL_M 2
  1888. #define DGEMM_DEFAULT_UNROLL_M 2
  1889. #define QGEMM_DEFAULT_UNROLL_M 2
  1890. #define CGEMM_DEFAULT_UNROLL_M 2
  1891. #define ZGEMM_DEFAULT_UNROLL_M 2
  1892. #define XGEMM_DEFAULT_UNROLL_M 1
  1893. #else
  1894. #define SGEMM_DEFAULT_UNROLL_M 2
  1895. #define DGEMM_DEFAULT_UNROLL_M 2
  1896. #define QGEMM_DEFAULT_UNROLL_M 2
  1897. #define CGEMM_DEFAULT_UNROLL_M 2
  1898. #define ZGEMM_DEFAULT_UNROLL_M 2
  1899. #define XGEMM_DEFAULT_UNROLL_M 1
  1900. #endif
  1901. #define SGEMM_DEFAULT_P sgemm_p
  1902. #define DGEMM_DEFAULT_P dgemm_p
  1903. #define QGEMM_DEFAULT_P qgemm_p
  1904. #define CGEMM_DEFAULT_P cgemm_p
  1905. #define ZGEMM_DEFAULT_P zgemm_p
  1906. #define XGEMM_DEFAULT_P xgemm_p
  1907. #define SGEMM_DEFAULT_R sgemm_r
  1908. #define DGEMM_DEFAULT_R dgemm_r
  1909. #define QGEMM_DEFAULT_R qgemm_r
  1910. #define CGEMM_DEFAULT_R cgemm_r
  1911. #define ZGEMM_DEFAULT_R zgemm_r
  1912. #define XGEMM_DEFAULT_R xgemm_r
  1913. #define SGEMM_DEFAULT_Q 128
  1914. #define DGEMM_DEFAULT_Q 128
  1915. #define QGEMM_DEFAULT_Q 128
  1916. #define CGEMM_DEFAULT_Q 128
  1917. #define ZGEMM_DEFAULT_Q 128
  1918. #define XGEMM_DEFAULT_Q 128
  1919. #define SYMV_P 16
  1920. #endif
  1921. #ifndef QGEMM_DEFAULT_UNROLL_M
  1922. #define QGEMM_DEFAULT_UNROLL_M 2
  1923. #endif
  1924. #ifndef QGEMM_DEFAULT_UNROLL_N
  1925. #define QGEMM_DEFAULT_UNROLL_N 2
  1926. #endif
  1927. #ifndef XGEMM_DEFAULT_UNROLL_M
  1928. #define XGEMM_DEFAULT_UNROLL_M 2
  1929. #endif
  1930. #ifndef XGEMM_DEFAULT_UNROLL_N
  1931. #define XGEMM_DEFAULT_UNROLL_N 2
  1932. #endif
  1933. #ifndef HAVE_SSE2
  1934. #define SHUFPD_0 shufps $0x44,
  1935. #define SHUFPD_1 shufps $0x4e,
  1936. #define SHUFPD_2 shufps $0xe4,
  1937. #define SHUFPD_3 shufps $0xee,
  1938. #endif
  1939. #ifndef SHUFPD_0
  1940. #define SHUFPD_0 shufpd $0,
  1941. #endif
  1942. #ifndef SHUFPD_1
  1943. #define SHUFPD_1 shufpd $1,
  1944. #endif
  1945. #ifndef SHUFPD_2
  1946. #define SHUFPD_2 shufpd $2,
  1947. #endif
  1948. #ifndef SHUFPD_3
  1949. #define SHUFPD_3 shufpd $3,
  1950. #endif
  1951. #ifndef SHUFPS_39
  1952. #define SHUFPS_39 shufps $0x39,
  1953. #endif
  1954. #endif