You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 59 kB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530
  1. /*****************************************************************************
  2. Copyright (c) 2011-2014, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written
  16. permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. **********************************************************************************/
  28. /*********************************************************************/
  29. /* Copyright 2009, 2010 The University of Texas at Austin. */
  30. /* All rights reserved. */
  31. /* */
  32. /* Redistribution and use in source and binary forms, with or */
  33. /* without modification, are permitted provided that the following */
  34. /* conditions are met: */
  35. /* */
  36. /* 1. Redistributions of source code must retain the above */
  37. /* copyright notice, this list of conditions and the following */
  38. /* disclaimer. */
  39. /* */
  40. /* 2. Redistributions in binary form must reproduce the above */
  41. /* copyright notice, this list of conditions and the following */
  42. /* disclaimer in the documentation and/or other materials */
  43. /* provided with the distribution. */
  44. /* */
  45. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  46. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  47. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  48. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  49. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  50. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  51. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  52. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  53. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  54. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  55. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  56. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  57. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  58. /* POSSIBILITY OF SUCH DAMAGE. */
  59. /* */
  60. /* The views and conclusions contained in the software and */
  61. /* documentation are those of the authors and should not be */
  62. /* interpreted as representing official policies, either expressed */
  63. /* or implied, of The University of Texas at Austin. */
  64. /*********************************************************************/
  65. #ifndef PARAM_H
  66. #define PARAM_H
  /*
   * OPTERON: tuning macros that take effect when the build defines OPTERON.
   * The *_DEFAULT_P and *_DEFAULT_R macros expand to lowercase identifiers
   * (sgemm_p, sgemm_r, ...) that are not defined in this part of the file;
   * presumably they are variables provided elsewhere -- TODO confirm.
   * NOTE(review): the P/Q/R and UNROLL_M/N names look like GEMM blocking and
   * kernel unroll sizes; their exact meaning is not visible here -- confirm.
   */
  67. #ifdef OPTERON
  68. #define SNUMOPT 4
  69. #define DNUMOPT 2
  70. #define GEMM_DEFAULT_OFFSET_A 64
  71. #define GEMM_DEFAULT_OFFSET_B 256
  72. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  /* UNROLL_N is the same for both ARCH_X86 and non-ARCH_X86 builds. */
  73. #define SGEMM_DEFAULT_UNROLL_N 4
  74. #define DGEMM_DEFAULT_UNROLL_N 4
  75. #define QGEMM_DEFAULT_UNROLL_N 2
  76. #define CGEMM_DEFAULT_UNROLL_N 2
  77. #define ZGEMM_DEFAULT_UNROLL_N 2
  78. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M depends on ARCH_X86: S/D/C/Z are halved relative to the #else branch. */
  79. #ifdef ARCH_X86
  80. #define SGEMM_DEFAULT_UNROLL_M 4
  81. #define DGEMM_DEFAULT_UNROLL_M 2
  82. #define QGEMM_DEFAULT_UNROLL_M 2
  83. #define CGEMM_DEFAULT_UNROLL_M 2
  84. #define ZGEMM_DEFAULT_UNROLL_M 1
  85. #define XGEMM_DEFAULT_UNROLL_M 1
  86. #else /* !ARCH_X86 */
  87. #define SGEMM_DEFAULT_UNROLL_M 8
  88. #define DGEMM_DEFAULT_UNROLL_M 4
  89. #define QGEMM_DEFAULT_UNROLL_M 2
  90. #define CGEMM_DEFAULT_UNROLL_M 4
  91. #define ZGEMM_DEFAULT_UNROLL_M 2
  92. #define XGEMM_DEFAULT_UNROLL_M 1
  93. #endif /* ARCH_X86 */
  /* P and R are not literals here; they forward to identifiers defined elsewhere. */
  94. #define SGEMM_DEFAULT_P sgemm_p
  95. #define DGEMM_DEFAULT_P dgemm_p
  96. #define QGEMM_DEFAULT_P qgemm_p
  97. #define CGEMM_DEFAULT_P cgemm_p
  98. #define ZGEMM_DEFAULT_P zgemm_p
  99. #define XGEMM_DEFAULT_P xgemm_p
  100. #define SGEMM_DEFAULT_R sgemm_r
  101. #define DGEMM_DEFAULT_R dgemm_r
  102. #define QGEMM_DEFAULT_R qgemm_r
  103. #define CGEMM_DEFAULT_R cgemm_r
  104. #define ZGEMM_DEFAULT_R zgemm_r
  105. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is 248 for every precision when ALLOC_HUGETLB is defined, 240 otherwise. */
  106. #ifdef ALLOC_HUGETLB
  107. #define SGEMM_DEFAULT_Q 248
  108. #define DGEMM_DEFAULT_Q 248
  109. #define QGEMM_DEFAULT_Q 248
  110. #define CGEMM_DEFAULT_Q 248
  111. #define ZGEMM_DEFAULT_Q 248
  112. #define XGEMM_DEFAULT_Q 248
  113. #else /* !ALLOC_HUGETLB */
  114. #define SGEMM_DEFAULT_Q 240
  115. #define DGEMM_DEFAULT_Q 240
  116. #define QGEMM_DEFAULT_Q 240
  117. #define CGEMM_DEFAULT_Q 240
  118. #define ZGEMM_DEFAULT_Q 240
  119. #define XGEMM_DEFAULT_Q 240
  120. #endif /* ALLOC_HUGETLB */
  121. #define SYMV_P 16
  122. #define HAVE_EXCLUSIVE_CACHE
  123. #endif /* OPTERON */
  /*
   * BARCELONA / SHANGHAI / BOBCAT: one shared set of tuning macros, active
   * when any of the three target macros is defined.
   * P and Q are literal constants here; R forwards to lowercase identifiers
   * (sgemm_r, ...) that are not defined in this part of the file --
   * presumably variables provided elsewhere, TODO confirm.
   * This block also defines GEMM_THREAD as gemm_thread_mn.
   */
  124. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
  125. #define SNUMOPT 8
  126. #define DNUMOPT 4
  127. #define GEMM_DEFAULT_OFFSET_A 64
  128. #define GEMM_DEFAULT_OFFSET_B 832
  129. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  130. #define SGEMM_DEFAULT_UNROLL_N 4
  131. #define DGEMM_DEFAULT_UNROLL_N 4
  132. #define QGEMM_DEFAULT_UNROLL_N 2
  133. #define CGEMM_DEFAULT_UNROLL_N 2
  134. #define ZGEMM_DEFAULT_UNROLL_N 2
  135. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M depends on ARCH_X86. */
  136. #ifdef ARCH_X86
  137. #define SGEMM_DEFAULT_UNROLL_M 4
  138. #define DGEMM_DEFAULT_UNROLL_M 2
  139. #define QGEMM_DEFAULT_UNROLL_M 2
  140. #define CGEMM_DEFAULT_UNROLL_M 2
  141. #define ZGEMM_DEFAULT_UNROLL_M 1
  142. #define XGEMM_DEFAULT_UNROLL_M 1
  143. #else /* !ARCH_X86 */
  144. #define SGEMM_DEFAULT_UNROLL_M 8
  145. #define DGEMM_DEFAULT_UNROLL_M 4
  146. #define QGEMM_DEFAULT_UNROLL_M 2
  147. #define CGEMM_DEFAULT_UNROLL_M 4
  148. #define ZGEMM_DEFAULT_UNROLL_M 2
  149. #define XGEMM_DEFAULT_UNROLL_M 1
  150. #endif /* ARCH_X86 */
  /*
   * The "#if 0" branch is never compiled: it keeps an alternative P/Q set
   * (P 496/248/124/62, Q 248). The #else branch is the one in effect
   * (P 448/224/112/56, Q 224).
   */
  151. #if 0
  152. #define SGEMM_DEFAULT_P 496
  153. #define DGEMM_DEFAULT_P 248
  154. #define QGEMM_DEFAULT_P 124
  155. #define CGEMM_DEFAULT_P 248
  156. #define ZGEMM_DEFAULT_P 124
  157. #define XGEMM_DEFAULT_P 62
  158. #define SGEMM_DEFAULT_Q 248
  159. #define DGEMM_DEFAULT_Q 248
  160. #define QGEMM_DEFAULT_Q 248
  161. #define CGEMM_DEFAULT_Q 248
  162. #define ZGEMM_DEFAULT_Q 248
  163. #define XGEMM_DEFAULT_Q 248
  164. #else /* active P/Q values */
  165. #define SGEMM_DEFAULT_P 448
  166. #define DGEMM_DEFAULT_P 224
  167. #define QGEMM_DEFAULT_P 112
  168. #define CGEMM_DEFAULT_P 224
  169. #define ZGEMM_DEFAULT_P 112
  170. #define XGEMM_DEFAULT_P 56
  171. #define SGEMM_DEFAULT_Q 224
  172. #define DGEMM_DEFAULT_Q 224
  173. #define QGEMM_DEFAULT_Q 224
  174. #define CGEMM_DEFAULT_Q 224
  175. #define ZGEMM_DEFAULT_Q 224
  176. #define XGEMM_DEFAULT_Q 224
  177. #endif /* 0 */
  /* R forwards to identifiers defined elsewhere (listed in S, Q, D, C, Z, X order). */
  178. #define SGEMM_DEFAULT_R sgemm_r
  179. #define QGEMM_DEFAULT_R qgemm_r
  180. #define DGEMM_DEFAULT_R dgemm_r
  181. #define CGEMM_DEFAULT_R cgemm_r
  182. #define ZGEMM_DEFAULT_R zgemm_r
  183. #define XGEMM_DEFAULT_R xgemm_r
  184. #define SYMV_P 16
  185. #define HAVE_EXCLUSIVE_CACHE
  186. #define GEMM_THREAD gemm_thread_mn
  187. #endif /* BARCELONA || SHANGHAI || BOBCAT */
  /*
   * BULLDOZER: tuning macros that take effect when the build defines BULLDOZER.
   * S/D unroll factors are chosen by ARCH_X86, while S/D P and Q values are
   * chosen by ARCH_X86_64.
   * NOTE(review): the two conditionals test different macros; presumably
   * exactly one of ARCH_X86 / ARCH_X86_64 is always defined -- confirm.
   * R for the non-3M routines forwards to lowercase identifiers (sgemm_r, ...)
   * that are not defined in this part of the file.
   */
  188. #ifdef BULLDOZER
  189. #define SNUMOPT 8
  190. #define DNUMOPT 4
  191. #define GEMM_DEFAULT_OFFSET_A 64
  192. #define GEMM_DEFAULT_OFFSET_B 832
  193. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set in the branches below. */
  194. #define QGEMM_DEFAULT_UNROLL_N 2
  195. #define CGEMM_DEFAULT_UNROLL_N 2
  196. #define ZGEMM_DEFAULT_UNROLL_N 2
  197. #define XGEMM_DEFAULT_UNROLL_N 1
  198. #ifdef ARCH_X86
  199. #define SGEMM_DEFAULT_UNROLL_N 4
  200. #define DGEMM_DEFAULT_UNROLL_N 4
  201. #define SGEMM_DEFAULT_UNROLL_M 4
  202. #define DGEMM_DEFAULT_UNROLL_M 2
  203. #define QGEMM_DEFAULT_UNROLL_M 2
  204. #define CGEMM_DEFAULT_UNROLL_M 2
  205. #define ZGEMM_DEFAULT_UNROLL_M 1
  206. #define XGEMM_DEFAULT_UNROLL_M 1
  207. #else /* !ARCH_X86 */
  208. #define SGEMM_DEFAULT_UNROLL_N 2
  209. #define DGEMM_DEFAULT_UNROLL_N 2
  210. #define SGEMM_DEFAULT_UNROLL_M 16
  211. #define DGEMM_DEFAULT_UNROLL_M 8
  212. #define QGEMM_DEFAULT_UNROLL_M 2
  213. #define CGEMM_DEFAULT_UNROLL_M 4
  214. #define ZGEMM_DEFAULT_UNROLL_M 2
  215. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The 3M unrolls, DGEMM_DEFAULT_UNROLL_MN and GEMV_UNROLL exist only in this non-ARCH_X86 branch. */
  216. #define CGEMM3M_DEFAULT_UNROLL_N 4
  217. #define CGEMM3M_DEFAULT_UNROLL_M 8
  218. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  219. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  220. #define DGEMM_DEFAULT_UNROLL_MN 16
  221. #define GEMV_UNROLL 8
  222. #endif /* ARCH_X86 */
  /* S/D P: 768/384 with ARCH_X86_64, 448/224 otherwise. */
  223. #if defined(ARCH_X86_64)
  224. #define SGEMM_DEFAULT_P 768
  225. #define DGEMM_DEFAULT_P 384
  226. #else /* !ARCH_X86_64 */
  227. #define SGEMM_DEFAULT_P 448
  228. #define DGEMM_DEFAULT_P 224
  229. #endif /* ARCH_X86_64 */
  230. #define QGEMM_DEFAULT_P 112
  231. #define CGEMM_DEFAULT_P 224
  232. #define ZGEMM_DEFAULT_P 112
  233. #define XGEMM_DEFAULT_P 56
  /* S/D Q: 168 with ARCH_X86_64, 224 otherwise. */
  234. #if defined(ARCH_X86_64)
  235. #define SGEMM_DEFAULT_Q 168
  236. #define DGEMM_DEFAULT_Q 168
  237. #else /* !ARCH_X86_64 */
  238. #define SGEMM_DEFAULT_Q 224
  239. #define DGEMM_DEFAULT_Q 224
  240. #endif /* ARCH_X86_64 */
  241. #define QGEMM_DEFAULT_Q 224
  242. #define CGEMM_DEFAULT_Q 224
  243. #define ZGEMM_DEFAULT_Q 224
  244. #define XGEMM_DEFAULT_Q 224
  /* The 3M P/Q/R values are literal constants, defined regardless of ARCH_X86 / ARCH_X86_64. */
  245. #define CGEMM3M_DEFAULT_P 448
  246. #define ZGEMM3M_DEFAULT_P 224
  247. #define XGEMM3M_DEFAULT_P 112
  248. #define CGEMM3M_DEFAULT_Q 224
  249. #define ZGEMM3M_DEFAULT_Q 224
  250. #define XGEMM3M_DEFAULT_Q 224
  251. #define CGEMM3M_DEFAULT_R 12288
  252. #define ZGEMM3M_DEFAULT_R 12288
  253. #define XGEMM3M_DEFAULT_R 12288
  /* R for the regular routines forwards to identifiers defined elsewhere. */
  254. #define SGEMM_DEFAULT_R sgemm_r
  255. #define QGEMM_DEFAULT_R qgemm_r
  256. #define DGEMM_DEFAULT_R dgemm_r
  257. #define CGEMM_DEFAULT_R cgemm_r
  258. #define ZGEMM_DEFAULT_R zgemm_r
  259. #define XGEMM_DEFAULT_R xgemm_r
  260. #define SYMV_P 16
  261. #define HAVE_EXCLUSIVE_CACHE
  262. #define GEMM_THREAD gemm_thread_mn
  263. #endif /* BULLDOZER */
  /*
   * PILEDRIVER: tuning macros that take effect when the build defines PILEDRIVER.
   * S/D unroll factors are chosen by ARCH_X86; S/D/Z/C P and Q values are
   * chosen by ARCH_X86_64.
   * NOTE(review): the two conditionals test different macros; presumably
   * exactly one of ARCH_X86 / ARCH_X86_64 is always defined -- confirm.
   * SGEMM_DEFAULT_R and DGEMM_DEFAULT_R are the literal 12288 here, while the
   * Q/C/Z/X R macros forward to lowercase identifiers not defined in this part
   * of the file.
   */
  264. #ifdef PILEDRIVER
  265. #define SNUMOPT 8
  266. #define DNUMOPT 4
  267. #define GEMM_DEFAULT_OFFSET_A 64
  268. #define GEMM_DEFAULT_OFFSET_B 832
  269. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set in the branches below. */
  270. #define QGEMM_DEFAULT_UNROLL_N 2
  271. #define CGEMM_DEFAULT_UNROLL_N 2
  272. #define ZGEMM_DEFAULT_UNROLL_N 2
  273. #define XGEMM_DEFAULT_UNROLL_N 1
  274. #ifdef ARCH_X86
  275. #define SGEMM_DEFAULT_UNROLL_N 4
  276. #define DGEMM_DEFAULT_UNROLL_N 4
  277. #define SGEMM_DEFAULT_UNROLL_M 4
  278. #define DGEMM_DEFAULT_UNROLL_M 2
  279. #define QGEMM_DEFAULT_UNROLL_M 2
  280. #define CGEMM_DEFAULT_UNROLL_M 2
  281. #define ZGEMM_DEFAULT_UNROLL_M 1
  282. #define XGEMM_DEFAULT_UNROLL_M 1
  283. #else /* !ARCH_X86 */
  284. #define SGEMM_DEFAULT_UNROLL_N 2
  285. #define DGEMM_DEFAULT_UNROLL_N 2
  286. #define SGEMM_DEFAULT_UNROLL_M 16
  287. #define DGEMM_DEFAULT_UNROLL_M 8
  288. #define QGEMM_DEFAULT_UNROLL_M 2
  289. #define CGEMM_DEFAULT_UNROLL_M 4
  290. #define ZGEMM_DEFAULT_UNROLL_M 2
  291. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The 3M unrolls and GEMV_UNROLL exist only in this non-ARCH_X86 branch. */
  292. #define CGEMM3M_DEFAULT_UNROLL_N 4
  293. #define CGEMM3M_DEFAULT_UNROLL_M 8
  294. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  295. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  296. #define GEMV_UNROLL 8
  297. #endif /* ARCH_X86 */
  /* S/D/Z/C P values depend on ARCH_X86_64; Q and X P values follow unconditionally. */
  298. #if defined(ARCH_X86_64)
  299. #define SGEMM_DEFAULT_P 768
  300. #define DGEMM_DEFAULT_P 768
  301. #define ZGEMM_DEFAULT_P 384
  302. #define CGEMM_DEFAULT_P 768
  303. #else /* !ARCH_X86_64 */
  304. #define SGEMM_DEFAULT_P 448
  305. #define DGEMM_DEFAULT_P 480
  306. #define ZGEMM_DEFAULT_P 112
  307. #define CGEMM_DEFAULT_P 224
  308. #endif /* ARCH_X86_64 */
  309. #define QGEMM_DEFAULT_P 112
  310. #define XGEMM_DEFAULT_P 56
  /* S/D/Z/C Q values depend on ARCH_X86_64 (192/168 vs 224). */
  311. #if defined(ARCH_X86_64)
  312. #define SGEMM_DEFAULT_Q 192
  313. #define DGEMM_DEFAULT_Q 168
  314. #define ZGEMM_DEFAULT_Q 168
  315. #define CGEMM_DEFAULT_Q 168
  316. #else /* !ARCH_X86_64 */
  317. #define SGEMM_DEFAULT_Q 224
  318. #define DGEMM_DEFAULT_Q 224
  319. #define ZGEMM_DEFAULT_Q 224
  320. #define CGEMM_DEFAULT_Q 224
  321. #endif /* ARCH_X86_64 */
  322. #define QGEMM_DEFAULT_Q 224
  323. #define XGEMM_DEFAULT_Q 224
  /* The 3M P/Q/R values are literal constants, defined regardless of ARCH_X86 / ARCH_X86_64. */
  324. #define CGEMM3M_DEFAULT_P 448
  325. #define ZGEMM3M_DEFAULT_P 224
  326. #define XGEMM3M_DEFAULT_P 112
  327. #define CGEMM3M_DEFAULT_Q 224
  328. #define ZGEMM3M_DEFAULT_Q 224
  329. #define XGEMM3M_DEFAULT_Q 224
  330. #define CGEMM3M_DEFAULT_R 12288
  331. #define ZGEMM3M_DEFAULT_R 12288
  332. #define XGEMM3M_DEFAULT_R 12288
  /* S and D R are fixed at 12288; Q/C/Z/X R forward to identifiers defined elsewhere. */
  333. #define SGEMM_DEFAULT_R 12288
  334. #define QGEMM_DEFAULT_R qgemm_r
  335. #define DGEMM_DEFAULT_R 12288
  336. #define CGEMM_DEFAULT_R cgemm_r
  337. #define ZGEMM_DEFAULT_R zgemm_r
  338. #define XGEMM_DEFAULT_R xgemm_r
  339. #define SYMV_P 16
  340. #define HAVE_EXCLUSIVE_CACHE
  341. #define GEMM_THREAD gemm_thread_mn
  342. #endif /* PILEDRIVER */
  /*
   * STEAMROLLER: tuning macros that take effect when the build defines STEAMROLLER.
   * S/D unroll factors are chosen by ARCH_X86; S/D/Z/C P and Q values are
   * chosen by ARCH_X86_64.
   * NOTE(review): the two conditionals test different macros; presumably
   * exactly one of ARCH_X86 / ARCH_X86_64 is always defined -- confirm.
   * SGEMM_DEFAULT_R and DGEMM_DEFAULT_R are the literal 12288 here, while the
   * Q/C/Z/X R macros forward to lowercase identifiers not defined in this part
   * of the file.
   */
  343. #ifdef STEAMROLLER
  344. #define SNUMOPT 8
  345. #define DNUMOPT 4
  346. #define GEMM_DEFAULT_OFFSET_A 64
  347. #define GEMM_DEFAULT_OFFSET_B 832
  348. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set in the branches below. */
  349. #define QGEMM_DEFAULT_UNROLL_N 2
  350. #define CGEMM_DEFAULT_UNROLL_N 2
  351. #define ZGEMM_DEFAULT_UNROLL_N 2
  352. #define XGEMM_DEFAULT_UNROLL_N 1
  353. #ifdef ARCH_X86
  354. #define SGEMM_DEFAULT_UNROLL_N 4
  355. #define DGEMM_DEFAULT_UNROLL_N 4
  356. #define SGEMM_DEFAULT_UNROLL_M 4
  357. #define DGEMM_DEFAULT_UNROLL_M 2
  358. #define QGEMM_DEFAULT_UNROLL_M 2
  359. #define CGEMM_DEFAULT_UNROLL_M 2
  360. #define ZGEMM_DEFAULT_UNROLL_M 1
  361. #define XGEMM_DEFAULT_UNROLL_M 1
  362. #else /* !ARCH_X86 */
  363. #define SGEMM_DEFAULT_UNROLL_N 2
  364. #define DGEMM_DEFAULT_UNROLL_N 2
  365. #define SGEMM_DEFAULT_UNROLL_M 16
  366. #define DGEMM_DEFAULT_UNROLL_M 8
  367. #define QGEMM_DEFAULT_UNROLL_M 2
  368. #define CGEMM_DEFAULT_UNROLL_M 4
  369. #define ZGEMM_DEFAULT_UNROLL_M 2
  370. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The 3M unrolls and GEMV_UNROLL exist only in this non-ARCH_X86 branch. */
  371. #define CGEMM3M_DEFAULT_UNROLL_N 4
  372. #define CGEMM3M_DEFAULT_UNROLL_M 8
  373. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  374. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  375. #define GEMV_UNROLL 8
  376. #endif /* ARCH_X86 */
  /* S/D/Z/C P values depend on ARCH_X86_64; Q and X P values follow unconditionally. */
  377. #if defined(ARCH_X86_64)
  378. #define SGEMM_DEFAULT_P 768
  379. #define DGEMM_DEFAULT_P 576
  380. #define ZGEMM_DEFAULT_P 288
  381. #define CGEMM_DEFAULT_P 576
  382. #else /* !ARCH_X86_64 */
  383. #define SGEMM_DEFAULT_P 448
  384. #define DGEMM_DEFAULT_P 480
  385. #define ZGEMM_DEFAULT_P 112
  386. #define CGEMM_DEFAULT_P 224
  387. #endif /* ARCH_X86_64 */
  388. #define QGEMM_DEFAULT_P 112
  389. #define XGEMM_DEFAULT_P 56
  /* S/D/Z/C Q values depend on ARCH_X86_64 (192/160 vs 224). */
  390. #if defined(ARCH_X86_64)
  391. #define SGEMM_DEFAULT_Q 192
  392. #define DGEMM_DEFAULT_Q 160
  393. #define ZGEMM_DEFAULT_Q 160
  394. #define CGEMM_DEFAULT_Q 160
  395. #else /* !ARCH_X86_64 */
  396. #define SGEMM_DEFAULT_Q 224
  397. #define DGEMM_DEFAULT_Q 224
  398. #define ZGEMM_DEFAULT_Q 224
  399. #define CGEMM_DEFAULT_Q 224
  400. #endif /* ARCH_X86_64 */
  401. #define QGEMM_DEFAULT_Q 224
  402. #define XGEMM_DEFAULT_Q 224
  /* The 3M P/Q/R values are literal constants, defined regardless of ARCH_X86 / ARCH_X86_64. */
  403. #define CGEMM3M_DEFAULT_P 448
  404. #define ZGEMM3M_DEFAULT_P 224
  405. #define XGEMM3M_DEFAULT_P 112
  406. #define CGEMM3M_DEFAULT_Q 224
  407. #define ZGEMM3M_DEFAULT_Q 224
  408. #define XGEMM3M_DEFAULT_Q 224
  409. #define CGEMM3M_DEFAULT_R 12288
  410. #define ZGEMM3M_DEFAULT_R 12288
  411. #define XGEMM3M_DEFAULT_R 12288
  /* S and D R are fixed at 12288; Q/C/Z/X R forward to identifiers defined elsewhere. */
  412. #define SGEMM_DEFAULT_R 12288
  413. #define QGEMM_DEFAULT_R qgemm_r
  414. #define DGEMM_DEFAULT_R 12288
  415. #define CGEMM_DEFAULT_R cgemm_r
  416. #define ZGEMM_DEFAULT_R zgemm_r
  417. #define XGEMM_DEFAULT_R xgemm_r
  418. #define SYMV_P 16
  419. #define HAVE_EXCLUSIVE_CACHE
  420. #define GEMM_THREAD gemm_thread_mn
  421. #endif /* STEAMROLLER */
  /*
   * EXCAVATOR: tuning macros that take effect when the build defines EXCAVATOR.
   * S/D unroll factors are chosen by ARCH_X86; S/D/Z/C P and Q values are
   * chosen by ARCH_X86_64.
   * NOTE(review): the two conditionals test different macros; presumably
   * exactly one of ARCH_X86 / ARCH_X86_64 is always defined -- confirm.
   * SGEMM_DEFAULT_R and DGEMM_DEFAULT_R are the literal 12288 here, while the
   * Q/C/Z/X R macros forward to lowercase identifiers not defined in this part
   * of the file.
   */
  422. #ifdef EXCAVATOR
  423. #define SNUMOPT 8
  424. #define DNUMOPT 4
  425. #define GEMM_DEFAULT_OFFSET_A 64
  426. #define GEMM_DEFAULT_OFFSET_B 832
  427. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set in the branches below. */
  428. #define QGEMM_DEFAULT_UNROLL_N 2
  429. #define CGEMM_DEFAULT_UNROLL_N 2
  430. #define ZGEMM_DEFAULT_UNROLL_N 2
  431. #define XGEMM_DEFAULT_UNROLL_N 1
  432. #ifdef ARCH_X86
  433. #define SGEMM_DEFAULT_UNROLL_N 4
  434. #define DGEMM_DEFAULT_UNROLL_N 4
  435. #define SGEMM_DEFAULT_UNROLL_M 4
  436. #define DGEMM_DEFAULT_UNROLL_M 2
  437. #define QGEMM_DEFAULT_UNROLL_M 2
  438. #define CGEMM_DEFAULT_UNROLL_M 2
  439. #define ZGEMM_DEFAULT_UNROLL_M 1
  440. #define XGEMM_DEFAULT_UNROLL_M 1
  441. #else /* !ARCH_X86 */
  442. #define SGEMM_DEFAULT_UNROLL_N 2
  443. #define DGEMM_DEFAULT_UNROLL_N 2
  444. #define SGEMM_DEFAULT_UNROLL_M 16
  445. #define DGEMM_DEFAULT_UNROLL_M 8
  446. #define QGEMM_DEFAULT_UNROLL_M 2
  447. #define CGEMM_DEFAULT_UNROLL_M 4
  448. #define ZGEMM_DEFAULT_UNROLL_M 2
  449. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The 3M unrolls and GEMV_UNROLL exist only in this non-ARCH_X86 branch. */
  450. #define CGEMM3M_DEFAULT_UNROLL_N 4
  451. #define CGEMM3M_DEFAULT_UNROLL_M 8
  452. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  453. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  454. #define GEMV_UNROLL 8
  455. #endif /* ARCH_X86 */
  /* S/D/Z/C P values depend on ARCH_X86_64; Q and X P values follow unconditionally. */
  456. #if defined(ARCH_X86_64)
  457. #define SGEMM_DEFAULT_P 768
  458. #define DGEMM_DEFAULT_P 576
  459. #define ZGEMM_DEFAULT_P 288
  460. #define CGEMM_DEFAULT_P 576
  461. #else /* !ARCH_X86_64 */
  462. #define SGEMM_DEFAULT_P 448
  463. #define DGEMM_DEFAULT_P 480
  464. #define ZGEMM_DEFAULT_P 112
  465. #define CGEMM_DEFAULT_P 224
  466. #endif /* ARCH_X86_64 */
  467. #define QGEMM_DEFAULT_P 112
  468. #define XGEMM_DEFAULT_P 56
  /* S/D/Z/C Q values depend on ARCH_X86_64 (192/160 vs 224). */
  469. #if defined(ARCH_X86_64)
  470. #define SGEMM_DEFAULT_Q 192
  471. #define DGEMM_DEFAULT_Q 160
  472. #define ZGEMM_DEFAULT_Q 160
  473. #define CGEMM_DEFAULT_Q 160
  474. #else /* !ARCH_X86_64 */
  475. #define SGEMM_DEFAULT_Q 224
  476. #define DGEMM_DEFAULT_Q 224
  477. #define ZGEMM_DEFAULT_Q 224
  478. #define CGEMM_DEFAULT_Q 224
  479. #endif /* ARCH_X86_64 */
  480. #define QGEMM_DEFAULT_Q 224
  481. #define XGEMM_DEFAULT_Q 224
  /* The 3M P/Q/R values are literal constants, defined regardless of ARCH_X86 / ARCH_X86_64. */
  482. #define CGEMM3M_DEFAULT_P 448
  483. #define ZGEMM3M_DEFAULT_P 224
  484. #define XGEMM3M_DEFAULT_P 112
  485. #define CGEMM3M_DEFAULT_Q 224
  486. #define ZGEMM3M_DEFAULT_Q 224
  487. #define XGEMM3M_DEFAULT_Q 224
  488. #define CGEMM3M_DEFAULT_R 12288
  489. #define ZGEMM3M_DEFAULT_R 12288
  490. #define XGEMM3M_DEFAULT_R 12288
  /* S and D R are fixed at 12288; Q/C/Z/X R forward to identifiers defined elsewhere. */
  491. #define SGEMM_DEFAULT_R 12288
  492. #define QGEMM_DEFAULT_R qgemm_r
  493. #define DGEMM_DEFAULT_R 12288
  494. #define CGEMM_DEFAULT_R cgemm_r
  495. #define ZGEMM_DEFAULT_R zgemm_r
  496. #define XGEMM_DEFAULT_R xgemm_r
  497. #define SYMV_P 16
  498. #define HAVE_EXCLUSIVE_CACHE
  499. #define GEMM_THREAD gemm_thread_mn
  500. #endif /* EXCAVATOR */
  /*
   * ATHLON: tuning macros that take effect when the build defines ATHLON.
   * This block has no ARCH_X86 / ARCH_X86_64 conditionals: every value is
   * unconditional. P and Q are literal constants; R forwards to lowercase
   * identifiers (sgemm_r, ...) that are not defined in this part of the file --
   * presumably variables provided elsewhere, TODO confirm.
   */
  501. #ifdef ATHLON
  502. #define SNUMOPT 4
  503. #define DNUMOPT 2
  504. #define GEMM_DEFAULT_OFFSET_A 0
  505. #define GEMM_DEFAULT_OFFSET_B 384
  506. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  507. #define SGEMM_DEFAULT_UNROLL_N 4
  508. #define DGEMM_DEFAULT_UNROLL_N 4
  509. #define QGEMM_DEFAULT_UNROLL_N 2
  510. #define CGEMM_DEFAULT_UNROLL_N 2
  511. #define ZGEMM_DEFAULT_UNROLL_N 2
  512. #define XGEMM_DEFAULT_UNROLL_N 1
  513. #define SGEMM_DEFAULT_UNROLL_M 2
  514. #define DGEMM_DEFAULT_UNROLL_M 1
  515. #define QGEMM_DEFAULT_UNROLL_M 2
  516. #define CGEMM_DEFAULT_UNROLL_M 1
  517. #define ZGEMM_DEFAULT_UNROLL_M 1
  518. #define XGEMM_DEFAULT_UNROLL_M 1
  /* R forwards to identifiers defined elsewhere. */
  519. #define SGEMM_DEFAULT_R sgemm_r
  520. #define DGEMM_DEFAULT_R dgemm_r
  521. #define QGEMM_DEFAULT_R qgemm_r
  522. #define CGEMM_DEFAULT_R cgemm_r
  523. #define ZGEMM_DEFAULT_R zgemm_r
  524. #define XGEMM_DEFAULT_R xgemm_r
  /* P varies by precision (208 down to 28); Q is 208 for every precision. */
  525. #define SGEMM_DEFAULT_P 208
  526. #define DGEMM_DEFAULT_P 104
  527. #define QGEMM_DEFAULT_P 56
  528. #define CGEMM_DEFAULT_P 104
  529. #define ZGEMM_DEFAULT_P 56
  530. #define XGEMM_DEFAULT_P 28
  531. #define SGEMM_DEFAULT_Q 208
  532. #define DGEMM_DEFAULT_Q 208
  533. #define QGEMM_DEFAULT_Q 208
  534. #define CGEMM_DEFAULT_Q 208
  535. #define ZGEMM_DEFAULT_Q 208
  536. #define XGEMM_DEFAULT_Q 208
  537. #define SYMV_P 16
  538. #define HAVE_EXCLUSIVE_CACHE
  539. #endif /* ATHLON */
  /*
   * VIAC3: tuning macros that take effect when the build defines VIAC3.
   * Every value is unconditional (no ARCH_X86 / ARCH_X86_64 split), and this
   * block does not define HAVE_EXCLUSIVE_CACHE.
   * R forwards to lowercase identifiers (sgemm_r, ...) that are not defined in
   * this part of the file -- presumably variables provided elsewhere,
   * TODO confirm.
   */
  540. #ifdef VIAC3
  541. #define SNUMOPT 2
  542. #define DNUMOPT 1
  543. #define GEMM_DEFAULT_OFFSET_A 0
  544. #define GEMM_DEFAULT_OFFSET_B 256
  545. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  546. #define SGEMM_DEFAULT_UNROLL_N 4
  547. #define DGEMM_DEFAULT_UNROLL_N 4
  548. #define QGEMM_DEFAULT_UNROLL_N 2
  549. #define CGEMM_DEFAULT_UNROLL_N 2
  550. #define ZGEMM_DEFAULT_UNROLL_N 2
  551. #define XGEMM_DEFAULT_UNROLL_N 1
  552. #define SGEMM_DEFAULT_UNROLL_M 2
  553. #define DGEMM_DEFAULT_UNROLL_M 1
  554. #define QGEMM_DEFAULT_UNROLL_M 2
  555. #define CGEMM_DEFAULT_UNROLL_M 1
  556. #define ZGEMM_DEFAULT_UNROLL_M 1
  557. #define XGEMM_DEFAULT_UNROLL_M 1
  /* R forwards to identifiers defined elsewhere. */
  558. #define SGEMM_DEFAULT_R sgemm_r
  559. #define DGEMM_DEFAULT_R dgemm_r
  560. #define QGEMM_DEFAULT_R qgemm_r
  561. #define CGEMM_DEFAULT_R cgemm_r
  562. #define ZGEMM_DEFAULT_R zgemm_r
  563. #define XGEMM_DEFAULT_R xgemm_r
  /* P is 128 for every precision; Q varies by precision (512 down to 128). */
  564. #define SGEMM_DEFAULT_P 128
  565. #define DGEMM_DEFAULT_P 128
  566. #define QGEMM_DEFAULT_P 128
  567. #define CGEMM_DEFAULT_P 128
  568. #define ZGEMM_DEFAULT_P 128
  569. #define XGEMM_DEFAULT_P 128
  570. #define SGEMM_DEFAULT_Q 512
  571. #define DGEMM_DEFAULT_Q 256
  572. #define QGEMM_DEFAULT_Q 256
  573. #define CGEMM_DEFAULT_Q 256
  574. #define ZGEMM_DEFAULT_Q 128
  575. #define XGEMM_DEFAULT_Q 128
  576. #define SYMV_P 16
  577. #endif /* VIAC3 */
  /*
   * NANO: tuning macros that take effect when the build defines NANO.
   * Both UNROLL_N and UNROLL_M are chosen by ARCH_X86; P, Q and R are
   * unconditional. R forwards to lowercase identifiers (sgemm_r, ...) that are
   * not defined in this part of the file -- presumably variables provided
   * elsewhere, TODO confirm.
   */
  578. #ifdef NANO
  579. #define SNUMOPT 4
  580. #define DNUMOPT 2
  581. #define GEMM_DEFAULT_OFFSET_A 64
  582. #define GEMM_DEFAULT_OFFSET_B 256
  583. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  584. #ifdef ARCH_X86
  585. #define SGEMM_DEFAULT_UNROLL_N 4
  586. #define DGEMM_DEFAULT_UNROLL_N 4
  587. #define QGEMM_DEFAULT_UNROLL_N 2
  588. #define CGEMM_DEFAULT_UNROLL_N 2
  589. #define ZGEMM_DEFAULT_UNROLL_N 2
  590. #define XGEMM_DEFAULT_UNROLL_N 1
  591. #define SGEMM_DEFAULT_UNROLL_M 4
  592. #define DGEMM_DEFAULT_UNROLL_M 2
  593. #define QGEMM_DEFAULT_UNROLL_M 2
  594. #define CGEMM_DEFAULT_UNROLL_M 2
  595. #define ZGEMM_DEFAULT_UNROLL_M 1
  596. #define XGEMM_DEFAULT_UNROLL_M 1
  597. #else /* !ARCH_X86 */
  598. #define SGEMM_DEFAULT_UNROLL_N 8
  599. #define DGEMM_DEFAULT_UNROLL_N 4
  600. #define QGEMM_DEFAULT_UNROLL_N 2
  601. #define CGEMM_DEFAULT_UNROLL_N 4
  602. #define ZGEMM_DEFAULT_UNROLL_N 2
  603. #define XGEMM_DEFAULT_UNROLL_N 1
  604. #define SGEMM_DEFAULT_UNROLL_M 4
  605. #define DGEMM_DEFAULT_UNROLL_M 4
  606. #define QGEMM_DEFAULT_UNROLL_M 2
  607. #define CGEMM_DEFAULT_UNROLL_M 2
  608. #define ZGEMM_DEFAULT_UNROLL_M 2
  609. #define XGEMM_DEFAULT_UNROLL_M 1
  610. #endif /* ARCH_X86 */
  /* P is 288 for every precision. */
  611. #define SGEMM_DEFAULT_P 288
  612. #define DGEMM_DEFAULT_P 288
  613. #define QGEMM_DEFAULT_P 288
  614. #define CGEMM_DEFAULT_P 288
  615. #define ZGEMM_DEFAULT_P 288
  616. #define XGEMM_DEFAULT_P 288
  /* R forwards to identifiers defined elsewhere. */
  617. #define SGEMM_DEFAULT_R sgemm_r
  618. #define DGEMM_DEFAULT_R dgemm_r
  619. #define QGEMM_DEFAULT_R qgemm_r
  620. #define CGEMM_DEFAULT_R cgemm_r
  621. #define ZGEMM_DEFAULT_R zgemm_r
  622. #define XGEMM_DEFAULT_R xgemm_r
  /* Q varies by precision (256 down to 32). */
  623. #define SGEMM_DEFAULT_Q 256
  624. #define DGEMM_DEFAULT_Q 128
  625. #define QGEMM_DEFAULT_Q 64
  626. #define CGEMM_DEFAULT_Q 128
  627. #define ZGEMM_DEFAULT_Q 64
  628. #define XGEMM_DEFAULT_Q 32
  629. #define SYMV_P 16
  630. #define HAVE_EXCLUSIVE_CACHE
  631. #endif /* NANO */
  /* GEMM tuning parameters shared by PENTIUM, PENTIUM2 and PENTIUM3.
   * - HAVE_SSE doubles SNUMOPT and the SGEMM/CGEMM unroll-M factors.
   * - P and R come from the sgemm_p/sgemm_r-style symbols, which are
   *   defined outside this block.
   * - Q is 256 for every precision. */
  632. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
  633. #ifdef HAVE_SSE
  634. #define SNUMOPT 2
  635. #else
  636. #define SNUMOPT 1
  637. #endif
  638. #define DNUMOPT 1
  639. #define GEMM_DEFAULT_OFFSET_A 0
  640. #define GEMM_DEFAULT_OFFSET_B 0
  641. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* Only the single-precision real and complex unroll-M factors depend on
   * HAVE_SSE; all remaining unroll factors below are unconditional. */
  642. #ifdef HAVE_SSE
  643. #define SGEMM_DEFAULT_UNROLL_M 8
  644. #define CGEMM_DEFAULT_UNROLL_M 4
  645. #else
  646. #define SGEMM_DEFAULT_UNROLL_M 4
  647. #define CGEMM_DEFAULT_UNROLL_M 2
  648. #endif
  649. #define DGEMM_DEFAULT_UNROLL_M 2
  650. #define SGEMM_DEFAULT_UNROLL_N 2
  651. #define DGEMM_DEFAULT_UNROLL_N 2
  652. #define QGEMM_DEFAULT_UNROLL_M 2
  653. #define QGEMM_DEFAULT_UNROLL_N 2
  654. #define CGEMM_DEFAULT_UNROLL_N 1
  655. #define ZGEMM_DEFAULT_UNROLL_M 1
  656. #define ZGEMM_DEFAULT_UNROLL_N 1
  657. #define XGEMM_DEFAULT_UNROLL_M 1
  658. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P/Q/R triples, one per precision. */
  659. #define SGEMM_DEFAULT_P sgemm_p
  660. #define SGEMM_DEFAULT_Q 256
  661. #define SGEMM_DEFAULT_R sgemm_r
  662. #define DGEMM_DEFAULT_P dgemm_p
  663. #define DGEMM_DEFAULT_Q 256
  664. #define DGEMM_DEFAULT_R dgemm_r
  665. #define QGEMM_DEFAULT_P qgemm_p
  666. #define QGEMM_DEFAULT_Q 256
  667. #define QGEMM_DEFAULT_R qgemm_r
  668. #define CGEMM_DEFAULT_P cgemm_p
  669. #define CGEMM_DEFAULT_Q 256
  670. #define CGEMM_DEFAULT_R cgemm_r
  671. #define ZGEMM_DEFAULT_P zgemm_p
  672. #define ZGEMM_DEFAULT_Q 256
  673. #define ZGEMM_DEFAULT_R zgemm_r
  674. #define XGEMM_DEFAULT_P xgemm_p
  675. #define XGEMM_DEFAULT_Q 256
  676. #define XGEMM_DEFAULT_R xgemm_r
  677. #define SYMV_P 4
  678. #endif
  /* GEMM tuning parameters used when PENTIUMM is defined.
   * - Unroll factors come in two sets, selected by CORE_YONAH.
   * - P and R come from symbols defined outside this block.
   * - Q is 256 for every precision. */
  679. #ifdef PENTIUMM
  680. #define SNUMOPT 2
  681. #define DNUMOPT 1
  682. #define GEMM_DEFAULT_OFFSET_A 0
  683. #define GEMM_DEFAULT_OFFSET_B 0
  684. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* Unroll factors when CORE_YONAH is defined. */
  685. #ifdef CORE_YONAH
  686. #define SGEMM_DEFAULT_UNROLL_M 4
  687. #define SGEMM_DEFAULT_UNROLL_N 4
  688. #define DGEMM_DEFAULT_UNROLL_M 2
  689. #define DGEMM_DEFAULT_UNROLL_N 4
  690. #define QGEMM_DEFAULT_UNROLL_M 2
  691. #define QGEMM_DEFAULT_UNROLL_N 2
  692. #define CGEMM_DEFAULT_UNROLL_M 2
  693. #define CGEMM_DEFAULT_UNROLL_N 2
  694. #define ZGEMM_DEFAULT_UNROLL_M 1
  695. #define ZGEMM_DEFAULT_UNROLL_N 2
  696. #define XGEMM_DEFAULT_UNROLL_M 1
  697. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Unroll factors when CORE_YONAH is not defined. */
  698. #else
  699. #define SGEMM_DEFAULT_UNROLL_M 8
  700. #define SGEMM_DEFAULT_UNROLL_N 2
  701. #define DGEMM_DEFAULT_UNROLL_M 2
  702. #define DGEMM_DEFAULT_UNROLL_N 2
  703. #define QGEMM_DEFAULT_UNROLL_M 2
  704. #define QGEMM_DEFAULT_UNROLL_N 2
  705. #define CGEMM_DEFAULT_UNROLL_M 4
  706. #define CGEMM_DEFAULT_UNROLL_N 1
  707. #define ZGEMM_DEFAULT_UNROLL_M 1
  708. #define ZGEMM_DEFAULT_UNROLL_N 1
  709. #define XGEMM_DEFAULT_UNROLL_M 1
  710. #define XGEMM_DEFAULT_UNROLL_N 1
  711. #endif
  /* P/Q/R triples shared by both unroll variants above. */
  712. #define SGEMM_DEFAULT_P sgemm_p
  713. #define SGEMM_DEFAULT_Q 256
  714. #define SGEMM_DEFAULT_R sgemm_r
  715. #define DGEMM_DEFAULT_P dgemm_p
  716. #define DGEMM_DEFAULT_Q 256
  717. #define DGEMM_DEFAULT_R dgemm_r
  718. #define QGEMM_DEFAULT_P qgemm_p
  719. #define QGEMM_DEFAULT_Q 256
  720. #define QGEMM_DEFAULT_R qgemm_r
  721. #define CGEMM_DEFAULT_P cgemm_p
  722. #define CGEMM_DEFAULT_Q 256
  723. #define CGEMM_DEFAULT_R cgemm_r
  724. #define ZGEMM_DEFAULT_P zgemm_p
  725. #define ZGEMM_DEFAULT_Q 256
  726. #define ZGEMM_DEFAULT_R zgemm_r
  727. #define XGEMM_DEFAULT_P xgemm_p
  728. #define XGEMM_DEFAULT_Q 256
  729. #define XGEMM_DEFAULT_R xgemm_r
  730. #define SYMV_P 4
  731. #endif
  /* GEMM tuning parameters used when CORE_NORTHWOOD is defined.
   * - There are no nested conditionals; every value is unconditional.
   * - P and R come from symbols defined outside this block.
   * - Q is 128 for every precision. */
  732. #ifdef CORE_NORTHWOOD
  733. #define SNUMOPT 4
  734. #define DNUMOPT 2
  735. #define GEMM_DEFAULT_OFFSET_A 0
  736. #define GEMM_DEFAULT_OFFSET_B 32
  737. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  738. #define SYMV_P 8
  /* Unroll factors: M first, then N. */
  739. #define SGEMM_DEFAULT_UNROLL_M 8
  740. #define DGEMM_DEFAULT_UNROLL_M 4
  741. #define QGEMM_DEFAULT_UNROLL_M 2
  742. #define CGEMM_DEFAULT_UNROLL_M 4
  743. #define ZGEMM_DEFAULT_UNROLL_M 2
  744. #define XGEMM_DEFAULT_UNROLL_M 1
  745. #define SGEMM_DEFAULT_UNROLL_N 2
  746. #define DGEMM_DEFAULT_UNROLL_N 2
  747. #define QGEMM_DEFAULT_UNROLL_N 2
  748. #define CGEMM_DEFAULT_UNROLL_N 1
  749. #define ZGEMM_DEFAULT_UNROLL_N 1
  750. #define XGEMM_DEFAULT_UNROLL_N 1
  /* P and R are forwarded to externally defined symbols. */
  751. #define SGEMM_DEFAULT_P sgemm_p
  752. #define SGEMM_DEFAULT_R sgemm_r
  753. #define DGEMM_DEFAULT_P dgemm_p
  754. #define DGEMM_DEFAULT_R dgemm_r
  755. #define QGEMM_DEFAULT_P qgemm_p
  756. #define QGEMM_DEFAULT_R qgemm_r
  757. #define CGEMM_DEFAULT_P cgemm_p
  758. #define CGEMM_DEFAULT_R cgemm_r
  759. #define ZGEMM_DEFAULT_P zgemm_p
  760. #define ZGEMM_DEFAULT_R zgemm_r
  761. #define XGEMM_DEFAULT_P xgemm_p
  762. #define XGEMM_DEFAULT_R xgemm_r
  763. #define SGEMM_DEFAULT_Q 128
  764. #define DGEMM_DEFAULT_Q 128
  765. #define QGEMM_DEFAULT_Q 128
  766. #define CGEMM_DEFAULT_Q 128
  767. #define ZGEMM_DEFAULT_Q 128
  768. #define XGEMM_DEFAULT_Q 128
  769. #endif
  /* GEMM tuning parameters used when CORE_PRESCOTT is defined.
   * - The A/B buffer offsets depend on __64BIT__.
   * - The unroll-M factors depend on ARCH_X86; unroll-N is common.
   * - P and R come from symbols defined outside this block.
   * - Q is 128 for every precision. */
  770. #ifdef CORE_PRESCOTT
  771. #define SNUMOPT 4
  772. #define DNUMOPT 2
  /* Offsets: 128/192 when __64BIT__ is NOT defined, 0/256 when it is. */
  773. #ifndef __64BIT__
  774. #define GEMM_DEFAULT_OFFSET_A 128
  775. #define GEMM_DEFAULT_OFFSET_B 192
  776. #else
  777. #define GEMM_DEFAULT_OFFSET_A 0
  778. #define GEMM_DEFAULT_OFFSET_B 256
  779. #endif
  780. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  781. #define SYMV_P 8
  /* Unroll-M: the ARCH_X86 set is half the size of the fallback set for
   * S, D, C and Z; Q and X are identical in both. */
  782. #ifdef ARCH_X86
  783. #define SGEMM_DEFAULT_UNROLL_M 4
  784. #define DGEMM_DEFAULT_UNROLL_M 2
  785. #define QGEMM_DEFAULT_UNROLL_M 2
  786. #define CGEMM_DEFAULT_UNROLL_M 2
  787. #define ZGEMM_DEFAULT_UNROLL_M 1
  788. #define XGEMM_DEFAULT_UNROLL_M 1
  789. #else
  790. #define SGEMM_DEFAULT_UNROLL_M 8
  791. #define DGEMM_DEFAULT_UNROLL_M 4
  792. #define QGEMM_DEFAULT_UNROLL_M 2
  793. #define CGEMM_DEFAULT_UNROLL_M 4
  794. #define ZGEMM_DEFAULT_UNROLL_M 2
  795. #define XGEMM_DEFAULT_UNROLL_M 1
  796. #endif
  /* Unroll-N and P/Q/R are independent of ARCH_X86. */
  797. #define SGEMM_DEFAULT_UNROLL_N 4
  798. #define DGEMM_DEFAULT_UNROLL_N 4
  799. #define QGEMM_DEFAULT_UNROLL_N 2
  800. #define CGEMM_DEFAULT_UNROLL_N 2
  801. #define ZGEMM_DEFAULT_UNROLL_N 2
  802. #define XGEMM_DEFAULT_UNROLL_N 1
  803. #define SGEMM_DEFAULT_P sgemm_p
  804. #define SGEMM_DEFAULT_R sgemm_r
  805. #define DGEMM_DEFAULT_P dgemm_p
  806. #define DGEMM_DEFAULT_R dgemm_r
  807. #define QGEMM_DEFAULT_P qgemm_p
  808. #define QGEMM_DEFAULT_R qgemm_r
  809. #define CGEMM_DEFAULT_P cgemm_p
  810. #define CGEMM_DEFAULT_R cgemm_r
  811. #define ZGEMM_DEFAULT_P zgemm_p
  812. #define ZGEMM_DEFAULT_R zgemm_r
  813. #define XGEMM_DEFAULT_P xgemm_p
  814. #define XGEMM_DEFAULT_R xgemm_r
  815. #define SGEMM_DEFAULT_Q 128
  816. #define DGEMM_DEFAULT_Q 128
  817. #define QGEMM_DEFAULT_Q 128
  818. #define CGEMM_DEFAULT_Q 128
  819. #define ZGEMM_DEFAULT_Q 128
  820. #define XGEMM_DEFAULT_Q 128
  821. #endif
  /* GEMM tuning parameters used when CORE2 is defined.
   * - Unroll-M is the same in both branches; only unroll-N depends on
   *   ARCH_X86.
   * - The ARCH_X86 branch additionally defines the MASK() helper macro.
   * - P and R come from symbols defined outside this block.
   * - Q is 256 for every precision. */
  822. #ifdef CORE2
  823. #define SNUMOPT 8
  824. #define DNUMOPT 4
  825. #define GEMM_DEFAULT_OFFSET_A 448
  826. #define GEMM_DEFAULT_OFFSET_B 128
  827. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  828. #define SYMV_P 8
  829. #define SWITCH_RATIO 4
  830. #ifdef ARCH_X86
  831. #define SGEMM_DEFAULT_UNROLL_M 8
  832. #define DGEMM_DEFAULT_UNROLL_M 4
  833. #define QGEMM_DEFAULT_UNROLL_M 2
  834. #define CGEMM_DEFAULT_UNROLL_M 4
  835. #define ZGEMM_DEFAULT_UNROLL_M 2
  836. #define XGEMM_DEFAULT_UNROLL_M 1
  837. #define SGEMM_DEFAULT_UNROLL_N 2
  838. #define DGEMM_DEFAULT_UNROLL_N 2
  839. #define QGEMM_DEFAULT_UNROLL_N 2
  840. #define CGEMM_DEFAULT_UNROLL_N 1
  841. #define ZGEMM_DEFAULT_UNROLL_N 1
  842. #define XGEMM_DEFAULT_UNROLL_N 1
  /* MASK(a, b): a rounded up to the next multiple of b (with integer
   * operands).  Within this block it is defined only in the ARCH_X86
   * branch. */
  843. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  844. #else
  845. #define SGEMM_DEFAULT_UNROLL_M 8
  846. #define DGEMM_DEFAULT_UNROLL_M 4
  847. #define QGEMM_DEFAULT_UNROLL_M 2
  848. #define CGEMM_DEFAULT_UNROLL_M 4
  849. #define ZGEMM_DEFAULT_UNROLL_M 2
  850. #define XGEMM_DEFAULT_UNROLL_M 1
  851. #define SGEMM_DEFAULT_UNROLL_N 4
  852. #define DGEMM_DEFAULT_UNROLL_N 4
  853. #define QGEMM_DEFAULT_UNROLL_N 2
  854. #define CGEMM_DEFAULT_UNROLL_N 2
  855. #define ZGEMM_DEFAULT_UNROLL_N 2
  856. #define XGEMM_DEFAULT_UNROLL_N 1
  857. #endif
  /* P/R/Q values shared by both branches above. */
  858. #define SGEMM_DEFAULT_P sgemm_p
  859. #define SGEMM_DEFAULT_R sgemm_r
  860. #define DGEMM_DEFAULT_P dgemm_p
  861. #define DGEMM_DEFAULT_R dgemm_r
  862. #define QGEMM_DEFAULT_P qgemm_p
  863. #define QGEMM_DEFAULT_R qgemm_r
  864. #define CGEMM_DEFAULT_P cgemm_p
  865. #define CGEMM_DEFAULT_R cgemm_r
  866. #define ZGEMM_DEFAULT_P zgemm_p
  867. #define ZGEMM_DEFAULT_R zgemm_r
  868. #define XGEMM_DEFAULT_P xgemm_p
  869. #define XGEMM_DEFAULT_R xgemm_r
  870. #define SGEMM_DEFAULT_Q 256
  871. #define DGEMM_DEFAULT_Q 256
  872. #define QGEMM_DEFAULT_Q 256
  873. #define CGEMM_DEFAULT_Q 256
  874. #define ZGEMM_DEFAULT_Q 256
  875. #define XGEMM_DEFAULT_Q 256
  876. #endif
  /* GEMM tuning parameters used when PENRYN is defined.
   * - Unroll-N is the same in both branches; only unroll-M depends on
   *   ARCH_X86.
   * - P and R come from symbols defined outside this block.
   * - Q ranges from 512 down to 128.
   * - The block also sets GETRF_FACTOR to 0.75. */
  877. #ifdef PENRYN
  878. #define SNUMOPT 8
  879. #define DNUMOPT 4
  880. #define GEMM_DEFAULT_OFFSET_A 128
  881. #define GEMM_DEFAULT_OFFSET_B 0
  882. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  883. #define SYMV_P 8
  884. #define SWITCH_RATIO 4
  /* Unroll factors when ARCH_X86 is defined. */
  885. #ifdef ARCH_X86
  886. #define SGEMM_DEFAULT_UNROLL_M 4
  887. #define DGEMM_DEFAULT_UNROLL_M 2
  888. #define QGEMM_DEFAULT_UNROLL_M 2
  889. #define CGEMM_DEFAULT_UNROLL_M 2
  890. #define ZGEMM_DEFAULT_UNROLL_M 1
  891. #define XGEMM_DEFAULT_UNROLL_M 1
  892. #define SGEMM_DEFAULT_UNROLL_N 4
  893. #define DGEMM_DEFAULT_UNROLL_N 4
  894. #define QGEMM_DEFAULT_UNROLL_N 2
  895. #define CGEMM_DEFAULT_UNROLL_N 2
  896. #define ZGEMM_DEFAULT_UNROLL_N 2
  897. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Unroll factors when ARCH_X86 is not defined. */
  898. #else
  899. #define SGEMM_DEFAULT_UNROLL_M 8
  900. #define DGEMM_DEFAULT_UNROLL_M 4
  901. #define QGEMM_DEFAULT_UNROLL_M 2
  902. #define CGEMM_DEFAULT_UNROLL_M 4
  903. #define ZGEMM_DEFAULT_UNROLL_M 2
  904. #define XGEMM_DEFAULT_UNROLL_M 1
  905. #define SGEMM_DEFAULT_UNROLL_N 4
  906. #define DGEMM_DEFAULT_UNROLL_N 4
  907. #define QGEMM_DEFAULT_UNROLL_N 2
  908. #define CGEMM_DEFAULT_UNROLL_N 2
  909. #define ZGEMM_DEFAULT_UNROLL_N 2
  910. #define XGEMM_DEFAULT_UNROLL_N 1
  911. #endif
  /* P/R/Q values shared by both branches above. */
  912. #define SGEMM_DEFAULT_P sgemm_p
  913. #define SGEMM_DEFAULT_R sgemm_r
  914. #define DGEMM_DEFAULT_P dgemm_p
  915. #define DGEMM_DEFAULT_R dgemm_r
  916. #define QGEMM_DEFAULT_P qgemm_p
  917. #define QGEMM_DEFAULT_R qgemm_r
  918. #define CGEMM_DEFAULT_P cgemm_p
  919. #define CGEMM_DEFAULT_R cgemm_r
  920. #define ZGEMM_DEFAULT_P zgemm_p
  921. #define ZGEMM_DEFAULT_R zgemm_r
  922. #define XGEMM_DEFAULT_P xgemm_p
  923. #define XGEMM_DEFAULT_R xgemm_r
  924. #define SGEMM_DEFAULT_Q 512
  925. #define DGEMM_DEFAULT_Q 256
  926. #define QGEMM_DEFAULT_Q 128
  927. #define CGEMM_DEFAULT_Q 512
  928. #define ZGEMM_DEFAULT_Q 256
  929. #define XGEMM_DEFAULT_Q 128
  930. #define GETRF_FACTOR 0.75
  931. #endif
  /* GEMM tuning parameters used when DUNNINGTON is defined.
   * - Unroll-N is the same in both branches; only unroll-M depends on
   *   ARCH_X86.
   * - P and R come from symbols defined outside this block.
   * - Q ranges from 768 down to 192.
   * - The block also sets GETRF_FACTOR to 0.75 and maps GEMM_THREAD to
   *   gemm_thread_mn (a symbol defined outside this block). */
  932. #ifdef DUNNINGTON
  933. #define SNUMOPT 8
  934. #define DNUMOPT 4
  935. #define GEMM_DEFAULT_OFFSET_A 128
  936. #define GEMM_DEFAULT_OFFSET_B 0
  937. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  938. #define SYMV_P 8
  939. #define SWITCH_RATIO 4
  /* Unroll factors when ARCH_X86 is defined. */
  940. #ifdef ARCH_X86
  941. #define SGEMM_DEFAULT_UNROLL_M 4
  942. #define DGEMM_DEFAULT_UNROLL_M 2
  943. #define QGEMM_DEFAULT_UNROLL_M 2
  944. #define CGEMM_DEFAULT_UNROLL_M 2
  945. #define ZGEMM_DEFAULT_UNROLL_M 1
  946. #define XGEMM_DEFAULT_UNROLL_M 1
  947. #define SGEMM_DEFAULT_UNROLL_N 4
  948. #define DGEMM_DEFAULT_UNROLL_N 4
  949. #define QGEMM_DEFAULT_UNROLL_N 2
  950. #define CGEMM_DEFAULT_UNROLL_N 2
  951. #define ZGEMM_DEFAULT_UNROLL_N 2
  952. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Unroll factors when ARCH_X86 is not defined. */
  953. #else
  954. #define SGEMM_DEFAULT_UNROLL_M 8
  955. #define DGEMM_DEFAULT_UNROLL_M 4
  956. #define QGEMM_DEFAULT_UNROLL_M 2
  957. #define CGEMM_DEFAULT_UNROLL_M 4
  958. #define ZGEMM_DEFAULT_UNROLL_M 2
  959. #define XGEMM_DEFAULT_UNROLL_M 1
  960. #define SGEMM_DEFAULT_UNROLL_N 4
  961. #define DGEMM_DEFAULT_UNROLL_N 4
  962. #define QGEMM_DEFAULT_UNROLL_N 2
  963. #define CGEMM_DEFAULT_UNROLL_N 2
  964. #define ZGEMM_DEFAULT_UNROLL_N 2
  965. #define XGEMM_DEFAULT_UNROLL_N 1
  966. #endif
  /* P/R/Q values shared by both branches above. */
  967. #define SGEMM_DEFAULT_P sgemm_p
  968. #define SGEMM_DEFAULT_R sgemm_r
  969. #define DGEMM_DEFAULT_P dgemm_p
  970. #define DGEMM_DEFAULT_R dgemm_r
  971. #define QGEMM_DEFAULT_P qgemm_p
  972. #define QGEMM_DEFAULT_R qgemm_r
  973. #define CGEMM_DEFAULT_P cgemm_p
  974. #define CGEMM_DEFAULT_R cgemm_r
  975. #define ZGEMM_DEFAULT_P zgemm_p
  976. #define ZGEMM_DEFAULT_R zgemm_r
  977. #define XGEMM_DEFAULT_P xgemm_p
  978. #define XGEMM_DEFAULT_R xgemm_r
  979. #define SGEMM_DEFAULT_Q 768
  980. #define DGEMM_DEFAULT_Q 384
  981. #define QGEMM_DEFAULT_Q 192
  982. #define CGEMM_DEFAULT_Q 768
  983. #define ZGEMM_DEFAULT_Q 384
  984. #define XGEMM_DEFAULT_Q 192
  985. #define GETRF_FACTOR 0.75
  986. #define GEMM_THREAD gemm_thread_mn
  987. #endif
  /* GEMM tuning parameters used when NEHALEM is defined.
   * - Unroll-M is the same in both branches; the non-ARCH_X86 branch uses
   *   larger unroll-N factors for S, D, C and Z.
   * - P is a literal here: 504 for S/D/Q and 252 for C/Z/X.
   * - R comes from symbols defined outside this block.
   * - The block also sets GETRF_FACTOR to 0.72. */
  988. #ifdef NEHALEM
  989. #define SNUMOPT 8
  990. #define DNUMOPT 4
  991. #define GEMM_DEFAULT_OFFSET_A 32
  992. #define GEMM_DEFAULT_OFFSET_B 0
  993. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  994. #define SYMV_P 8
  995. #define SWITCH_RATIO 4
  /* Unroll factors when ARCH_X86 is defined. */
  996. #ifdef ARCH_X86
  997. #define SGEMM_DEFAULT_UNROLL_M 4
  998. #define DGEMM_DEFAULT_UNROLL_M 2
  999. #define QGEMM_DEFAULT_UNROLL_M 2
  1000. #define CGEMM_DEFAULT_UNROLL_M 2
  1001. #define ZGEMM_DEFAULT_UNROLL_M 1
  1002. #define XGEMM_DEFAULT_UNROLL_M 1
  1003. #define SGEMM_DEFAULT_UNROLL_N 4
  1004. #define DGEMM_DEFAULT_UNROLL_N 4
  1005. #define QGEMM_DEFAULT_UNROLL_N 2
  1006. #define CGEMM_DEFAULT_UNROLL_N 2
  1007. #define ZGEMM_DEFAULT_UNROLL_N 2
  1008. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Unroll factors when ARCH_X86 is not defined. */
  1009. #else
  1010. #define SGEMM_DEFAULT_UNROLL_M 4
  1011. #define DGEMM_DEFAULT_UNROLL_M 2
  1012. #define QGEMM_DEFAULT_UNROLL_M 2
  1013. #define CGEMM_DEFAULT_UNROLL_M 2
  1014. #define ZGEMM_DEFAULT_UNROLL_M 1
  1015. #define XGEMM_DEFAULT_UNROLL_M 1
  1016. #define SGEMM_DEFAULT_UNROLL_N 8
  1017. #define DGEMM_DEFAULT_UNROLL_N 8
  1018. #define QGEMM_DEFAULT_UNROLL_N 2
  1019. #define CGEMM_DEFAULT_UNROLL_N 4
  1020. #define ZGEMM_DEFAULT_UNROLL_N 4
  1021. #define XGEMM_DEFAULT_UNROLL_N 1
  1022. #endif
  /* P/R/Q values shared by both branches above. */
  1023. #define SGEMM_DEFAULT_P 504
  1024. #define SGEMM_DEFAULT_R sgemm_r
  1025. #define DGEMM_DEFAULT_P 504
  1026. #define DGEMM_DEFAULT_R dgemm_r
  1027. #define QGEMM_DEFAULT_P 504
  1028. #define QGEMM_DEFAULT_R qgemm_r
  1029. #define CGEMM_DEFAULT_P 252
  1030. #define CGEMM_DEFAULT_R cgemm_r
  1031. #define ZGEMM_DEFAULT_P 252
  1032. #define ZGEMM_DEFAULT_R zgemm_r
  1033. #define XGEMM_DEFAULT_P 252
  1034. #define XGEMM_DEFAULT_R xgemm_r
  1035. #define SGEMM_DEFAULT_Q 512
  1036. #define DGEMM_DEFAULT_Q 256
  1037. #define QGEMM_DEFAULT_Q 128
  1038. #define CGEMM_DEFAULT_Q 512
  1039. #define ZGEMM_DEFAULT_Q 256
  1040. #define XGEMM_DEFAULT_Q 128
  1041. #define GETRF_FACTOR 0.72
  1042. #endif
  /* GEMM tuning parameters used when SANDYBRIDGE is defined.
   * - Unroll factors depend on ARCH_X86.
   * - P and Q are literals; R comes from symbols defined outside this
   *   block.  The commented-out lines keep a fixed R of 1024 as a disabled
   *   alternative.
   * - The GEMM3M unroll/P/Q/R values and GETRF_FACTOR (0.72) are defined
   *   unconditionally inside this block. */
  1043. #ifdef SANDYBRIDGE
  1044. #define SNUMOPT 8
  1045. #define DNUMOPT 4
  1046. #define GEMM_DEFAULT_OFFSET_A 0
  1047. #define GEMM_DEFAULT_OFFSET_B 0
  1048. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1049. #define SYMV_P 8
  1050. #define SWITCH_RATIO 4
  /* Unroll factors when ARCH_X86 is defined. */
  1051. #ifdef ARCH_X86
  1052. #define SGEMM_DEFAULT_UNROLL_M 4
  1053. #define DGEMM_DEFAULT_UNROLL_M 2
  1054. #define QGEMM_DEFAULT_UNROLL_M 2
  1055. #define CGEMM_DEFAULT_UNROLL_M 2
  1056. #define ZGEMM_DEFAULT_UNROLL_M 1
  1057. #define XGEMM_DEFAULT_UNROLL_M 1
  1058. #define SGEMM_DEFAULT_UNROLL_N 4
  1059. #define DGEMM_DEFAULT_UNROLL_N 4
  1060. #define QGEMM_DEFAULT_UNROLL_N 2
  1061. #define CGEMM_DEFAULT_UNROLL_N 2
  1062. #define ZGEMM_DEFAULT_UNROLL_N 2
  1063. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Unroll factors when ARCH_X86 is not defined. */
  1064. #else
  1065. #define SGEMM_DEFAULT_UNROLL_M 16
  1066. #define DGEMM_DEFAULT_UNROLL_M 8
  1067. #define QGEMM_DEFAULT_UNROLL_M 2
  1068. #define CGEMM_DEFAULT_UNROLL_M 8
  1069. #define ZGEMM_DEFAULT_UNROLL_M 1
  1070. #define XGEMM_DEFAULT_UNROLL_M 1
  1071. #define SGEMM_DEFAULT_UNROLL_N 4
  1072. #define DGEMM_DEFAULT_UNROLL_N 4
  1073. #define QGEMM_DEFAULT_UNROLL_N 2
  1074. #define CGEMM_DEFAULT_UNROLL_N 2
  1075. #define ZGEMM_DEFAULT_UNROLL_N 4
  1076. #define XGEMM_DEFAULT_UNROLL_N 1
  1077. #endif
  /* P and R per precision, shared by both branches above. */
  1078. #define SGEMM_DEFAULT_P 768
  1079. #define SGEMM_DEFAULT_R sgemm_r
  1080. //#define SGEMM_DEFAULT_R 1024
  1081. #define DGEMM_DEFAULT_P 512
  1082. #define DGEMM_DEFAULT_R dgemm_r
  1083. //#define DGEMM_DEFAULT_R 1024
  1084. #define QGEMM_DEFAULT_P 504
  1085. #define QGEMM_DEFAULT_R qgemm_r
  1086. #define CGEMM_DEFAULT_P 768
  1087. #define CGEMM_DEFAULT_R cgemm_r
  1088. //#define CGEMM_DEFAULT_R 1024
  1089. #define ZGEMM_DEFAULT_P 512
  1090. #define ZGEMM_DEFAULT_R zgemm_r
  1091. //#define ZGEMM_DEFAULT_R 1024
  1092. #define XGEMM_DEFAULT_P 252
  1093. #define XGEMM_DEFAULT_R xgemm_r
  1094. #define SGEMM_DEFAULT_Q 384
  1095. #define DGEMM_DEFAULT_Q 256
  1096. #define QGEMM_DEFAULT_Q 128
  1097. #define CGEMM_DEFAULT_Q 512
  1098. #define ZGEMM_DEFAULT_Q 192
  1099. #define XGEMM_DEFAULT_Q 128
  /* GEMM3M parameters; all literals, none depend on ARCH_X86 here. */
  1100. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1101. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1102. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1103. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1104. #define CGEMM3M_DEFAULT_P 448
  1105. #define ZGEMM3M_DEFAULT_P 224
  1106. #define XGEMM3M_DEFAULT_P 112
  1107. #define CGEMM3M_DEFAULT_Q 224
  1108. #define ZGEMM3M_DEFAULT_Q 224
  1109. #define XGEMM3M_DEFAULT_Q 224
  1110. #define CGEMM3M_DEFAULT_R 12288
  1111. #define ZGEMM3M_DEFAULT_R 12288
  1112. #define XGEMM3M_DEFAULT_R 12288
  1113. #define GETRF_FACTOR 0.72
  1114. #endif
  /* GEMM tuning parameters used when HASWELL is defined.
   * The block tests ARCH_X86 twice: first for the unroll factors, then
   * for the P/Q/R values.
   * In the non-ARCH_X86 path:
   * - SGEMM/DGEMM_DEFAULT_UNROLL_MN are defined.
   * - The S/D Q values depend on WINDOWS_ABI.
   * - DGEMM_DEFAULT_R is the literal 13824.
   * - The GEMM3M parameters are defined.
   * None of those exist in the ARCH_X86 path. */
  1115. #ifdef HASWELL
  1116. #define SNUMOPT 16
  1117. #define DNUMOPT 8
  1118. #define GEMM_DEFAULT_OFFSET_A 0
  1119. #define GEMM_DEFAULT_OFFSET_B 0
  1120. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1121. #define SYMV_P 8
  1122. #define SWITCH_RATIO 4
  /* First ARCH_X86 test: unroll factors. */
  1123. #ifdef ARCH_X86
  1124. #define SGEMM_DEFAULT_UNROLL_M 4
  1125. #define DGEMM_DEFAULT_UNROLL_M 2
  1126. #define QGEMM_DEFAULT_UNROLL_M 2
  1127. #define CGEMM_DEFAULT_UNROLL_M 2
  1128. #define ZGEMM_DEFAULT_UNROLL_M 1
  1129. #define XGEMM_DEFAULT_UNROLL_M 1
  1130. #define SGEMM_DEFAULT_UNROLL_N 4
  1131. #define DGEMM_DEFAULT_UNROLL_N 4
  1132. #define QGEMM_DEFAULT_UNROLL_N 2
  1133. #define CGEMM_DEFAULT_UNROLL_N 2
  1134. #define ZGEMM_DEFAULT_UNROLL_N 2
  1135. #define XGEMM_DEFAULT_UNROLL_N 1
  1136. #else
  1137. #define SGEMM_DEFAULT_UNROLL_M 16
  1138. #define DGEMM_DEFAULT_UNROLL_M 4
  1139. #define QGEMM_DEFAULT_UNROLL_M 2
  1140. #define CGEMM_DEFAULT_UNROLL_M 8
  1141. #define ZGEMM_DEFAULT_UNROLL_M 4
  1142. #define XGEMM_DEFAULT_UNROLL_M 1
  1143. #define SGEMM_DEFAULT_UNROLL_N 4
  1144. #define DGEMM_DEFAULT_UNROLL_N 8
  1145. #define QGEMM_DEFAULT_UNROLL_N 2
  1146. #define CGEMM_DEFAULT_UNROLL_N 2
  1147. #define ZGEMM_DEFAULT_UNROLL_N 2
  1148. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_MN is defined only in this (non-ARCH_X86) branch. */
  1149. #define SGEMM_DEFAULT_UNROLL_MN 32
  1150. #define DGEMM_DEFAULT_UNROLL_MN 32
  1151. #endif
  /* Second ARCH_X86 test: P/Q/R values.  In the ARCH_X86 branch
   * CGEMM_DEFAULT_R is the literal 1024 while the other R values are
   * forwarded to externally defined symbols. */
  1152. #ifdef ARCH_X86
  1153. #define SGEMM_DEFAULT_P 512
  1154. #define SGEMM_DEFAULT_R sgemm_r
  1155. #define DGEMM_DEFAULT_P 512
  1156. #define DGEMM_DEFAULT_R dgemm_r
  1157. #define QGEMM_DEFAULT_P 504
  1158. #define QGEMM_DEFAULT_R qgemm_r
  1159. #define CGEMM_DEFAULT_P 128
  1160. #define CGEMM_DEFAULT_R 1024
  1161. #define ZGEMM_DEFAULT_P 512
  1162. #define ZGEMM_DEFAULT_R zgemm_r
  1163. #define XGEMM_DEFAULT_P 252
  1164. #define XGEMM_DEFAULT_R xgemm_r
  1165. #define SGEMM_DEFAULT_Q 256
  1166. #define DGEMM_DEFAULT_Q 256
  1167. #define QGEMM_DEFAULT_Q 128
  1168. #define CGEMM_DEFAULT_Q 256
  1169. #define ZGEMM_DEFAULT_Q 192
  1170. #define XGEMM_DEFAULT_Q 128
  1171. #else
  1172. #define SGEMM_DEFAULT_P 768
  1173. #define DGEMM_DEFAULT_P 512
  1174. #define CGEMM_DEFAULT_P 384
  1175. #define ZGEMM_DEFAULT_P 256
  /* The S/D Q values are smaller when WINDOWS_ABI is defined. */
  1176. #ifdef WINDOWS_ABI
  1177. #define SGEMM_DEFAULT_Q 320
  1178. #define DGEMM_DEFAULT_Q 128
  1179. #else
  1180. #define SGEMM_DEFAULT_Q 384
  1181. #define DGEMM_DEFAULT_Q 256
  1182. #endif
  1183. #define CGEMM_DEFAULT_Q 192
  1184. #define ZGEMM_DEFAULT_Q 128
  1185. #define SGEMM_DEFAULT_R sgemm_r
  1186. #define DGEMM_DEFAULT_R 13824
  1187. #define CGEMM_DEFAULT_R cgemm_r
  1188. #define ZGEMM_DEFAULT_R zgemm_r
  1189. #define QGEMM_DEFAULT_Q 128
  1190. #define QGEMM_DEFAULT_P 504
  1191. #define QGEMM_DEFAULT_R qgemm_r
  1192. #define XGEMM_DEFAULT_P 252
  1193. #define XGEMM_DEFAULT_R xgemm_r
  1194. #define XGEMM_DEFAULT_Q 128
  /* GEMM3M parameters; defined only in this (non-ARCH_X86) branch. */
  1195. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1196. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1197. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1198. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1199. #define CGEMM3M_DEFAULT_P 448
  1200. #define ZGEMM3M_DEFAULT_P 224
  1201. #define XGEMM3M_DEFAULT_P 112
  1202. #define CGEMM3M_DEFAULT_Q 224
  1203. #define ZGEMM3M_DEFAULT_Q 224
  1204. #define XGEMM3M_DEFAULT_Q 224
  1205. #define CGEMM3M_DEFAULT_R 12288
  1206. #define ZGEMM3M_DEFAULT_R 12288
  1207. #define XGEMM3M_DEFAULT_R 12288
  1208. #endif
  1209. #endif
  /* GEMM tuning parameters used when ATOM is defined.
   * - Only the unroll-M factors depend on ARCH_X86; unroll-N is common.
   * - P and R come from symbols defined outside this block.
   * - Q is 256 for every precision. */
  1210. #ifdef ATOM
  1211. #define SNUMOPT 2
  1212. #define DNUMOPT 1
  1213. #define GEMM_DEFAULT_OFFSET_A 64
  1214. #define GEMM_DEFAULT_OFFSET_B 0
  1215. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1216. #define SYMV_P 8
  /* Unroll-M: ARCH_X86 set first, fallback set after #else. */
  1217. #ifdef ARCH_X86
  1218. #define SGEMM_DEFAULT_UNROLL_M 4
  1219. #define DGEMM_DEFAULT_UNROLL_M 2
  1220. #define QGEMM_DEFAULT_UNROLL_M 2
  1221. #define CGEMM_DEFAULT_UNROLL_M 2
  1222. #define ZGEMM_DEFAULT_UNROLL_M 1
  1223. #define XGEMM_DEFAULT_UNROLL_M 1
  1224. #else
  1225. #define SGEMM_DEFAULT_UNROLL_M 8
  1226. #define DGEMM_DEFAULT_UNROLL_M 4
  1227. #define QGEMM_DEFAULT_UNROLL_M 2
  1228. #define CGEMM_DEFAULT_UNROLL_M 4
  1229. #define ZGEMM_DEFAULT_UNROLL_M 2
  1230. #define XGEMM_DEFAULT_UNROLL_M 1
  1231. #endif
  /* Unroll-N and P/R/Q are independent of ARCH_X86. */
  1232. #define SGEMM_DEFAULT_UNROLL_N 4
  1233. #define DGEMM_DEFAULT_UNROLL_N 2
  1234. #define QGEMM_DEFAULT_UNROLL_N 2
  1235. #define CGEMM_DEFAULT_UNROLL_N 2
  1236. #define ZGEMM_DEFAULT_UNROLL_N 1
  1237. #define XGEMM_DEFAULT_UNROLL_N 1
  1238. #define SGEMM_DEFAULT_P sgemm_p
  1239. #define SGEMM_DEFAULT_R sgemm_r
  1240. #define DGEMM_DEFAULT_P dgemm_p
  1241. #define DGEMM_DEFAULT_R dgemm_r
  1242. #define QGEMM_DEFAULT_P qgemm_p
  1243. #define QGEMM_DEFAULT_R qgemm_r
  1244. #define CGEMM_DEFAULT_P cgemm_p
  1245. #define CGEMM_DEFAULT_R cgemm_r
  1246. #define ZGEMM_DEFAULT_P zgemm_p
  1247. #define ZGEMM_DEFAULT_R zgemm_r
  1248. #define XGEMM_DEFAULT_P xgemm_p
  1249. #define XGEMM_DEFAULT_R xgemm_r
  1250. #define SGEMM_DEFAULT_Q 256
  1251. #define DGEMM_DEFAULT_Q 256
  1252. #define QGEMM_DEFAULT_Q 256
  1253. #define CGEMM_DEFAULT_Q 256
  1254. #define ZGEMM_DEFAULT_Q 256
  1255. #define XGEMM_DEFAULT_Q 256
  1256. #endif
  /* GEMM tuning parameters used when ITANIUM2 is defined.
   * - There are no nested conditionals.
   * - Unroll factors are 8x8 for the real precisions (S/D/Q) and 4x4 for
   *   the complex ones (C/Z/X).
   * - P and R come from symbols defined outside this block.
   * - Q is 1024 for every precision.
   * - The block also sets GETRF_FACTOR to 0.65. */
  1257. #ifdef ITANIUM2
  1258. #define SNUMOPT 4
  1259. #define DNUMOPT 4
  1260. #define GEMM_DEFAULT_OFFSET_A 0
  1261. #define GEMM_DEFAULT_OFFSET_B 128
  1262. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1263. #define SGEMM_DEFAULT_UNROLL_M 8
  1264. #define SGEMM_DEFAULT_UNROLL_N 8
  1265. #define DGEMM_DEFAULT_UNROLL_M 8
  1266. #define DGEMM_DEFAULT_UNROLL_N 8
  1267. #define QGEMM_DEFAULT_UNROLL_M 8
  1268. #define QGEMM_DEFAULT_UNROLL_N 8
  1269. #define CGEMM_DEFAULT_UNROLL_M 4
  1270. #define CGEMM_DEFAULT_UNROLL_N 4
  1271. #define ZGEMM_DEFAULT_UNROLL_M 4
  1272. #define ZGEMM_DEFAULT_UNROLL_N 4
  1273. #define XGEMM_DEFAULT_UNROLL_M 4
  1274. #define XGEMM_DEFAULT_UNROLL_N 4
  /* P forwarded to external symbols, Q literal, R forwarded. */
  1275. #define SGEMM_DEFAULT_P sgemm_p
  1276. #define DGEMM_DEFAULT_P dgemm_p
  1277. #define QGEMM_DEFAULT_P qgemm_p
  1278. #define CGEMM_DEFAULT_P cgemm_p
  1279. #define ZGEMM_DEFAULT_P zgemm_p
  1280. #define XGEMM_DEFAULT_P xgemm_p
  1281. #define SGEMM_DEFAULT_Q 1024
  1282. #define DGEMM_DEFAULT_Q 1024
  1283. #define QGEMM_DEFAULT_Q 1024
  1284. #define CGEMM_DEFAULT_Q 1024
  1285. #define ZGEMM_DEFAULT_Q 1024
  1286. #define XGEMM_DEFAULT_Q 1024
  1287. #define SGEMM_DEFAULT_R sgemm_r
  1288. #define DGEMM_DEFAULT_R dgemm_r
  1289. #define QGEMM_DEFAULT_R qgemm_r
  1290. #define CGEMM_DEFAULT_R cgemm_r
  1291. #define ZGEMM_DEFAULT_R zgemm_r
  1292. #define XGEMM_DEFAULT_R xgemm_r
  1293. #define SYMV_P 16
  1294. #define GETRF_FACTOR 0.65
  1295. #endif
  /* GEMM tuning parameters shared by EV4, EV5 and EV6.
   * - Offsets, alignment, unroll factors and SYMV_P are common to all
   *   three.
   * - SNUMOPT/DNUMOPT and the P/Q values are selected per CPU macro.
   * - Only S, D, C and Z parameters are defined; there are no Q/X entries
   *   in this block.
   * NOTE(review): R values are defined here for EV4 only.  EV5 and EV6
   * get no *_DEFAULT_R from this block - presumably supplied elsewhere;
   * confirm. */
  1296. #if defined(EV4) || defined(EV5) || defined(EV6)
  1297. #ifdef EV4
  1298. #define SNUMOPT 1
  1299. #define DNUMOPT 1
  1300. #else
  1301. #define SNUMOPT 2
  1302. #define DNUMOPT 2
  1303. #endif
  1304. #define GEMM_DEFAULT_OFFSET_A 512
  1305. #define GEMM_DEFAULT_OFFSET_B 512
  1306. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1307. #define SGEMM_DEFAULT_UNROLL_M 4
  1308. #define SGEMM_DEFAULT_UNROLL_N 4
  1309. #define DGEMM_DEFAULT_UNROLL_M 4
  1310. #define DGEMM_DEFAULT_UNROLL_N 4
  1311. #define CGEMM_DEFAULT_UNROLL_M 2
  1312. #define CGEMM_DEFAULT_UNROLL_N 2
  1313. #define ZGEMM_DEFAULT_UNROLL_M 2
  1314. #define ZGEMM_DEFAULT_UNROLL_N 2
  1315. #define SYMV_P 8
  /* EV4: literal P, Q and R for each precision. */
  1316. #ifdef EV4
  1317. #define SGEMM_DEFAULT_P 32
  1318. #define SGEMM_DEFAULT_Q 112
  1319. #define SGEMM_DEFAULT_R 256
  1320. #define DGEMM_DEFAULT_P 32
  1321. #define DGEMM_DEFAULT_Q 56
  1322. #define DGEMM_DEFAULT_R 256
  1323. #define CGEMM_DEFAULT_P 32
  1324. #define CGEMM_DEFAULT_Q 64
  1325. #define CGEMM_DEFAULT_R 240
  1326. #define ZGEMM_DEFAULT_P 32
  1327. #define ZGEMM_DEFAULT_Q 32
  1328. #define ZGEMM_DEFAULT_R 240
  1329. #endif
  /* EV5: literal P and Q only. */
  1330. #ifdef EV5
  1331. #define SGEMM_DEFAULT_P 64
  1332. #define SGEMM_DEFAULT_Q 256
  1333. #define DGEMM_DEFAULT_P 64
  1334. #define DGEMM_DEFAULT_Q 128
  1335. #define CGEMM_DEFAULT_P 64
  1336. #define CGEMM_DEFAULT_Q 128
  1337. #define ZGEMM_DEFAULT_P 64
  1338. #define ZGEMM_DEFAULT_Q 64
  1339. #endif
  /* EV6: literal P and Q only. */
  1340. #ifdef EV6
  1341. #define SGEMM_DEFAULT_P 256
  1342. #define SGEMM_DEFAULT_Q 512
  1343. #define DGEMM_DEFAULT_P 256
  1344. #define DGEMM_DEFAULT_Q 256
  1345. #define CGEMM_DEFAULT_P 256
  1346. #define CGEMM_DEFAULT_Q 256
  1347. #define ZGEMM_DEFAULT_P 128
  1348. #define ZGEMM_DEFAULT_Q 256
  1349. #endif
  1350. #endif
  /* GEMM tuning parameters used when CELL is defined.
   * - There are no nested conditionals.
   * - Only S, D, C and Z parameters are defined, all as literals.
   * - No *_DEFAULT_R value is defined in this block. */
  1351. #ifdef CELL
  1352. #define SNUMOPT 2
  1353. #define DNUMOPT 2
  1354. #define GEMM_DEFAULT_OFFSET_A 0
  1355. #define GEMM_DEFAULT_OFFSET_B 8192
  1356. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1357. #define SGEMM_DEFAULT_UNROLL_M 16
  1358. #define SGEMM_DEFAULT_UNROLL_N 4
  1359. #define DGEMM_DEFAULT_UNROLL_M 4
  1360. #define DGEMM_DEFAULT_UNROLL_N 4
  1361. #define CGEMM_DEFAULT_UNROLL_M 8
  1362. #define CGEMM_DEFAULT_UNROLL_N 2
  1363. #define ZGEMM_DEFAULT_UNROLL_M 2
  1364. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P is 128 throughout; Q ranges from 512 (S) down to 128 (Z). */
  1365. #define SGEMM_DEFAULT_P 128
  1366. #define DGEMM_DEFAULT_P 128
  1367. #define CGEMM_DEFAULT_P 128
  1368. #define ZGEMM_DEFAULT_P 128
  1369. #define SGEMM_DEFAULT_Q 512
  1370. #define DGEMM_DEFAULT_Q 256
  1371. #define CGEMM_DEFAULT_Q 256
  1372. #define ZGEMM_DEFAULT_Q 128
  1373. #define SYMV_P 4
  1374. #endif
  /* GEMM tuning parameters used when PPCG4 is defined.
   * - There are no nested conditionals.
   * - Only S, D, C and Z parameters are defined, all as literals.
   * - This block defines neither SNUMOPT/DNUMOPT nor any *_DEFAULT_R
   *   value. */
  1375. #ifdef PPCG4
  1376. #define GEMM_DEFAULT_OFFSET_A 0
  1377. #define GEMM_DEFAULT_OFFSET_B 1024
  1378. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1379. #define SGEMM_DEFAULT_UNROLL_M 16
  1380. #define SGEMM_DEFAULT_UNROLL_N 4
  1381. #define DGEMM_DEFAULT_UNROLL_M 4
  1382. #define DGEMM_DEFAULT_UNROLL_N 4
  1383. #define CGEMM_DEFAULT_UNROLL_M 8
  1384. #define CGEMM_DEFAULT_UNROLL_N 2
  1385. #define ZGEMM_DEFAULT_UNROLL_M 2
  1386. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* P ranges from 256 (S) down to 64 (Z); Q is 256 throughout. */
  1387. #define SGEMM_DEFAULT_P 256
  1388. #define DGEMM_DEFAULT_P 128
  1389. #define CGEMM_DEFAULT_P 128
  1390. #define ZGEMM_DEFAULT_P 64
  1391. #define SGEMM_DEFAULT_Q 256
  1392. #define DGEMM_DEFAULT_Q 256
  1393. #define CGEMM_DEFAULT_Q 256
  1394. #define ZGEMM_DEFAULT_Q 256
  1395. #define SYMV_P 4
  1396. #endif
  /* GEMM tuning parameters used when PPC970 is defined.
   * - Only S, D, C and Z parameters are defined.
   * - Q is unconditional.
   * - P is defined only inside the OS_LINUX conditional and depends on
   *   L2_SIZE.
   * NOTE(review): when OS_LINUX is not defined this block sets no
   * *_DEFAULT_P at all - presumably supplied elsewhere; confirm. */
  1397. #ifdef PPC970
  1398. #define SNUMOPT 4
  1399. #define DNUMOPT 4
  1400. #define GEMM_DEFAULT_OFFSET_A 2688
  1401. #define GEMM_DEFAULT_OFFSET_B 3072
  1402. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1403. #define SGEMM_DEFAULT_UNROLL_M 16
  1404. #define SGEMM_DEFAULT_UNROLL_N 4
  1405. #define DGEMM_DEFAULT_UNROLL_M 4
  1406. #define DGEMM_DEFAULT_UNROLL_N 4
  1407. #define CGEMM_DEFAULT_UNROLL_M 8
  1408. #define CGEMM_DEFAULT_UNROLL_N 2
  1409. #define ZGEMM_DEFAULT_UNROLL_M 2
  1410. #define ZGEMM_DEFAULT_UNROLL_N 2
  1411. #ifdef OS_LINUX
  /* NOTE(review): 1024976 is not a power of two (2^20 is 1048576).  The
   * comparison only selects the larger P values when L2_SIZE is exactly
   * this literal, so it presumably has to match the value emitted by the
   * configuration step - confirm before "correcting" it. */
  1412. #if L2_SIZE == 1024976
  1413. #define SGEMM_DEFAULT_P 320
  1414. #define DGEMM_DEFAULT_P 256
  1415. #define CGEMM_DEFAULT_P 256
  1416. #define ZGEMM_DEFAULT_P 256
  1417. #else
  1418. #define SGEMM_DEFAULT_P 176
  1419. #define DGEMM_DEFAULT_P 176
  1420. #define CGEMM_DEFAULT_P 176
  1421. #define ZGEMM_DEFAULT_P 176
  1422. #endif
  1423. #endif
  1424. #define SGEMM_DEFAULT_Q 512
  1425. #define DGEMM_DEFAULT_Q 256
  1426. #define CGEMM_DEFAULT_Q 256
  1427. #define ZGEMM_DEFAULT_Q 128
  1428. #define SYMV_P 4
  1429. #endif
  /* GEMM tuning parameters used when PPC440 is defined.
   * - There are no nested conditionals.
   * - Only S, D, C and Z parameters are defined.
   * - Both offsets are written as (32 * 0), i.e. zero.
   * - Each *_DEFAULT_R is defined as the matching *_DEFAULT_P macro, so R
   *   expands to the same value as P (512). */
  1430. #ifdef PPC440
  1431. #define SNUMOPT 2
  1432. #define DNUMOPT 2
  1433. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1434. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1435. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1436. #define SGEMM_DEFAULT_UNROLL_M 4
  1437. #define SGEMM_DEFAULT_UNROLL_N 4
  1438. #define DGEMM_DEFAULT_UNROLL_M 4
  1439. #define DGEMM_DEFAULT_UNROLL_N 4
  1440. #define CGEMM_DEFAULT_UNROLL_M 2
  1441. #define CGEMM_DEFAULT_UNROLL_N 2
  1442. #define ZGEMM_DEFAULT_UNROLL_M 2
  1443. #define ZGEMM_DEFAULT_UNROLL_N 2
  1444. #define SGEMM_DEFAULT_P 512
  1445. #define DGEMM_DEFAULT_P 512
  1446. #define CGEMM_DEFAULT_P 512
  1447. #define ZGEMM_DEFAULT_P 512
  1448. #define SGEMM_DEFAULT_Q 1024
  1449. #define DGEMM_DEFAULT_Q 512
  1450. #define CGEMM_DEFAULT_Q 512
  1451. #define ZGEMM_DEFAULT_Q 256
  /* R aliases P for each precision. */
  1452. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1453. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1454. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1455. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1456. #define SYMV_P 4
  1457. #endif
  /* GEMM tuning parameters used when PPC440FP2 is defined.
   * - Only S, D, C and Z parameters are defined.
   * - Both offsets are written as (32 * 0), i.e. zero.
   * - P is 128 throughout.
   * - Q comes from a hard-wired "#if 1" switch, so the larger set
   *   (4096/3072/2048/1024) is always the active one.
   * - No *_DEFAULT_R value is defined in this block. */
  1458. #ifdef PPC440FP2
  1459. #define SNUMOPT 4
  1460. #define DNUMOPT 4
  1461. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1462. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1463. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1464. #define SGEMM_DEFAULT_UNROLL_M 8
  1465. #define SGEMM_DEFAULT_UNROLL_N 4
  1466. #define DGEMM_DEFAULT_UNROLL_M 8
  1467. #define DGEMM_DEFAULT_UNROLL_N 4
  1468. #define CGEMM_DEFAULT_UNROLL_M 4
  1469. #define CGEMM_DEFAULT_UNROLL_N 2
  1470. #define ZGEMM_DEFAULT_UNROLL_M 4
  1471. #define ZGEMM_DEFAULT_UNROLL_N 2
  1472. #define SGEMM_DEFAULT_P 128
  1473. #define DGEMM_DEFAULT_P 128
  1474. #define CGEMM_DEFAULT_P 128
  1475. #define ZGEMM_DEFAULT_P 128
  /* "#if 1" is always true: the #else set below is never compiled and is
   * kept only as a disabled alternative. */
  1476. #if 1
  1477. #define SGEMM_DEFAULT_Q 4096
  1478. #define DGEMM_DEFAULT_Q 3072
  1479. #define CGEMM_DEFAULT_Q 2048
  1480. #define ZGEMM_DEFAULT_Q 1024
  1481. #else
  1482. #define SGEMM_DEFAULT_Q 512
  1483. #define DGEMM_DEFAULT_Q 256
  1484. #define CGEMM_DEFAULT_Q 256
  1485. #define ZGEMM_DEFAULT_Q 128
  1486. #endif
  1487. #define SYMV_P 4
  1488. #endif
  /* Tuning macros shared by POWER3, POWER4 and POWER5.
   * Offsets, alignment mask, unroll factors and SYMV_P are common to all three;
   * the nested per-CPU sections below add the remaining values.
   * What each nested section defines is uneven:
   *   POWER3: SNUMOPT/DNUMOPT and P/Q/R for S, D and Z only (no CGEMM_* P/Q/R).
   *   POWER4: P only (depends on ALLOC_HUGETLB); no Q, R, SNUMOPT or DNUMOPT.
   *   POWER5: P (depends on ALLOC_HUGETLB) and Q; no R, SNUMOPT or DNUMOPT.
   * NOTE(review): confirm the missing macros are provided elsewhere or are
   * intentionally unused on these targets. */
  1489. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1490. #define GEMM_DEFAULT_OFFSET_A 0
  1491. #define GEMM_DEFAULT_OFFSET_B 2048
  1492. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1493. #define SGEMM_DEFAULT_UNROLL_M 4
  1494. #define SGEMM_DEFAULT_UNROLL_N 4
  1495. #define DGEMM_DEFAULT_UNROLL_M 4
  1496. #define DGEMM_DEFAULT_UNROLL_N 4
  1497. #define CGEMM_DEFAULT_UNROLL_M 2
  1498. #define CGEMM_DEFAULT_UNROLL_N 2
  1499. #define ZGEMM_DEFAULT_UNROLL_M 2
  1500. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* POWER3: explicit P/Q/R for SGEMM, DGEMM and ZGEMM. */
  1501. #ifdef POWER3
  1502. #define SNUMOPT 4
  1503. #define DNUMOPT 4
  1504. #define SGEMM_DEFAULT_P 256
  1505. #define SGEMM_DEFAULT_Q 432
  1506. #define SGEMM_DEFAULT_R 1012
  1507. #define DGEMM_DEFAULT_P 256
  1508. #define DGEMM_DEFAULT_Q 216
  1509. #define DGEMM_DEFAULT_R 1012
  1510. #define ZGEMM_DEFAULT_P 256
  1511. #define ZGEMM_DEFAULT_Q 104
  1512. #define ZGEMM_DEFAULT_R 1012
  1513. #endif /* POWER3 */
  /* POWER4: one P value for all four precisions, larger when ALLOC_HUGETLB is defined. */
  1514. #if defined(POWER4)
  1515. #ifdef ALLOC_HUGETLB
  1516. #define SGEMM_DEFAULT_P 184
  1517. #define DGEMM_DEFAULT_P 184
  1518. #define CGEMM_DEFAULT_P 184
  1519. #define ZGEMM_DEFAULT_P 184
  1520. #else
  1521. #define SGEMM_DEFAULT_P 144
  1522. #define DGEMM_DEFAULT_P 144
  1523. #define CGEMM_DEFAULT_P 144
  1524. #define ZGEMM_DEFAULT_P 144
  1525. #endif
  1526. #endif /* POWER4 */
  /* POWER5: per-precision P (larger when ALLOC_HUGETLB is defined), Q fixed at 256. */
  1527. #if defined(POWER5)
  1528. #ifdef ALLOC_HUGETLB
  1529. #define SGEMM_DEFAULT_P 512
  1530. #define DGEMM_DEFAULT_P 256
  1531. #define CGEMM_DEFAULT_P 256
  1532. #define ZGEMM_DEFAULT_P 128
  1533. #else
  1534. #define SGEMM_DEFAULT_P 320
  1535. #define DGEMM_DEFAULT_P 160
  1536. #define CGEMM_DEFAULT_P 160
  1537. #define ZGEMM_DEFAULT_P 80
  1538. #endif
  1539. #define SGEMM_DEFAULT_Q 256
  1540. #define CGEMM_DEFAULT_Q 256
  1541. #define DGEMM_DEFAULT_Q 256
  1542. #define ZGEMM_DEFAULT_Q 256
  1543. #endif /* POWER5 */
  1544. #define SYMV_P 8
  1545. #endif /* POWER3 || POWER4 || POWER5 */
  /* Tuning macros selected when POWER6 is defined.
   * Both A and B offsets are non-zero here, and the complex kernels use a
   * 2x4 unroll. No *GEMM_DEFAULT_R is defined in this block.
   * NOTE(review): confirm where R comes from for this target. */
  1546. #if defined(POWER6)
  1547. #define SNUMOPT 4
  1548. #define DNUMOPT 4
  1549. #define GEMM_DEFAULT_OFFSET_A 384
  1550. #define GEMM_DEFAULT_OFFSET_B 1024
  1551. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1552. #define SGEMM_DEFAULT_UNROLL_M 4
  1553. #define SGEMM_DEFAULT_UNROLL_N 4
  1554. #define DGEMM_DEFAULT_UNROLL_M 4
  1555. #define DGEMM_DEFAULT_UNROLL_N 4
  1556. #define CGEMM_DEFAULT_UNROLL_M 2
  1557. #define CGEMM_DEFAULT_UNROLL_N 4
  1558. #define ZGEMM_DEFAULT_UNROLL_M 2
  1559. #define ZGEMM_DEFAULT_UNROLL_N 4
  1560. #define SGEMM_DEFAULT_P 992
  1561. #define DGEMM_DEFAULT_P 480
  1562. #define CGEMM_DEFAULT_P 488
  1563. #define ZGEMM_DEFAULT_P 248
  1564. #define SGEMM_DEFAULT_Q 504
  1565. #define DGEMM_DEFAULT_Q 504
  1566. #define CGEMM_DEFAULT_Q 400
  1567. #define ZGEMM_DEFAULT_Q 400
  1568. #define SYMV_P 8
  1569. #endif /* POWER6 */
  /* Tuning macros selected when both SPARC and V7 are defined.
   * Besides the usual offsets/unroll/P/Q values this block also defines
   * GEMM_THREAD. No *GEMM_DEFAULT_R is defined here. */
  1570. #if defined(SPARC) && defined(V7)
  1571. #define SNUMOPT 4
  1572. #define DNUMOPT 4
  1573. #define GEMM_DEFAULT_OFFSET_A 0
  1574. #define GEMM_DEFAULT_OFFSET_B 2048
  1575. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1576. #define SGEMM_DEFAULT_UNROLL_M 2
  1577. #define SGEMM_DEFAULT_UNROLL_N 8
  1578. #define DGEMM_DEFAULT_UNROLL_M 2
  1579. #define DGEMM_DEFAULT_UNROLL_N 8
  1580. #define CGEMM_DEFAULT_UNROLL_M 1
  1581. #define CGEMM_DEFAULT_UNROLL_N 4
  1582. #define ZGEMM_DEFAULT_UNROLL_M 1
  1583. #define ZGEMM_DEFAULT_UNROLL_N 4
  1584. #define SGEMM_DEFAULT_P 256
  1585. #define DGEMM_DEFAULT_P 256
  1586. #define CGEMM_DEFAULT_P 256
  1587. #define ZGEMM_DEFAULT_P 256
  1588. #define SGEMM_DEFAULT_Q 512
  1589. #define DGEMM_DEFAULT_Q 256
  1590. #define CGEMM_DEFAULT_Q 256
  1591. #define ZGEMM_DEFAULT_Q 128
  1592. #define SYMV_P 8
  /* Expands to the identifier gemm_thread_mn, which is declared outside this block. */
  1593. #define GEMM_THREAD gemm_thread_mn
  1594. #endif /* SPARC && V7 */
  /* Tuning macros selected when SPARC and V9 are both defined, or when
   * __sparc_v9__ is defined on its own.
   * No *GEMM_DEFAULT_R and no GEMM_THREAD are defined in this block. */
  1595. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1596. #define SNUMOPT 2
  1597. #define DNUMOPT 2
  1598. #define GEMM_DEFAULT_OFFSET_A 0
  1599. #define GEMM_DEFAULT_OFFSET_B 2048
  1600. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1601. #define SGEMM_DEFAULT_UNROLL_M 4
  1602. #define SGEMM_DEFAULT_UNROLL_N 4
  1603. #define DGEMM_DEFAULT_UNROLL_M 4
  1604. #define DGEMM_DEFAULT_UNROLL_N 4
  1605. #define CGEMM_DEFAULT_UNROLL_M 2
  1606. #define CGEMM_DEFAULT_UNROLL_N 2
  1607. #define ZGEMM_DEFAULT_UNROLL_M 2
  1608. #define ZGEMM_DEFAULT_UNROLL_N 2
  1609. #define SGEMM_DEFAULT_P 512
  1610. #define DGEMM_DEFAULT_P 512
  1611. #define CGEMM_DEFAULT_P 512
  1612. #define ZGEMM_DEFAULT_P 512
  1613. #define SGEMM_DEFAULT_Q 1024
  1614. #define DGEMM_DEFAULT_Q 512
  1615. #define CGEMM_DEFAULT_Q 512
  1616. #define ZGEMM_DEFAULT_Q 256
  1617. #define SYMV_P 8
  1618. #endif /* (SPARC && V9) || __sparc_v9__ */
  /* Tuning macros selected when SICORTEX is defined.
   * Defines the full set: NUMOPT, offsets, alignment mask, unroll factors,
   * P, Q, R (2000 for every precision) and SYMV_P. */
  1619. #ifdef SICORTEX
  1620. #define SNUMOPT 2
  1621. #define DNUMOPT 2
  1622. #define GEMM_DEFAULT_OFFSET_A 0
  1623. #define GEMM_DEFAULT_OFFSET_B 0
  1624. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1625. #define SGEMM_DEFAULT_UNROLL_M 2
  1626. #define SGEMM_DEFAULT_UNROLL_N 8
  1627. #define DGEMM_DEFAULT_UNROLL_M 2
  1628. #define DGEMM_DEFAULT_UNROLL_N 8
  1629. #define CGEMM_DEFAULT_UNROLL_M 1
  1630. #define CGEMM_DEFAULT_UNROLL_N 4
  1631. #define ZGEMM_DEFAULT_UNROLL_M 1
  1632. #define ZGEMM_DEFAULT_UNROLL_N 4
  1633. #define SGEMM_DEFAULT_P 108
  1634. #define DGEMM_DEFAULT_P 112
  1635. #define CGEMM_DEFAULT_P 108
  1636. #define ZGEMM_DEFAULT_P 112
  1637. #define SGEMM_DEFAULT_Q 288
  1638. #define DGEMM_DEFAULT_Q 144
  1639. #define CGEMM_DEFAULT_Q 144
  1640. #define ZGEMM_DEFAULT_Q 72
  1641. #define SGEMM_DEFAULT_R 2000
  1642. #define DGEMM_DEFAULT_R 2000
  1643. #define CGEMM_DEFAULT_R 2000
  1644. #define ZGEMM_DEFAULT_R 2000
  1645. #define SYMV_P 16
  1646. #endif /* SICORTEX */
  /* Tuning macros selected when LOONGSON3A is defined.
   * In addition to the usual set, this block defines GEMM_OFFSET_A1 and
   * GEMM_OFFSET_B1. */
  1647. #ifdef LOONGSON3A
  1648. /* Layout originally copied from the SICORTEX block; the unroll, P, Q and R values below now differ from it. */
  1649. #define SNUMOPT 2
  1650. #define DNUMOPT 2
  1651. #define GEMM_DEFAULT_OFFSET_A 0
  1652. #define GEMM_DEFAULT_OFFSET_B 0
  1653. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1654. #define SGEMM_DEFAULT_UNROLL_M 8
  1655. #define SGEMM_DEFAULT_UNROLL_N 4
  1656. #define DGEMM_DEFAULT_UNROLL_M 4
  1657. #define DGEMM_DEFAULT_UNROLL_N 4
  1658. #define CGEMM_DEFAULT_UNROLL_M 4
  1659. #define CGEMM_DEFAULT_UNROLL_N 2
  1660. #define ZGEMM_DEFAULT_UNROLL_M 2
  1661. #define ZGEMM_DEFAULT_UNROLL_N 2
  1662. #define SGEMM_DEFAULT_P 64
  1663. #define DGEMM_DEFAULT_P 44
  1664. #define CGEMM_DEFAULT_P 64
  1665. #define ZGEMM_DEFAULT_P 32
  1666. #define SGEMM_DEFAULT_Q 192
  1667. #define DGEMM_DEFAULT_Q 92
  1668. #define CGEMM_DEFAULT_Q 128
  1669. #define ZGEMM_DEFAULT_Q 80
  1670. #define SGEMM_DEFAULT_R 640
  /* Unlike the S/C/Z values (literal 640), the D value expands to the identifier
   * dgemm_r, which must be declared wherever this macro is used.
   * NOTE(review): presumably a value computed at run time; confirm. */
  1671. #define DGEMM_DEFAULT_R dgemm_r
  1672. #define CGEMM_DEFAULT_R 640
  1673. #define ZGEMM_DEFAULT_R 640
  1674. #define GEMM_OFFSET_A1 0x10000
  1675. #define GEMM_OFFSET_B1 0x100000
  1676. #define SYMV_P 16
  1677. #endif /* LOONGSON3A */
  /* Tuning macros selected when LOONGSON3B is defined.
   * All kernels use a 2x2 unroll and R is the literal 512 for every precision.
   * Also defines GEMM_OFFSET_A1 and GEMM_OFFSET_B1. */
  1678. #ifdef LOONGSON3B
  1679. #define SNUMOPT 2
  1680. #define DNUMOPT 2
  1681. #define GEMM_DEFAULT_OFFSET_A 0
  1682. #define GEMM_DEFAULT_OFFSET_B 0
  1683. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1684. #define SGEMM_DEFAULT_UNROLL_M 2
  1685. #define SGEMM_DEFAULT_UNROLL_N 2
  1686. #define DGEMM_DEFAULT_UNROLL_M 2
  1687. #define DGEMM_DEFAULT_UNROLL_N 2
  1688. #define CGEMM_DEFAULT_UNROLL_M 2
  1689. #define CGEMM_DEFAULT_UNROLL_N 2
  1690. #define ZGEMM_DEFAULT_UNROLL_M 2
  1691. #define ZGEMM_DEFAULT_UNROLL_N 2
  1692. #define SGEMM_DEFAULT_P 64
  1693. #define DGEMM_DEFAULT_P 24
  1694. #define CGEMM_DEFAULT_P 24
  1695. #define ZGEMM_DEFAULT_P 20
  1696. #define SGEMM_DEFAULT_Q 192
  1697. #define DGEMM_DEFAULT_Q 128
  1698. #define CGEMM_DEFAULT_Q 128
  1699. #define ZGEMM_DEFAULT_Q 64
  1700. #define SGEMM_DEFAULT_R 512
  1701. #define DGEMM_DEFAULT_R 512
  1702. #define CGEMM_DEFAULT_R 512
  1703. #define ZGEMM_DEFAULT_R 512
  1704. #define GEMM_OFFSET_A1 0x10000
  1705. #define GEMM_OFFSET_B1 0x100000
  1706. #define SYMV_P 16
  1707. #endif /* LOONGSON3B */
  /* Tuning macros selected when ARMV7 is defined.
   * Real kernels (S, D) use a 4x4 unroll, complex kernels (C, Z) 2x2.
   * Defines the full set including literal R values. */
  1708. #ifdef ARMV7
  1709. #define SNUMOPT 2
  1710. #define DNUMOPT 2
  1711. #define GEMM_DEFAULT_OFFSET_A 0
  1712. #define GEMM_DEFAULT_OFFSET_B 0
  1713. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1714. #define SGEMM_DEFAULT_UNROLL_M 4
  1715. #define SGEMM_DEFAULT_UNROLL_N 4
  1716. #define DGEMM_DEFAULT_UNROLL_M 4
  1717. #define DGEMM_DEFAULT_UNROLL_N 4
  1718. #define CGEMM_DEFAULT_UNROLL_M 2
  1719. #define CGEMM_DEFAULT_UNROLL_N 2
  1720. #define ZGEMM_DEFAULT_UNROLL_M 2
  1721. #define ZGEMM_DEFAULT_UNROLL_N 2
  1722. #define SGEMM_DEFAULT_P 128
  1723. #define DGEMM_DEFAULT_P 128
  1724. #define CGEMM_DEFAULT_P 96
  1725. #define ZGEMM_DEFAULT_P 64
  1726. #define SGEMM_DEFAULT_Q 240
  1727. #define DGEMM_DEFAULT_Q 120
  1728. #define CGEMM_DEFAULT_Q 120
  1729. #define ZGEMM_DEFAULT_Q 120
  1730. #define SGEMM_DEFAULT_R 12288
  1731. #define DGEMM_DEFAULT_R 8192
  1732. #define CGEMM_DEFAULT_R 4096
  1733. #define ZGEMM_DEFAULT_R 4096
  1734. #define SYMV_P 16
  1735. #endif /* ARMV7 */
  /* Tuning macros selected when ARMV6 is defined.
   * Real kernels (S, D) use a 4x2 unroll, complex kernels (C, Z) 2x2.
   * Defines the full set including literal R values. */
  1736. #if defined(ARMV6)
  1737. #define SNUMOPT 2
  1738. #define DNUMOPT 2
  1739. #define GEMM_DEFAULT_OFFSET_A 0
  1740. #define GEMM_DEFAULT_OFFSET_B 0
  1741. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1742. #define SGEMM_DEFAULT_UNROLL_M 4
  1743. #define SGEMM_DEFAULT_UNROLL_N 2
  1744. #define DGEMM_DEFAULT_UNROLL_M 4
  1745. #define DGEMM_DEFAULT_UNROLL_N 2
  1746. #define CGEMM_DEFAULT_UNROLL_M 2
  1747. #define CGEMM_DEFAULT_UNROLL_N 2
  1748. #define ZGEMM_DEFAULT_UNROLL_M 2
  1749. #define ZGEMM_DEFAULT_UNROLL_N 2
  1750. #define SGEMM_DEFAULT_P 128
  1751. #define DGEMM_DEFAULT_P 128
  1752. #define CGEMM_DEFAULT_P 96
  1753. #define ZGEMM_DEFAULT_P 64
  1754. #define SGEMM_DEFAULT_Q 240
  1755. #define DGEMM_DEFAULT_Q 120
  1756. #define CGEMM_DEFAULT_Q 120
  1757. #define ZGEMM_DEFAULT_Q 120
  1758. #define SGEMM_DEFAULT_R 12288
  1759. #define DGEMM_DEFAULT_R 8192
  1760. #define CGEMM_DEFAULT_R 4096
  1761. #define ZGEMM_DEFAULT_R 4096
  1762. #define SYMV_P 16
  1763. #endif /* ARMV6 */
  /* Tuning macros selected when CORTEXA57 is defined.
   * Every precision, including the complex ones, uses a 4x4 unroll.
   * Defines the full set including literal R values. */
  1764. #if defined(CORTEXA57)
  1765. #define SNUMOPT 2
  1766. #define DNUMOPT 2
  1767. #define GEMM_DEFAULT_OFFSET_A 0
  1768. #define GEMM_DEFAULT_OFFSET_B 0
  1769. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1770. #define SGEMM_DEFAULT_UNROLL_M 4
  1771. #define SGEMM_DEFAULT_UNROLL_N 4
  1772. #define DGEMM_DEFAULT_UNROLL_M 4
  1773. #define DGEMM_DEFAULT_UNROLL_N 4
  1774. #define CGEMM_DEFAULT_UNROLL_M 4
  1775. #define CGEMM_DEFAULT_UNROLL_N 4
  1776. #define ZGEMM_DEFAULT_UNROLL_M 4
  1777. #define ZGEMM_DEFAULT_UNROLL_N 4
  1778. #define SGEMM_DEFAULT_P 512
  1779. #define DGEMM_DEFAULT_P 256
  1780. #define CGEMM_DEFAULT_P 256
  1781. #define ZGEMM_DEFAULT_P 128
  1782. #define SGEMM_DEFAULT_Q 1024
  1783. #define DGEMM_DEFAULT_Q 512
  1784. #define CGEMM_DEFAULT_Q 512
  1785. #define ZGEMM_DEFAULT_Q 512
  1786. #define SGEMM_DEFAULT_R 4096
  1787. #define DGEMM_DEFAULT_R 4096
  1788. #define CGEMM_DEFAULT_R 4096
  1789. #define ZGEMM_DEFAULT_R 2048
  1790. #define SYMV_P 16
  1791. #endif /* CORTEXA57 */
  /* Tuning macros selected when ARMV8 is defined.
   * SGEMM uses a 4x4 unroll; DGEMM, CGEMM and ZGEMM use 2x2.
   * Defines the full set including literal R values. */
  1792. #if defined(ARMV8)
  1793. #define SNUMOPT 2
  1794. #define DNUMOPT 2
  1795. #define GEMM_DEFAULT_OFFSET_A 0
  1796. #define GEMM_DEFAULT_OFFSET_B 0
  1797. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1798. #define SGEMM_DEFAULT_UNROLL_M 4
  1799. #define SGEMM_DEFAULT_UNROLL_N 4
  1800. #define DGEMM_DEFAULT_UNROLL_M 2
  1801. #define DGEMM_DEFAULT_UNROLL_N 2
  1802. #define CGEMM_DEFAULT_UNROLL_M 2
  1803. #define CGEMM_DEFAULT_UNROLL_N 2
  1804. #define ZGEMM_DEFAULT_UNROLL_M 2
  1805. #define ZGEMM_DEFAULT_UNROLL_N 2
  1806. #define SGEMM_DEFAULT_P 128
  1807. #define DGEMM_DEFAULT_P 128
  1808. #define CGEMM_DEFAULT_P 96
  1809. #define ZGEMM_DEFAULT_P 64
  1810. #define SGEMM_DEFAULT_Q 240
  1811. #define DGEMM_DEFAULT_Q 120
  1812. #define CGEMM_DEFAULT_Q 120
  1813. #define ZGEMM_DEFAULT_Q 120
  1814. #define SGEMM_DEFAULT_R 12288
  1815. #define DGEMM_DEFAULT_R 8192
  1816. #define CGEMM_DEFAULT_R 4096
  1817. #define ZGEMM_DEFAULT_R 4096
  1818. #define SYMV_P 16
  1819. #endif /* ARMV8 */
  /* Tuning macros selected when ARMV5 is defined.
   * Every precision uses a 2x2 unroll.
   * Defines the full set including literal R values. */
  1820. #if defined(ARMV5)
  1821. #define SNUMOPT 2
  1822. #define DNUMOPT 2
  1823. #define GEMM_DEFAULT_OFFSET_A 0
  1824. #define GEMM_DEFAULT_OFFSET_B 0
  1825. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1826. #define SGEMM_DEFAULT_UNROLL_M 2
  1827. #define SGEMM_DEFAULT_UNROLL_N 2
  1828. #define DGEMM_DEFAULT_UNROLL_M 2
  1829. #define DGEMM_DEFAULT_UNROLL_N 2
  1830. #define CGEMM_DEFAULT_UNROLL_M 2
  1831. #define CGEMM_DEFAULT_UNROLL_N 2
  1832. #define ZGEMM_DEFAULT_UNROLL_M 2
  1833. #define ZGEMM_DEFAULT_UNROLL_N 2
  1834. #define SGEMM_DEFAULT_P 128
  1835. #define DGEMM_DEFAULT_P 128
  1836. #define CGEMM_DEFAULT_P 96
  1837. #define ZGEMM_DEFAULT_P 64
  1838. #define SGEMM_DEFAULT_Q 240
  1839. #define DGEMM_DEFAULT_Q 120
  1840. #define CGEMM_DEFAULT_Q 120
  1841. #define ZGEMM_DEFAULT_Q 120
  1842. #define SGEMM_DEFAULT_R 12288
  1843. #define DGEMM_DEFAULT_R 8192
  1844. #define CGEMM_DEFAULT_R 4096
  1845. #define ZGEMM_DEFAULT_R 4096
  1846. #define SYMV_P 16
  1847. #endif /* ARMV5 */
  /* Tuning macros selected when CORTEXA9 is defined.
   * Real kernels (S, D) use a 4x4 unroll, complex kernels (C, Z) 2x2.
   * Defines the full set including literal R values. */
  1848. #ifdef CORTEXA9
  1849. #define SNUMOPT 2
  1850. #define DNUMOPT 2
  1851. #define GEMM_DEFAULT_OFFSET_A 0
  1852. #define GEMM_DEFAULT_OFFSET_B 0
  1853. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1854. #define SGEMM_DEFAULT_UNROLL_M 4
  1855. #define SGEMM_DEFAULT_UNROLL_N 4
  1856. #define DGEMM_DEFAULT_UNROLL_M 4
  1857. #define DGEMM_DEFAULT_UNROLL_N 4
  1858. #define CGEMM_DEFAULT_UNROLL_M 2
  1859. #define CGEMM_DEFAULT_UNROLL_N 2
  1860. #define ZGEMM_DEFAULT_UNROLL_M 2
  1861. #define ZGEMM_DEFAULT_UNROLL_N 2
  1862. #define SGEMM_DEFAULT_P 128
  1863. #define DGEMM_DEFAULT_P 128
  1864. #define CGEMM_DEFAULT_P 96
  1865. #define ZGEMM_DEFAULT_P 64
  1866. #define SGEMM_DEFAULT_Q 240
  1867. #define DGEMM_DEFAULT_Q 120
  1868. #define CGEMM_DEFAULT_Q 120
  1869. #define ZGEMM_DEFAULT_Q 120
  1870. #define SGEMM_DEFAULT_R 12288
  1871. #define DGEMM_DEFAULT_R 8192
  1872. #define CGEMM_DEFAULT_R 4096
  1873. #define ZGEMM_DEFAULT_R 4096
  1874. #define SYMV_P 16
  1875. #endif /* CORTEXA9 */
  /* Tuning macros selected when CORTEXA15 is defined.
   * Real kernels (S, D) use a 4x4 unroll, complex kernels (C, Z) 2x2.
   * Defines the full set including literal R values. */
  1876. #ifdef CORTEXA15
  1877. #define SNUMOPT 2
  1878. #define DNUMOPT 2
  1879. #define GEMM_DEFAULT_OFFSET_A 0
  1880. #define GEMM_DEFAULT_OFFSET_B 0
  1881. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1882. #define SGEMM_DEFAULT_UNROLL_M 4
  1883. #define SGEMM_DEFAULT_UNROLL_N 4
  1884. #define DGEMM_DEFAULT_UNROLL_M 4
  1885. #define DGEMM_DEFAULT_UNROLL_N 4
  1886. #define CGEMM_DEFAULT_UNROLL_M 2
  1887. #define CGEMM_DEFAULT_UNROLL_N 2
  1888. #define ZGEMM_DEFAULT_UNROLL_M 2
  1889. #define ZGEMM_DEFAULT_UNROLL_N 2
  1890. #define SGEMM_DEFAULT_P 128
  1891. #define DGEMM_DEFAULT_P 128
  1892. #define CGEMM_DEFAULT_P 96
  1893. #define ZGEMM_DEFAULT_P 64
  1894. #define SGEMM_DEFAULT_Q 240
  1895. #define DGEMM_DEFAULT_Q 120
  1896. #define CGEMM_DEFAULT_Q 120
  1897. #define ZGEMM_DEFAULT_Q 120
  1898. #define SGEMM_DEFAULT_R 12288
  1899. #define DGEMM_DEFAULT_R 8192
  1900. #define CGEMM_DEFAULT_R 4096
  1901. #define ZGEMM_DEFAULT_R 4096
  1902. #define SYMV_P 16
  1903. #endif /* CORTEXA15 */
  /* Tuning macros selected when GENERIC is defined.
   * This is the only block in this part of the file that also sets the
   * QGEMM_* and XGEMM_* macros explicitly.
   * P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than
   * literals, so those names must be declared wherever the macros are used;
   * Q is the literal 128 for every precision. */
  1904. #ifdef GENERIC
  1905. #define SNUMOPT 2
  1906. #define DNUMOPT 2
  1907. #define GEMM_DEFAULT_OFFSET_A 0
  1908. #define GEMM_DEFAULT_OFFSET_B 0
  1909. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1910. #define SGEMM_DEFAULT_UNROLL_N 2
  1911. #define DGEMM_DEFAULT_UNROLL_N 2
  1912. #define QGEMM_DEFAULT_UNROLL_N 2
  1913. #define CGEMM_DEFAULT_UNROLL_N 2
  1914. #define ZGEMM_DEFAULT_UNROLL_N 2
  1915. #define XGEMM_DEFAULT_UNROLL_N 1
  /* NOTE(review): the ARCH_X86 and non-ARCH_X86 branches below define exactly
   * the same UNROLL_M values, so the conditional currently has no effect. */
  1916. #ifdef ARCH_X86
  1917. #define SGEMM_DEFAULT_UNROLL_M 2
  1918. #define DGEMM_DEFAULT_UNROLL_M 2
  1919. #define QGEMM_DEFAULT_UNROLL_M 2
  1920. #define CGEMM_DEFAULT_UNROLL_M 2
  1921. #define ZGEMM_DEFAULT_UNROLL_M 2
  1922. #define XGEMM_DEFAULT_UNROLL_M 1
  1923. #else
  1924. #define SGEMM_DEFAULT_UNROLL_M 2
  1925. #define DGEMM_DEFAULT_UNROLL_M 2
  1926. #define QGEMM_DEFAULT_UNROLL_M 2
  1927. #define CGEMM_DEFAULT_UNROLL_M 2
  1928. #define ZGEMM_DEFAULT_UNROLL_M 2
  1929. #define XGEMM_DEFAULT_UNROLL_M 1
  1930. #endif
  /* P: identifiers, not constants. */
  1931. #define SGEMM_DEFAULT_P sgemm_p
  1932. #define DGEMM_DEFAULT_P dgemm_p
  1933. #define QGEMM_DEFAULT_P qgemm_p
  1934. #define CGEMM_DEFAULT_P cgemm_p
  1935. #define ZGEMM_DEFAULT_P zgemm_p
  1936. #define XGEMM_DEFAULT_P xgemm_p
  /* R: identifiers, not constants. */
  1937. #define SGEMM_DEFAULT_R sgemm_r
  1938. #define DGEMM_DEFAULT_R dgemm_r
  1939. #define QGEMM_DEFAULT_R qgemm_r
  1940. #define CGEMM_DEFAULT_R cgemm_r
  1941. #define ZGEMM_DEFAULT_R zgemm_r
  1942. #define XGEMM_DEFAULT_R xgemm_r
  /* Q: literal constants. */
  1943. #define SGEMM_DEFAULT_Q 128
  1944. #define DGEMM_DEFAULT_Q 128
  1945. #define QGEMM_DEFAULT_Q 128
  1946. #define CGEMM_DEFAULT_Q 128
  1947. #define ZGEMM_DEFAULT_Q 128
  1948. #define XGEMM_DEFAULT_Q 128
  1949. #define SYMV_P 16
  1950. #endif /* GENERIC */
  /* Fallbacks: give QGEMM and XGEMM an unroll factor of 2 in each dimension
   * when no earlier block has defined one. Must stay after the per-target
   * blocks so that their definitions take precedence.
   * NOTE(review): the XGEMM fallback is 2, whereas the GENERIC block sets
   * XGEMM_DEFAULT_UNROLL_M/N to 1 explicitly; confirm the difference is intended. */
  1951. #ifndef QGEMM_DEFAULT_UNROLL_M
  1952. #define QGEMM_DEFAULT_UNROLL_M 2
  1953. #endif
  1954. #ifndef QGEMM_DEFAULT_UNROLL_N
  1955. #define QGEMM_DEFAULT_UNROLL_N 2
  1956. #endif
  1957. #ifndef XGEMM_DEFAULT_UNROLL_M
  1958. #define XGEMM_DEFAULT_UNROLL_M 2
  1959. #endif
  1960. #ifndef XGEMM_DEFAULT_UNROLL_N
  1961. #define XGEMM_DEFAULT_UNROLL_N 2
  1962. #endif
  /* Shuffle mnemonic macros. Each expansion is an instruction mnemonic plus its
   * immediate and ends with a comma, so the use site supplies the remaining
   * operands.
   * When HAVE_SSE2 is not defined, SHUFPD_0..SHUFPD_3 are mapped onto shufps.
   * The shufps immediates pick pairs of 32-bit lanes that correspond to the
   * 64-bit halves shufpd $0..$3 would select:
   *   0x44 -> first-operand low,  second-operand low   (shufpd $0)
   *   0x4e -> first-operand high, second-operand low   (shufpd $1)
   *   0xe4 -> first-operand low,  second-operand high  (shufpd $2)
   *   0xee -> first-operand high, second-operand high  (shufpd $3)
   * The #ifndef guards that follow only take effect for names not defined yet,
   * so this block must come first. */
  1963. #ifndef HAVE_SSE2
  1964. #define SHUFPD_0 shufps $0x44,
  1965. #define SHUFPD_1 shufps $0x4e,
  1966. #define SHUFPD_2 shufps $0xe4,
  1967. #define SHUFPD_3 shufps $0xee,
  1968. #endif
  /* Native shufpd forms, used when nothing above (or earlier in the file) defined the macro. */
  1969. #ifndef SHUFPD_0
  1970. #define SHUFPD_0 shufpd $0,
  1971. #endif
  1972. #ifndef SHUFPD_1
  1973. #define SHUFPD_1 shufpd $1,
  1974. #endif
  1975. #ifndef SHUFPD_2
  1976. #define SHUFPD_2 shufpd $2,
  1977. #endif
  1978. #ifndef SHUFPD_3
  1979. #define SHUFPD_3 shufpd $3,
  1980. #endif
  /* Default for SHUFPS_39 when not defined earlier: shufps with immediate 0x39. */
  1981. #ifndef SHUFPS_39
  1982. #define SHUFPS_39 shufps $0x39,
  1983. #endif
  1983. #endif
  1984. #endif