You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 64 kB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769
  1. /*****************************************************************************
  2. Copyright (c) 2011-2014, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written
  16. permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. **********************************************************************************/
  28. /*********************************************************************/
  29. /* Copyright 2009, 2010 The University of Texas at Austin. */
  30. /* All rights reserved. */
  31. /* */
  32. /* Redistribution and use in source and binary forms, with or */
  33. /* without modification, are permitted provided that the following */
  34. /* conditions are met: */
  35. /* */
  36. /* 1. Redistributions of source code must retain the above */
  37. /* copyright notice, this list of conditions and the following */
  38. /* disclaimer. */
  39. /* */
  40. /* 2. Redistributions in binary form must reproduce the above */
  41. /* copyright notice, this list of conditions and the following */
  42. /* disclaimer in the documentation and/or other materials */
  43. /* provided with the distribution. */
  44. /* */
  45. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  46. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  47. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  48. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  49. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  50. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  51. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  52. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  53. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  54. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  55. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  56. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  57. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  58. /* POSSIBILITY OF SUCH DAMAGE. */
  59. /* */
  60. /* The views and conclusions contained in the software and */
  61. /* documentation are those of the authors and should not be */
  62. /* interpreted as representing official policies, either expressed */
  63. /* or implied, of The University of Texas at Austin. */
  64. /*********************************************************************/
  65. #ifndef PARAM_H
  66. #define PARAM_H
  67. #ifdef OPTERON
  /* Parameter set compiled in when the OPTERON macro is defined. */
  68. #define SNUMOPT 4
  69. #define DNUMOPT 2
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  70. #define GEMM_DEFAULT_OFFSET_A 64
  71. #define GEMM_DEFAULT_OFFSET_B 256
  72. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  /* UNROLL_N values are unconditional in this block. */
  73. #define SGEMM_DEFAULT_UNROLL_N 4
  74. #define DGEMM_DEFAULT_UNROLL_N 4
  75. #define QGEMM_DEFAULT_UNROLL_N 2
  76. #define CGEMM_DEFAULT_UNROLL_N 2
  77. #define ZGEMM_DEFAULT_UNROLL_N 2
  78. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M depends on ARCH_X86: the S/D/C/Z values in the ARCH_X86 branch
     are half of those in the #else branch; Q and X are the same in both. */
  79. #ifdef ARCH_X86
  80. #define SGEMM_DEFAULT_UNROLL_M 4
  81. #define DGEMM_DEFAULT_UNROLL_M 2
  82. #define QGEMM_DEFAULT_UNROLL_M 2
  83. #define CGEMM_DEFAULT_UNROLL_M 2
  84. #define ZGEMM_DEFAULT_UNROLL_M 1
  85. #define XGEMM_DEFAULT_UNROLL_M 1
  86. #else
  87. #define SGEMM_DEFAULT_UNROLL_M 8
  88. #define DGEMM_DEFAULT_UNROLL_M 4
  89. #define QGEMM_DEFAULT_UNROLL_M 2
  90. #define CGEMM_DEFAULT_UNROLL_M 4
  91. #define ZGEMM_DEFAULT_UNROLL_M 2
  92. #define XGEMM_DEFAULT_UNROLL_M 1
  93. #endif
  /* P and R expand to lower-case identifiers (sgemm_p, sgemm_r, ...) rather
     than literals. Those identifiers are not defined in this block;
     presumably they are variables set elsewhere -- TODO confirm. */
  94. #define SGEMM_DEFAULT_P sgemm_p
  95. #define DGEMM_DEFAULT_P dgemm_p
  96. #define QGEMM_DEFAULT_P qgemm_p
  97. #define CGEMM_DEFAULT_P cgemm_p
  98. #define ZGEMM_DEFAULT_P zgemm_p
  99. #define XGEMM_DEFAULT_P xgemm_p
  100. #define SGEMM_DEFAULT_R sgemm_r
  101. #define DGEMM_DEFAULT_R dgemm_r
  102. #define QGEMM_DEFAULT_R qgemm_r
  103. #define CGEMM_DEFAULT_R cgemm_r
  104. #define ZGEMM_DEFAULT_R zgemm_r
  105. #define XGEMM_DEFAULT_R xgemm_r
  /* Q is a literal for every prefix: 248 when ALLOC_HUGETLB is defined,
     240 otherwise. */
  106. #ifdef ALLOC_HUGETLB
  107. #define SGEMM_DEFAULT_Q 248
  108. #define DGEMM_DEFAULT_Q 248
  109. #define QGEMM_DEFAULT_Q 248
  110. #define CGEMM_DEFAULT_Q 248
  111. #define ZGEMM_DEFAULT_Q 248
  112. #define XGEMM_DEFAULT_Q 248
  113. #else
  114. #define SGEMM_DEFAULT_Q 240
  115. #define DGEMM_DEFAULT_Q 240
  116. #define QGEMM_DEFAULT_Q 240
  117. #define CGEMM_DEFAULT_Q 240
  118. #define ZGEMM_DEFAULT_Q 240
  119. #define XGEMM_DEFAULT_Q 240
  120. #endif
  121. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  122. #define HAVE_EXCLUSIVE_CACHE
  123. #endif
  124. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
  /* One shared parameter set, compiled in when any of BARCELONA, SHANGHAI or
     BOBCAT is defined. */
  125. #define SNUMOPT 8
  126. #define DNUMOPT 4
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  127. #define GEMM_DEFAULT_OFFSET_A 64
  128. #define GEMM_DEFAULT_OFFSET_B 832
  129. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* UNROLL_N values are unconditional in this block. */
  130. #define SGEMM_DEFAULT_UNROLL_N 4
  131. #define DGEMM_DEFAULT_UNROLL_N 4
  132. #define QGEMM_DEFAULT_UNROLL_N 2
  133. #define CGEMM_DEFAULT_UNROLL_N 2
  134. #define ZGEMM_DEFAULT_UNROLL_N 2
  135. #define XGEMM_DEFAULT_UNROLL_N 1
  /* UNROLL_M depends on ARCH_X86: the S/D/C/Z values in the ARCH_X86 branch
     are half of those in the #else branch; Q and X are the same in both. */
  136. #ifdef ARCH_X86
  137. #define SGEMM_DEFAULT_UNROLL_M 4
  138. #define DGEMM_DEFAULT_UNROLL_M 2
  139. #define QGEMM_DEFAULT_UNROLL_M 2
  140. #define CGEMM_DEFAULT_UNROLL_M 2
  141. #define ZGEMM_DEFAULT_UNROLL_M 1
  142. #define XGEMM_DEFAULT_UNROLL_M 1
  143. #else
  144. #define SGEMM_DEFAULT_UNROLL_M 8
  145. #define DGEMM_DEFAULT_UNROLL_M 4
  146. #define QGEMM_DEFAULT_UNROLL_M 2
  147. #define CGEMM_DEFAULT_UNROLL_M 4
  148. #define ZGEMM_DEFAULT_UNROLL_M 2
  149. #define XGEMM_DEFAULT_UNROLL_M 1
  150. #endif
  /* The "#if 0" branch is never compiled: it holds an alternative P/Q set
     (Q = 248). The #else branch (Q = 224) is the one in effect. */
  151. #if 0
  152. #define SGEMM_DEFAULT_P 496
  153. #define DGEMM_DEFAULT_P 248
  154. #define QGEMM_DEFAULT_P 124
  155. #define CGEMM_DEFAULT_P 248
  156. #define ZGEMM_DEFAULT_P 124
  157. #define XGEMM_DEFAULT_P 62
  158. #define SGEMM_DEFAULT_Q 248
  159. #define DGEMM_DEFAULT_Q 248
  160. #define QGEMM_DEFAULT_Q 248
  161. #define CGEMM_DEFAULT_Q 248
  162. #define ZGEMM_DEFAULT_Q 248
  163. #define XGEMM_DEFAULT_Q 248
  164. #else
  165. #define SGEMM_DEFAULT_P 448
  166. #define DGEMM_DEFAULT_P 224
  167. #define QGEMM_DEFAULT_P 112
  168. #define CGEMM_DEFAULT_P 224
  169. #define ZGEMM_DEFAULT_P 112
  170. #define XGEMM_DEFAULT_P 56
  171. #define SGEMM_DEFAULT_Q 224
  172. #define DGEMM_DEFAULT_Q 224
  173. #define QGEMM_DEFAULT_Q 224
  174. #define CGEMM_DEFAULT_Q 224
  175. #define ZGEMM_DEFAULT_Q 224
  176. #define XGEMM_DEFAULT_Q 224
  177. #endif
  /* R expands to lower-case identifiers (sgemm_r, ...) rather than literals.
     Those identifiers are not defined in this block; presumably they are
     variables set elsewhere -- TODO confirm. */
  178. #define SGEMM_DEFAULT_R sgemm_r
  179. #define QGEMM_DEFAULT_R qgemm_r
  180. #define DGEMM_DEFAULT_R dgemm_r
  181. #define CGEMM_DEFAULT_R cgemm_r
  182. #define ZGEMM_DEFAULT_R zgemm_r
  183. #define XGEMM_DEFAULT_R xgemm_r
  184. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  185. #define HAVE_EXCLUSIVE_CACHE
  /* GEMM_THREAD expands to the identifier gemm_thread_mn, which is declared
     outside this view. */
  186. #define GEMM_THREAD gemm_thread_mn
  187. #endif
  188. #ifdef BULLDOZER
  /* Parameter set compiled in when the BULLDOZER macro is defined.
     Unroll factors are selected with "#ifdef ARCH_X86"; the S/D P and Q
     values further down are selected with "#if defined(ARCH_X86_64)". */
  189. #define SNUMOPT 8
  190. #define DNUMOPT 4
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  191. #define GEMM_DEFAULT_OFFSET_A 64
  192. #define GEMM_DEFAULT_OFFSET_B 832
  193. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per branch
     below. */
  194. #define QGEMM_DEFAULT_UNROLL_N 2
  195. #define CGEMM_DEFAULT_UNROLL_N 2
  196. #define ZGEMM_DEFAULT_UNROLL_N 2
  197. #define XGEMM_DEFAULT_UNROLL_N 1
  198. #ifdef ARCH_X86
  199. #define SGEMM_DEFAULT_UNROLL_N 4
  200. #define DGEMM_DEFAULT_UNROLL_N 4
  201. #define SGEMM_DEFAULT_UNROLL_M 4
  202. #define DGEMM_DEFAULT_UNROLL_M 2
  203. #define QGEMM_DEFAULT_UNROLL_M 2
  204. #define CGEMM_DEFAULT_UNROLL_M 2
  205. #define ZGEMM_DEFAULT_UNROLL_M 1
  206. #define XGEMM_DEFAULT_UNROLL_M 1
  207. #else
  208. #define SGEMM_DEFAULT_UNROLL_N 2
  209. #define DGEMM_DEFAULT_UNROLL_N 2
  210. #define SGEMM_DEFAULT_UNROLL_M 16
  211. #define DGEMM_DEFAULT_UNROLL_M 8
  212. #define QGEMM_DEFAULT_UNROLL_M 2
  213. #define CGEMM_DEFAULT_UNROLL_M 4
  214. #define ZGEMM_DEFAULT_UNROLL_M 2
  215. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The macros from here to the #endif exist only in this non-ARCH_X86
     branch; the ARCH_X86 branch above does not define them. */
  216. #define CGEMM3M_DEFAULT_UNROLL_N 4
  217. #define CGEMM3M_DEFAULT_UNROLL_M 8
  218. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  219. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  220. #define DGEMM_DEFAULT_UNROLL_MN 16
  221. #define GEMV_UNROLL 8
  222. #endif
  /* S/D P: 768/384 when ARCH_X86_64 is defined, 448/224 otherwise.
     Q/C/Z/X P are unconditional. */
  223. #if defined(ARCH_X86_64)
  224. #define SGEMM_DEFAULT_P 768
  225. #define DGEMM_DEFAULT_P 384
  226. #else
  227. #define SGEMM_DEFAULT_P 448
  228. #define DGEMM_DEFAULT_P 224
  229. #endif
  230. #define QGEMM_DEFAULT_P 112
  231. #define CGEMM_DEFAULT_P 224
  232. #define ZGEMM_DEFAULT_P 112
  233. #define XGEMM_DEFAULT_P 56
  /* S/D Q: 168 when ARCH_X86_64 is defined, 224 otherwise.
     Q/C/Z/X Q are unconditionally 224. */
  234. #if defined(ARCH_X86_64)
  235. #define SGEMM_DEFAULT_Q 168
  236. #define DGEMM_DEFAULT_Q 168
  237. #else
  238. #define SGEMM_DEFAULT_Q 224
  239. #define DGEMM_DEFAULT_Q 224
  240. #endif
  241. #define QGEMM_DEFAULT_Q 224
  242. #define CGEMM_DEFAULT_Q 224
  243. #define ZGEMM_DEFAULT_Q 224
  244. #define XGEMM_DEFAULT_Q 224
  /* The *GEMM3M P/Q/R values are literals and do not depend on the
     architecture macros. */
  245. #define CGEMM3M_DEFAULT_P 448
  246. #define ZGEMM3M_DEFAULT_P 224
  247. #define XGEMM3M_DEFAULT_P 112
  248. #define CGEMM3M_DEFAULT_Q 224
  249. #define ZGEMM3M_DEFAULT_Q 224
  250. #define XGEMM3M_DEFAULT_Q 224
  251. #define CGEMM3M_DEFAULT_R 12288
  252. #define ZGEMM3M_DEFAULT_R 12288
  253. #define XGEMM3M_DEFAULT_R 12288
  /* The plain *GEMM R macros expand to lower-case identifiers (sgemm_r, ...)
     rather than literals. Those identifiers are not defined in this block;
     presumably they are variables set elsewhere -- TODO confirm. */
  254. #define SGEMM_DEFAULT_R sgemm_r
  255. #define QGEMM_DEFAULT_R qgemm_r
  256. #define DGEMM_DEFAULT_R dgemm_r
  257. #define CGEMM_DEFAULT_R cgemm_r
  258. #define ZGEMM_DEFAULT_R zgemm_r
  259. #define XGEMM_DEFAULT_R xgemm_r
  260. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  261. #define HAVE_EXCLUSIVE_CACHE
  /* GEMM_THREAD expands to the identifier gemm_thread_mn, which is declared
     outside this view. */
  262. #define GEMM_THREAD gemm_thread_mn
  263. #endif
  264. #ifdef PILEDRIVER
  /* Parameter set compiled in when the PILEDRIVER macro is defined.
     Unroll factors are selected with "#ifdef ARCH_X86"; the S/D/Z/C P and Q
     values further down are selected with "#if defined(ARCH_X86_64)". */
  265. #define SNUMOPT 8
  266. #define DNUMOPT 4
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  267. #define GEMM_DEFAULT_OFFSET_A 64
  268. #define GEMM_DEFAULT_OFFSET_B 832
  269. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per branch
     below. */
  270. #define QGEMM_DEFAULT_UNROLL_N 2
  271. #define CGEMM_DEFAULT_UNROLL_N 2
  272. #define ZGEMM_DEFAULT_UNROLL_N 2
  273. #define XGEMM_DEFAULT_UNROLL_N 1
  274. #ifdef ARCH_X86
  275. #define SGEMM_DEFAULT_UNROLL_N 4
  276. #define DGEMM_DEFAULT_UNROLL_N 4
  277. #define SGEMM_DEFAULT_UNROLL_M 4
  278. #define DGEMM_DEFAULT_UNROLL_M 2
  279. #define QGEMM_DEFAULT_UNROLL_M 2
  280. #define CGEMM_DEFAULT_UNROLL_M 2
  281. #define ZGEMM_DEFAULT_UNROLL_M 1
  282. #define XGEMM_DEFAULT_UNROLL_M 1
  283. #else
  284. #define SGEMM_DEFAULT_UNROLL_N 2
  285. #define DGEMM_DEFAULT_UNROLL_N 2
  286. #define SGEMM_DEFAULT_UNROLL_M 16
  287. #define DGEMM_DEFAULT_UNROLL_M 8
  288. #define QGEMM_DEFAULT_UNROLL_M 2
  289. #define CGEMM_DEFAULT_UNROLL_M 4
  290. #define ZGEMM_DEFAULT_UNROLL_M 2
  291. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The macros from here to the #endif exist only in this non-ARCH_X86
     branch; the ARCH_X86 branch above does not define them. */
  292. #define CGEMM3M_DEFAULT_UNROLL_N 4
  293. #define CGEMM3M_DEFAULT_UNROLL_M 8
  294. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  295. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  296. #define GEMV_UNROLL 8
  297. #endif
  /* S/D/Z/C P depend on ARCH_X86_64; Q/X P are unconditional. */
  298. #if defined(ARCH_X86_64)
  299. #define SGEMM_DEFAULT_P 768
  300. #define DGEMM_DEFAULT_P 768
  301. #define ZGEMM_DEFAULT_P 384
  302. #define CGEMM_DEFAULT_P 768
  303. #else
  304. #define SGEMM_DEFAULT_P 448
  305. #define DGEMM_DEFAULT_P 480
  306. #define ZGEMM_DEFAULT_P 112
  307. #define CGEMM_DEFAULT_P 224
  308. #endif
  309. #define QGEMM_DEFAULT_P 112
  310. #define XGEMM_DEFAULT_P 56
  /* S/D/Z/C Q depend on ARCH_X86_64 (192 for S, 168 for D/Z/C; 224
     otherwise); Q/X Q are unconditionally 224. */
  311. #if defined(ARCH_X86_64)
  312. #define SGEMM_DEFAULT_Q 192
  313. #define DGEMM_DEFAULT_Q 168
  314. #define ZGEMM_DEFAULT_Q 168
  315. #define CGEMM_DEFAULT_Q 168
  316. #else
  317. #define SGEMM_DEFAULT_Q 224
  318. #define DGEMM_DEFAULT_Q 224
  319. #define ZGEMM_DEFAULT_Q 224
  320. #define CGEMM_DEFAULT_Q 224
  321. #endif
  322. #define QGEMM_DEFAULT_Q 224
  323. #define XGEMM_DEFAULT_Q 224
  /* The *GEMM3M P/Q/R values are literals and do not depend on the
     architecture macros. */
  324. #define CGEMM3M_DEFAULT_P 448
  325. #define ZGEMM3M_DEFAULT_P 224
  326. #define XGEMM3M_DEFAULT_P 112
  327. #define CGEMM3M_DEFAULT_Q 224
  328. #define ZGEMM3M_DEFAULT_Q 224
  329. #define XGEMM3M_DEFAULT_Q 224
  330. #define CGEMM3M_DEFAULT_R 12288
  331. #define ZGEMM3M_DEFAULT_R 12288
  332. #define XGEMM3M_DEFAULT_R 12288
  /* Mixed R definitions: S and D are the literal 12288, while Q/C/Z/X expand
     to lower-case identifiers that are not defined in this block (presumably
     variables set elsewhere -- TODO confirm). */
  333. #define SGEMM_DEFAULT_R 12288
  334. #define QGEMM_DEFAULT_R qgemm_r
  335. #define DGEMM_DEFAULT_R 12288
  336. #define CGEMM_DEFAULT_R cgemm_r
  337. #define ZGEMM_DEFAULT_R zgemm_r
  338. #define XGEMM_DEFAULT_R xgemm_r
  339. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  340. #define HAVE_EXCLUSIVE_CACHE
  /* GEMM_THREAD expands to the identifier gemm_thread_mn, which is declared
     outside this view. */
  341. #define GEMM_THREAD gemm_thread_mn
  342. #endif
  343. #ifdef STEAMROLLER
  /* Parameter set compiled in when the STEAMROLLER macro is defined.
     Unroll factors are selected with "#ifdef ARCH_X86"; the S/D/Z/C P and Q
     values further down are selected with "#if defined(ARCH_X86_64)". */
  344. #define SNUMOPT 8
  345. #define DNUMOPT 4
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  346. #define GEMM_DEFAULT_OFFSET_A 64
  347. #define GEMM_DEFAULT_OFFSET_B 832
  348. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per branch
     below. */
  349. #define QGEMM_DEFAULT_UNROLL_N 2
  350. #define CGEMM_DEFAULT_UNROLL_N 2
  351. #define ZGEMM_DEFAULT_UNROLL_N 2
  352. #define XGEMM_DEFAULT_UNROLL_N 1
  353. #ifdef ARCH_X86
  354. #define SGEMM_DEFAULT_UNROLL_N 4
  355. #define DGEMM_DEFAULT_UNROLL_N 4
  356. #define SGEMM_DEFAULT_UNROLL_M 4
  357. #define DGEMM_DEFAULT_UNROLL_M 2
  358. #define QGEMM_DEFAULT_UNROLL_M 2
  359. #define CGEMM_DEFAULT_UNROLL_M 2
  360. #define ZGEMM_DEFAULT_UNROLL_M 1
  361. #define XGEMM_DEFAULT_UNROLL_M 1
  362. #else
  363. #define SGEMM_DEFAULT_UNROLL_N 2
  364. #define DGEMM_DEFAULT_UNROLL_N 2
  365. #define SGEMM_DEFAULT_UNROLL_M 16
  366. #define DGEMM_DEFAULT_UNROLL_M 8
  367. #define QGEMM_DEFAULT_UNROLL_M 2
  368. #define CGEMM_DEFAULT_UNROLL_M 4
  369. #define ZGEMM_DEFAULT_UNROLL_M 2
  370. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The macros from here to the #endif exist only in this non-ARCH_X86
     branch; the ARCH_X86 branch above does not define them. */
  371. #define CGEMM3M_DEFAULT_UNROLL_N 4
  372. #define CGEMM3M_DEFAULT_UNROLL_M 8
  373. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  374. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  375. #define GEMV_UNROLL 8
  376. #endif
  /* S/D/Z/C P depend on ARCH_X86_64; Q/X P are unconditional. */
  377. #if defined(ARCH_X86_64)
  378. #define SGEMM_DEFAULT_P 768
  379. #define DGEMM_DEFAULT_P 576
  380. #define ZGEMM_DEFAULT_P 288
  381. #define CGEMM_DEFAULT_P 576
  382. #else
  383. #define SGEMM_DEFAULT_P 448
  384. #define DGEMM_DEFAULT_P 480
  385. #define ZGEMM_DEFAULT_P 112
  386. #define CGEMM_DEFAULT_P 224
  387. #endif
  388. #define QGEMM_DEFAULT_P 112
  389. #define XGEMM_DEFAULT_P 56
  /* S/D/Z/C Q depend on ARCH_X86_64 (192 for S, 160 for D/Z/C; 224
     otherwise); Q/X Q are unconditionally 224. */
  390. #if defined(ARCH_X86_64)
  391. #define SGEMM_DEFAULT_Q 192
  392. #define DGEMM_DEFAULT_Q 160
  393. #define ZGEMM_DEFAULT_Q 160
  394. #define CGEMM_DEFAULT_Q 160
  395. #else
  396. #define SGEMM_DEFAULT_Q 224
  397. #define DGEMM_DEFAULT_Q 224
  398. #define ZGEMM_DEFAULT_Q 224
  399. #define CGEMM_DEFAULT_Q 224
  400. #endif
  401. #define QGEMM_DEFAULT_Q 224
  402. #define XGEMM_DEFAULT_Q 224
  /* The *GEMM3M P/Q/R values are literals and do not depend on the
     architecture macros. */
  403. #define CGEMM3M_DEFAULT_P 448
  404. #define ZGEMM3M_DEFAULT_P 224
  405. #define XGEMM3M_DEFAULT_P 112
  406. #define CGEMM3M_DEFAULT_Q 224
  407. #define ZGEMM3M_DEFAULT_Q 224
  408. #define XGEMM3M_DEFAULT_Q 224
  409. #define CGEMM3M_DEFAULT_R 12288
  410. #define ZGEMM3M_DEFAULT_R 12288
  411. #define XGEMM3M_DEFAULT_R 12288
  /* Mixed R definitions: S and D are the literal 12288, while Q/C/Z/X expand
     to lower-case identifiers that are not defined in this block (presumably
     variables set elsewhere -- TODO confirm). */
  412. #define SGEMM_DEFAULT_R 12288
  413. #define QGEMM_DEFAULT_R qgemm_r
  414. #define DGEMM_DEFAULT_R 12288
  415. #define CGEMM_DEFAULT_R cgemm_r
  416. #define ZGEMM_DEFAULT_R zgemm_r
  417. #define XGEMM_DEFAULT_R xgemm_r
  418. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  419. #define HAVE_EXCLUSIVE_CACHE
  /* GEMM_THREAD expands to the identifier gemm_thread_mn, which is declared
     outside this view. */
  420. #define GEMM_THREAD gemm_thread_mn
  421. #endif
  422. #ifdef EXCAVATOR
  /* Parameter set compiled in when the EXCAVATOR macro is defined.
     Every value here matches the STEAMROLLER block earlier in this file.
     Unroll factors are selected with "#ifdef ARCH_X86"; the S/D/Z/C P and Q
     values further down are selected with "#if defined(ARCH_X86_64)". */
  423. #define SNUMOPT 8
  424. #define DNUMOPT 4
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  425. #define GEMM_DEFAULT_OFFSET_A 64
  426. #define GEMM_DEFAULT_OFFSET_B 832
  427. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  /* Q/C/Z/X UNROLL_N are unconditional; S/D UNROLL_N are set per branch
     below. */
  428. #define QGEMM_DEFAULT_UNROLL_N 2
  429. #define CGEMM_DEFAULT_UNROLL_N 2
  430. #define ZGEMM_DEFAULT_UNROLL_N 2
  431. #define XGEMM_DEFAULT_UNROLL_N 1
  432. #ifdef ARCH_X86
  433. #define SGEMM_DEFAULT_UNROLL_N 4
  434. #define DGEMM_DEFAULT_UNROLL_N 4
  435. #define SGEMM_DEFAULT_UNROLL_M 4
  436. #define DGEMM_DEFAULT_UNROLL_M 2
  437. #define QGEMM_DEFAULT_UNROLL_M 2
  438. #define CGEMM_DEFAULT_UNROLL_M 2
  439. #define ZGEMM_DEFAULT_UNROLL_M 1
  440. #define XGEMM_DEFAULT_UNROLL_M 1
  441. #else
  442. #define SGEMM_DEFAULT_UNROLL_N 2
  443. #define DGEMM_DEFAULT_UNROLL_N 2
  444. #define SGEMM_DEFAULT_UNROLL_M 16
  445. #define DGEMM_DEFAULT_UNROLL_M 8
  446. #define QGEMM_DEFAULT_UNROLL_M 2
  447. #define CGEMM_DEFAULT_UNROLL_M 4
  448. #define ZGEMM_DEFAULT_UNROLL_M 2
  449. #define XGEMM_DEFAULT_UNROLL_M 1
  /* The macros from here to the #endif exist only in this non-ARCH_X86
     branch; the ARCH_X86 branch above does not define them. */
  450. #define CGEMM3M_DEFAULT_UNROLL_N 4
  451. #define CGEMM3M_DEFAULT_UNROLL_M 8
  452. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  453. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  454. #define GEMV_UNROLL 8
  455. #endif
  /* S/D/Z/C P depend on ARCH_X86_64; Q/X P are unconditional. */
  456. #if defined(ARCH_X86_64)
  457. #define SGEMM_DEFAULT_P 768
  458. #define DGEMM_DEFAULT_P 576
  459. #define ZGEMM_DEFAULT_P 288
  460. #define CGEMM_DEFAULT_P 576
  461. #else
  462. #define SGEMM_DEFAULT_P 448
  463. #define DGEMM_DEFAULT_P 480
  464. #define ZGEMM_DEFAULT_P 112
  465. #define CGEMM_DEFAULT_P 224
  466. #endif
  467. #define QGEMM_DEFAULT_P 112
  468. #define XGEMM_DEFAULT_P 56
  /* S/D/Z/C Q depend on ARCH_X86_64 (192 for S, 160 for D/Z/C; 224
     otherwise); Q/X Q are unconditionally 224. */
  469. #if defined(ARCH_X86_64)
  470. #define SGEMM_DEFAULT_Q 192
  471. #define DGEMM_DEFAULT_Q 160
  472. #define ZGEMM_DEFAULT_Q 160
  473. #define CGEMM_DEFAULT_Q 160
  474. #else
  475. #define SGEMM_DEFAULT_Q 224
  476. #define DGEMM_DEFAULT_Q 224
  477. #define ZGEMM_DEFAULT_Q 224
  478. #define CGEMM_DEFAULT_Q 224
  479. #endif
  480. #define QGEMM_DEFAULT_Q 224
  481. #define XGEMM_DEFAULT_Q 224
  /* The *GEMM3M P/Q/R values are literals and do not depend on the
     architecture macros. */
  482. #define CGEMM3M_DEFAULT_P 448
  483. #define ZGEMM3M_DEFAULT_P 224
  484. #define XGEMM3M_DEFAULT_P 112
  485. #define CGEMM3M_DEFAULT_Q 224
  486. #define ZGEMM3M_DEFAULT_Q 224
  487. #define XGEMM3M_DEFAULT_Q 224
  488. #define CGEMM3M_DEFAULT_R 12288
  489. #define ZGEMM3M_DEFAULT_R 12288
  490. #define XGEMM3M_DEFAULT_R 12288
  /* Mixed R definitions: S and D are the literal 12288, while Q/C/Z/X expand
     to lower-case identifiers that are not defined in this block (presumably
     variables set elsewhere -- TODO confirm). */
  491. #define SGEMM_DEFAULT_R 12288
  492. #define QGEMM_DEFAULT_R qgemm_r
  493. #define DGEMM_DEFAULT_R 12288
  494. #define CGEMM_DEFAULT_R cgemm_r
  495. #define ZGEMM_DEFAULT_R zgemm_r
  496. #define XGEMM_DEFAULT_R xgemm_r
  497. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  498. #define HAVE_EXCLUSIVE_CACHE
  /* GEMM_THREAD expands to the identifier gemm_thread_mn, which is declared
     outside this view. */
  499. #define GEMM_THREAD gemm_thread_mn
  500. #endif
  501. #ifdef ATHLON
  /* Parameter set compiled in when the ATHLON macro is defined. This block
     has no nested architecture conditionals: every value is fixed. */
  502. #define SNUMOPT 4
  503. #define DNUMOPT 2
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  504. #define GEMM_DEFAULT_OFFSET_A 0
  505. #define GEMM_DEFAULT_OFFSET_B 384
  506. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  507. #define SGEMM_DEFAULT_UNROLL_N 4
  508. #define DGEMM_DEFAULT_UNROLL_N 4
  509. #define QGEMM_DEFAULT_UNROLL_N 2
  510. #define CGEMM_DEFAULT_UNROLL_N 2
  511. #define ZGEMM_DEFAULT_UNROLL_N 2
  512. #define XGEMM_DEFAULT_UNROLL_N 1
  513. #define SGEMM_DEFAULT_UNROLL_M 2
  514. #define DGEMM_DEFAULT_UNROLL_M 1
  515. #define QGEMM_DEFAULT_UNROLL_M 2
  516. #define CGEMM_DEFAULT_UNROLL_M 1
  517. #define ZGEMM_DEFAULT_UNROLL_M 1
  518. #define XGEMM_DEFAULT_UNROLL_M 1
  /* R expands to lower-case identifiers (sgemm_r, ...) rather than literals.
     Those identifiers are not defined in this block; presumably they are
     variables set elsewhere -- TODO confirm. */
  519. #define SGEMM_DEFAULT_R sgemm_r
  520. #define DGEMM_DEFAULT_R dgemm_r
  521. #define QGEMM_DEFAULT_R qgemm_r
  522. #define CGEMM_DEFAULT_R cgemm_r
  523. #define ZGEMM_DEFAULT_R zgemm_r
  524. #define XGEMM_DEFAULT_R xgemm_r
  /* P and Q are literals; Q is 208 for every prefix. */
  525. #define SGEMM_DEFAULT_P 208
  526. #define DGEMM_DEFAULT_P 104
  527. #define QGEMM_DEFAULT_P 56
  528. #define CGEMM_DEFAULT_P 104
  529. #define ZGEMM_DEFAULT_P 56
  530. #define XGEMM_DEFAULT_P 28
  531. #define SGEMM_DEFAULT_Q 208
  532. #define DGEMM_DEFAULT_Q 208
  533. #define QGEMM_DEFAULT_Q 208
  534. #define CGEMM_DEFAULT_Q 208
  535. #define ZGEMM_DEFAULT_Q 208
  536. #define XGEMM_DEFAULT_Q 208
  537. #define SYMV_P 16
  /* Defined with no value: a presence flag. Its consumers are outside this
     view. */
  538. #define HAVE_EXCLUSIVE_CACHE
  539. #endif
  540. #ifdef VIAC3
  /* Parameter set compiled in when the VIAC3 macro is defined. This block
     has no nested architecture conditionals and, unlike the ATHLON block
     before it, does not define HAVE_EXCLUSIVE_CACHE. */
  541. #define SNUMOPT 2
  542. #define DNUMOPT 1
  /* GEMM buffer offset and alignment-mask constants.
     NOTE(review): units are not visible here; presumably bytes -- confirm. */
  543. #define GEMM_DEFAULT_OFFSET_A 0
  544. #define GEMM_DEFAULT_OFFSET_B 256
  545. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  546. #define SGEMM_DEFAULT_UNROLL_N 4
  547. #define DGEMM_DEFAULT_UNROLL_N 4
  548. #define QGEMM_DEFAULT_UNROLL_N 2
  549. #define CGEMM_DEFAULT_UNROLL_N 2
  550. #define ZGEMM_DEFAULT_UNROLL_N 2
  551. #define XGEMM_DEFAULT_UNROLL_N 1
  552. #define SGEMM_DEFAULT_UNROLL_M 2
  553. #define DGEMM_DEFAULT_UNROLL_M 1
  554. #define QGEMM_DEFAULT_UNROLL_M 2
  555. #define CGEMM_DEFAULT_UNROLL_M 1
  556. #define ZGEMM_DEFAULT_UNROLL_M 1
  557. #define XGEMM_DEFAULT_UNROLL_M 1
  /* R expands to lower-case identifiers (sgemm_r, ...) rather than literals.
     Those identifiers are not defined in this block; presumably they are
     variables set elsewhere -- TODO confirm. */
  558. #define SGEMM_DEFAULT_R sgemm_r
  559. #define DGEMM_DEFAULT_R dgemm_r
  560. #define QGEMM_DEFAULT_R qgemm_r
  561. #define CGEMM_DEFAULT_R cgemm_r
  562. #define ZGEMM_DEFAULT_R zgemm_r
  563. #define XGEMM_DEFAULT_R xgemm_r
  /* P is 128 for every prefix; Q varies by prefix (512 / 256 / 128). */
  564. #define SGEMM_DEFAULT_P 128
  565. #define DGEMM_DEFAULT_P 128
  566. #define QGEMM_DEFAULT_P 128
  567. #define CGEMM_DEFAULT_P 128
  568. #define ZGEMM_DEFAULT_P 128
  569. #define XGEMM_DEFAULT_P 128
  570. #define SGEMM_DEFAULT_Q 512
  571. #define DGEMM_DEFAULT_Q 256
  572. #define QGEMM_DEFAULT_Q 256
  573. #define CGEMM_DEFAULT_Q 256
  574. #define ZGEMM_DEFAULT_Q 128
  575. #define XGEMM_DEFAULT_Q 128
  576. #define SYMV_P 16
  577. #endif
  /* NANO: tuning constants compiled in only when NANO is defined.
   * The unroll factors depend on ARCH_X86 (first branch) versus any other
   * architecture setting (second branch); P, Q, R, SYMV_P and the
   * offset/alignment constants are shared by both.  This block also
   * defines HAVE_EXCLUSIVE_CACHE. */
  578. #ifdef NANO
  579. #define SNUMOPT 4
  580. #define DNUMOPT 2
  581. #define GEMM_DEFAULT_OFFSET_A 64
  582. #define GEMM_DEFAULT_OFFSET_B 256
  583. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  /* Unroll factors when ARCH_X86 is defined. */
  584. #ifdef ARCH_X86
  585. #define SGEMM_DEFAULT_UNROLL_N 4
  586. #define DGEMM_DEFAULT_UNROLL_N 4
  587. #define QGEMM_DEFAULT_UNROLL_N 2
  588. #define CGEMM_DEFAULT_UNROLL_N 2
  589. #define ZGEMM_DEFAULT_UNROLL_N 2
  590. #define XGEMM_DEFAULT_UNROLL_N 1
  591. #define SGEMM_DEFAULT_UNROLL_M 4
  592. #define DGEMM_DEFAULT_UNROLL_M 2
  593. #define QGEMM_DEFAULT_UNROLL_M 2
  594. #define CGEMM_DEFAULT_UNROLL_M 2
  595. #define ZGEMM_DEFAULT_UNROLL_M 1
  596. #define XGEMM_DEFAULT_UNROLL_M 1
  /* Unroll factors when ARCH_X86 is not defined. */
  597. #else
  598. #define SGEMM_DEFAULT_UNROLL_N 8
  599. #define DGEMM_DEFAULT_UNROLL_N 4
  600. #define QGEMM_DEFAULT_UNROLL_N 2
  601. #define CGEMM_DEFAULT_UNROLL_N 4
  602. #define ZGEMM_DEFAULT_UNROLL_N 2
  603. #define XGEMM_DEFAULT_UNROLL_N 1
  604. #define SGEMM_DEFAULT_UNROLL_M 4
  605. #define DGEMM_DEFAULT_UNROLL_M 4
  606. #define QGEMM_DEFAULT_UNROLL_M 2
  607. #define CGEMM_DEFAULT_UNROLL_M 2
  608. #define ZGEMM_DEFAULT_UNROLL_M 2
  609. #define XGEMM_DEFAULT_UNROLL_M 1
  610. #endif
  /* P is the same literal for every variant; R expands to identifiers
   * (sgemm_r, ...) that are not defined in this block. */
  611. #define SGEMM_DEFAULT_P 288
  612. #define DGEMM_DEFAULT_P 288
  613. #define QGEMM_DEFAULT_P 288
  614. #define CGEMM_DEFAULT_P 288
  615. #define ZGEMM_DEFAULT_P 288
  616. #define XGEMM_DEFAULT_P 288
  617. #define SGEMM_DEFAULT_R sgemm_r
  618. #define DGEMM_DEFAULT_R dgemm_r
  619. #define QGEMM_DEFAULT_R qgemm_r
  620. #define CGEMM_DEFAULT_R cgemm_r
  621. #define ZGEMM_DEFAULT_R zgemm_r
  622. #define XGEMM_DEFAULT_R xgemm_r
  623. #define SGEMM_DEFAULT_Q 256
  624. #define DGEMM_DEFAULT_Q 128
  625. #define QGEMM_DEFAULT_Q 64
  626. #define CGEMM_DEFAULT_Q 128
  627. #define ZGEMM_DEFAULT_Q 64
  628. #define XGEMM_DEFAULT_Q 32
  629. #define SYMV_P 16
  630. #define HAVE_EXCLUSIVE_CACHE
  631. #endif
  /* PENTIUM / PENTIUM2 / PENTIUM3: one shared set of constants, compiled
   * in when any of the three macros is defined.  HAVE_SSE raises SNUMOPT
   * and the S/C UNROLL_M values; everything else is unconditional.
   * Q is the literal 256 for every variant, while both P and R expand to
   * lower-case identifiers (sgemm_p, sgemm_r, ...) that are not defined in
   * this block. */
  632. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
  633. #ifdef HAVE_SSE
  634. #define SNUMOPT 2
  635. #else
  636. #define SNUMOPT 1
  637. #endif
  638. #define DNUMOPT 1
  639. #define GEMM_DEFAULT_OFFSET_A 0
  640. #define GEMM_DEFAULT_OFFSET_B 0
  641. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* Only the SGEMM and CGEMM UNROLL_M values depend on HAVE_SSE. */
  642. #ifdef HAVE_SSE
  643. #define SGEMM_DEFAULT_UNROLL_M 8
  644. #define CGEMM_DEFAULT_UNROLL_M 4
  645. #else
  646. #define SGEMM_DEFAULT_UNROLL_M 4
  647. #define CGEMM_DEFAULT_UNROLL_M 2
  648. #endif
  649. #define DGEMM_DEFAULT_UNROLL_M 2
  650. #define SGEMM_DEFAULT_UNROLL_N 2
  651. #define DGEMM_DEFAULT_UNROLL_N 2
  652. #define QGEMM_DEFAULT_UNROLL_M 2
  653. #define QGEMM_DEFAULT_UNROLL_N 2
  654. #define CGEMM_DEFAULT_UNROLL_N 1
  655. #define ZGEMM_DEFAULT_UNROLL_M 1
  656. #define ZGEMM_DEFAULT_UNROLL_N 1
  657. #define XGEMM_DEFAULT_UNROLL_M 1
  658. #define XGEMM_DEFAULT_UNROLL_N 1
  659. #define SGEMM_DEFAULT_P sgemm_p
  660. #define SGEMM_DEFAULT_Q 256
  661. #define SGEMM_DEFAULT_R sgemm_r
  662. #define DGEMM_DEFAULT_P dgemm_p
  663. #define DGEMM_DEFAULT_Q 256
  664. #define DGEMM_DEFAULT_R dgemm_r
  665. #define QGEMM_DEFAULT_P qgemm_p
  666. #define QGEMM_DEFAULT_Q 256
  667. #define QGEMM_DEFAULT_R qgemm_r
  668. #define CGEMM_DEFAULT_P cgemm_p
  669. #define CGEMM_DEFAULT_Q 256
  670. #define CGEMM_DEFAULT_R cgemm_r
  671. #define ZGEMM_DEFAULT_P zgemm_p
  672. #define ZGEMM_DEFAULT_Q 256
  673. #define ZGEMM_DEFAULT_R zgemm_r
  674. #define XGEMM_DEFAULT_P xgemm_p
  675. #define XGEMM_DEFAULT_Q 256
  676. #define XGEMM_DEFAULT_R xgemm_r
  677. #define SYMV_P 4
  678. #endif
  /* PENTIUMM: tuning constants compiled in only when PENTIUMM is defined.
   * CORE_YONAH selects an alternative set of unroll factors; all other
   * constants are shared.  Q is the literal 256 for every variant; P and R
   * expand to lower-case identifiers (sgemm_p, sgemm_r, ...) that are not
   * defined in this block. */
  679. #ifdef PENTIUMM
  680. #define SNUMOPT 2
  681. #define DNUMOPT 1
  682. #define GEMM_DEFAULT_OFFSET_A 0
  683. #define GEMM_DEFAULT_OFFSET_B 0
  684. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  /* Unroll factors when CORE_YONAH is also defined. */
  685. #ifdef CORE_YONAH
  686. #define SGEMM_DEFAULT_UNROLL_M 4
  687. #define SGEMM_DEFAULT_UNROLL_N 4
  688. #define DGEMM_DEFAULT_UNROLL_M 2
  689. #define DGEMM_DEFAULT_UNROLL_N 4
  690. #define QGEMM_DEFAULT_UNROLL_M 2
  691. #define QGEMM_DEFAULT_UNROLL_N 2
  692. #define CGEMM_DEFAULT_UNROLL_M 2
  693. #define CGEMM_DEFAULT_UNROLL_N 2
  694. #define ZGEMM_DEFAULT_UNROLL_M 1
  695. #define ZGEMM_DEFAULT_UNROLL_N 2
  696. #define XGEMM_DEFAULT_UNROLL_M 1
  697. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Unroll factors when CORE_YONAH is not defined. */
  698. #else
  699. #define SGEMM_DEFAULT_UNROLL_M 8
  700. #define SGEMM_DEFAULT_UNROLL_N 2
  701. #define DGEMM_DEFAULT_UNROLL_M 2
  702. #define DGEMM_DEFAULT_UNROLL_N 2
  703. #define QGEMM_DEFAULT_UNROLL_M 2
  704. #define QGEMM_DEFAULT_UNROLL_N 2
  705. #define CGEMM_DEFAULT_UNROLL_M 4
  706. #define CGEMM_DEFAULT_UNROLL_N 1
  707. #define ZGEMM_DEFAULT_UNROLL_M 1
  708. #define ZGEMM_DEFAULT_UNROLL_N 1
  709. #define XGEMM_DEFAULT_UNROLL_M 1
  710. #define XGEMM_DEFAULT_UNROLL_N 1
  711. #endif
  712. #define SGEMM_DEFAULT_P sgemm_p
  713. #define SGEMM_DEFAULT_Q 256
  714. #define SGEMM_DEFAULT_R sgemm_r
  715. #define DGEMM_DEFAULT_P dgemm_p
  716. #define DGEMM_DEFAULT_Q 256
  717. #define DGEMM_DEFAULT_R dgemm_r
  718. #define QGEMM_DEFAULT_P qgemm_p
  719. #define QGEMM_DEFAULT_Q 256
  720. #define QGEMM_DEFAULT_R qgemm_r
  721. #define CGEMM_DEFAULT_P cgemm_p
  722. #define CGEMM_DEFAULT_Q 256
  723. #define CGEMM_DEFAULT_R cgemm_r
  724. #define ZGEMM_DEFAULT_P zgemm_p
  725. #define ZGEMM_DEFAULT_Q 256
  726. #define ZGEMM_DEFAULT_R zgemm_r
  727. #define XGEMM_DEFAULT_P xgemm_p
  728. #define XGEMM_DEFAULT_Q 256
  729. #define XGEMM_DEFAULT_R xgemm_r
  730. #define SYMV_P 4
  731. #endif
  /* CORE_NORTHWOOD: tuning constants compiled in only when CORE_NORTHWOOD
   * is defined.  No nested conditionals: unroll factors and Q (128 for
   * every variant) are literals, while P and R expand to lower-case
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this
   * block. */
  732. #ifdef CORE_NORTHWOOD
  733. #define SNUMOPT 4
  734. #define DNUMOPT 2
  735. #define GEMM_DEFAULT_OFFSET_A 0
  736. #define GEMM_DEFAULT_OFFSET_B 32
  737. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  738. #define SYMV_P 8
  739. #define SGEMM_DEFAULT_UNROLL_M 8
  740. #define DGEMM_DEFAULT_UNROLL_M 4
  741. #define QGEMM_DEFAULT_UNROLL_M 2
  742. #define CGEMM_DEFAULT_UNROLL_M 4
  743. #define ZGEMM_DEFAULT_UNROLL_M 2
  744. #define XGEMM_DEFAULT_UNROLL_M 1
  745. #define SGEMM_DEFAULT_UNROLL_N 2
  746. #define DGEMM_DEFAULT_UNROLL_N 2
  747. #define QGEMM_DEFAULT_UNROLL_N 2
  748. #define CGEMM_DEFAULT_UNROLL_N 1
  749. #define ZGEMM_DEFAULT_UNROLL_N 1
  750. #define XGEMM_DEFAULT_UNROLL_N 1
  751. #define SGEMM_DEFAULT_P sgemm_p
  752. #define SGEMM_DEFAULT_R sgemm_r
  753. #define DGEMM_DEFAULT_P dgemm_p
  754. #define DGEMM_DEFAULT_R dgemm_r
  755. #define QGEMM_DEFAULT_P qgemm_p
  756. #define QGEMM_DEFAULT_R qgemm_r
  757. #define CGEMM_DEFAULT_P cgemm_p
  758. #define CGEMM_DEFAULT_R cgemm_r
  759. #define ZGEMM_DEFAULT_P zgemm_p
  760. #define ZGEMM_DEFAULT_R zgemm_r
  761. #define XGEMM_DEFAULT_P xgemm_p
  762. #define XGEMM_DEFAULT_R xgemm_r
  763. #define SGEMM_DEFAULT_Q 128
  764. #define DGEMM_DEFAULT_Q 128
  765. #define QGEMM_DEFAULT_Q 128
  766. #define CGEMM_DEFAULT_Q 128
  767. #define ZGEMM_DEFAULT_Q 128
  768. #define XGEMM_DEFAULT_Q 128
  769. #endif
  /* CORE_PRESCOTT: tuning constants compiled in only when CORE_PRESCOTT is
   * defined.  Two independent switches are used: __64BIT__ picks the
   * OFFSET_A/OFFSET_B pair and ARCH_X86 picks the UNROLL_M values.
   * UNROLL_N and Q (128) are shared literals; P and R expand to lower-case
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this
   * block. */
  770. #ifdef CORE_PRESCOTT
  771. #define SNUMOPT 4
  772. #define DNUMOPT 2
  /* Offsets used when __64BIT__ is NOT defined come first. */
  773. #ifndef __64BIT__
  774. #define GEMM_DEFAULT_OFFSET_A 128
  775. #define GEMM_DEFAULT_OFFSET_B 192
  776. #else
  777. #define GEMM_DEFAULT_OFFSET_A 0
  778. #define GEMM_DEFAULT_OFFSET_B 256
  779. #endif
  780. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  781. #define SYMV_P 8
  /* UNROLL_M: smaller values under ARCH_X86, larger otherwise. */
  782. #ifdef ARCH_X86
  783. #define SGEMM_DEFAULT_UNROLL_M 4
  784. #define DGEMM_DEFAULT_UNROLL_M 2
  785. #define QGEMM_DEFAULT_UNROLL_M 2
  786. #define CGEMM_DEFAULT_UNROLL_M 2
  787. #define ZGEMM_DEFAULT_UNROLL_M 1
  788. #define XGEMM_DEFAULT_UNROLL_M 1
  789. #else
  790. #define SGEMM_DEFAULT_UNROLL_M 8
  791. #define DGEMM_DEFAULT_UNROLL_M 4
  792. #define QGEMM_DEFAULT_UNROLL_M 2
  793. #define CGEMM_DEFAULT_UNROLL_M 4
  794. #define ZGEMM_DEFAULT_UNROLL_M 2
  795. #define XGEMM_DEFAULT_UNROLL_M 1
  796. #endif
  797. #define SGEMM_DEFAULT_UNROLL_N 4
  798. #define DGEMM_DEFAULT_UNROLL_N 4
  799. #define QGEMM_DEFAULT_UNROLL_N 2
  800. #define CGEMM_DEFAULT_UNROLL_N 2
  801. #define ZGEMM_DEFAULT_UNROLL_N 2
  802. #define XGEMM_DEFAULT_UNROLL_N 1
  803. #define SGEMM_DEFAULT_P sgemm_p
  804. #define SGEMM_DEFAULT_R sgemm_r
  805. #define DGEMM_DEFAULT_P dgemm_p
  806. #define DGEMM_DEFAULT_R dgemm_r
  807. #define QGEMM_DEFAULT_P qgemm_p
  808. #define QGEMM_DEFAULT_R qgemm_r
  809. #define CGEMM_DEFAULT_P cgemm_p
  810. #define CGEMM_DEFAULT_R cgemm_r
  811. #define ZGEMM_DEFAULT_P zgemm_p
  812. #define ZGEMM_DEFAULT_R zgemm_r
  813. #define XGEMM_DEFAULT_P xgemm_p
  814. #define XGEMM_DEFAULT_R xgemm_r
  815. #define SGEMM_DEFAULT_Q 128
  816. #define DGEMM_DEFAULT_Q 128
  817. #define QGEMM_DEFAULT_Q 128
  818. #define CGEMM_DEFAULT_Q 128
  819. #define ZGEMM_DEFAULT_Q 128
  820. #define XGEMM_DEFAULT_Q 128
  821. #endif
  /* CORE2: tuning constants compiled in only when CORE2 is defined.
   * The two ARCH_X86 branches set identical UNROLL_M values and differ
   * only in UNROLL_N; the ARCH_X86 branch additionally defines MASK().
   * Q is 256 for every variant; P and R expand to lower-case identifiers
   * (sgemm_p, sgemm_r, ...) that are not defined in this block.
   * This block also defines SWITCH_RATIO. */
  822. #ifdef CORE2
  823. #define SNUMOPT 8
  824. #define DNUMOPT 4
  825. #define GEMM_DEFAULT_OFFSET_A 448
  826. #define GEMM_DEFAULT_OFFSET_B 128
  827. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  828. #define SYMV_P 8
  829. #define SWITCH_RATIO 4
  830. #ifdef ARCH_X86
  831. #define SGEMM_DEFAULT_UNROLL_M 8
  832. #define DGEMM_DEFAULT_UNROLL_M 4
  833. #define QGEMM_DEFAULT_UNROLL_M 2
  834. #define CGEMM_DEFAULT_UNROLL_M 4
  835. #define ZGEMM_DEFAULT_UNROLL_M 2
  836. #define XGEMM_DEFAULT_UNROLL_M 1
  837. #define SGEMM_DEFAULT_UNROLL_N 2
  838. #define DGEMM_DEFAULT_UNROLL_N 2
  839. #define QGEMM_DEFAULT_UNROLL_N 2
  840. #define CGEMM_DEFAULT_UNROLL_N 1
  841. #define ZGEMM_DEFAULT_UNROLL_N 1
  842. #define XGEMM_DEFAULT_UNROLL_N 1
  /* MASK(a, b): with non-negative integer a and positive integer b this
   * rounds a up to the next multiple of b.  Defined only in this branch. */
  843. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  844. #else
  845. #define SGEMM_DEFAULT_UNROLL_M 8
  846. #define DGEMM_DEFAULT_UNROLL_M 4
  847. #define QGEMM_DEFAULT_UNROLL_M 2
  848. #define CGEMM_DEFAULT_UNROLL_M 4
  849. #define ZGEMM_DEFAULT_UNROLL_M 2
  850. #define XGEMM_DEFAULT_UNROLL_M 1
  851. #define SGEMM_DEFAULT_UNROLL_N 4
  852. #define DGEMM_DEFAULT_UNROLL_N 4
  853. #define QGEMM_DEFAULT_UNROLL_N 2
  854. #define CGEMM_DEFAULT_UNROLL_N 2
  855. #define ZGEMM_DEFAULT_UNROLL_N 2
  856. #define XGEMM_DEFAULT_UNROLL_N 1
  857. #endif
  858. #define SGEMM_DEFAULT_P sgemm_p
  859. #define SGEMM_DEFAULT_R sgemm_r
  860. #define DGEMM_DEFAULT_P dgemm_p
  861. #define DGEMM_DEFAULT_R dgemm_r
  862. #define QGEMM_DEFAULT_P qgemm_p
  863. #define QGEMM_DEFAULT_R qgemm_r
  864. #define CGEMM_DEFAULT_P cgemm_p
  865. #define CGEMM_DEFAULT_R cgemm_r
  866. #define ZGEMM_DEFAULT_P zgemm_p
  867. #define ZGEMM_DEFAULT_R zgemm_r
  868. #define XGEMM_DEFAULT_P xgemm_p
  869. #define XGEMM_DEFAULT_R xgemm_r
  870. #define SGEMM_DEFAULT_Q 256
  871. #define DGEMM_DEFAULT_Q 256
  872. #define QGEMM_DEFAULT_Q 256
  873. #define CGEMM_DEFAULT_Q 256
  874. #define ZGEMM_DEFAULT_Q 256
  875. #define XGEMM_DEFAULT_Q 256
  876. #endif
  /* PENRYN: tuning constants compiled in only when PENRYN is defined.
   * The two ARCH_X86 branches set identical UNROLL_N values and differ
   * only in UNROLL_M.  Q values are literals; P and R expand to lower-case
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this
   * block.  This block also defines SWITCH_RATIO and GETRF_FACTOR. */
  877. #ifdef PENRYN
  878. #define SNUMOPT 8
  879. #define DNUMOPT 4
  880. #define GEMM_DEFAULT_OFFSET_A 128
  881. #define GEMM_DEFAULT_OFFSET_B 0
  882. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  883. #define SYMV_P 8
  884. #define SWITCH_RATIO 4
  885. #ifdef ARCH_X86
  886. #define SGEMM_DEFAULT_UNROLL_M 4
  887. #define DGEMM_DEFAULT_UNROLL_M 2
  888. #define QGEMM_DEFAULT_UNROLL_M 2
  889. #define CGEMM_DEFAULT_UNROLL_M 2
  890. #define ZGEMM_DEFAULT_UNROLL_M 1
  891. #define XGEMM_DEFAULT_UNROLL_M 1
  892. #define SGEMM_DEFAULT_UNROLL_N 4
  893. #define DGEMM_DEFAULT_UNROLL_N 4
  894. #define QGEMM_DEFAULT_UNROLL_N 2
  895. #define CGEMM_DEFAULT_UNROLL_N 2
  896. #define ZGEMM_DEFAULT_UNROLL_N 2
  897. #define XGEMM_DEFAULT_UNROLL_N 1
  898. #else
  899. #define SGEMM_DEFAULT_UNROLL_M 8
  900. #define DGEMM_DEFAULT_UNROLL_M 4
  901. #define QGEMM_DEFAULT_UNROLL_M 2
  902. #define CGEMM_DEFAULT_UNROLL_M 4
  903. #define ZGEMM_DEFAULT_UNROLL_M 2
  904. #define XGEMM_DEFAULT_UNROLL_M 1
  905. #define SGEMM_DEFAULT_UNROLL_N 4
  906. #define DGEMM_DEFAULT_UNROLL_N 4
  907. #define QGEMM_DEFAULT_UNROLL_N 2
  908. #define CGEMM_DEFAULT_UNROLL_N 2
  909. #define ZGEMM_DEFAULT_UNROLL_N 2
  910. #define XGEMM_DEFAULT_UNROLL_N 1
  911. #endif
  912. #define SGEMM_DEFAULT_P sgemm_p
  913. #define SGEMM_DEFAULT_R sgemm_r
  914. #define DGEMM_DEFAULT_P dgemm_p
  915. #define DGEMM_DEFAULT_R dgemm_r
  916. #define QGEMM_DEFAULT_P qgemm_p
  917. #define QGEMM_DEFAULT_R qgemm_r
  918. #define CGEMM_DEFAULT_P cgemm_p
  919. #define CGEMM_DEFAULT_R cgemm_r
  920. #define ZGEMM_DEFAULT_P zgemm_p
  921. #define ZGEMM_DEFAULT_R zgemm_r
  922. #define XGEMM_DEFAULT_P xgemm_p
  923. #define XGEMM_DEFAULT_R xgemm_r
  924. #define SGEMM_DEFAULT_Q 512
  925. #define DGEMM_DEFAULT_Q 256
  926. #define QGEMM_DEFAULT_Q 128
  927. #define CGEMM_DEFAULT_Q 512
  928. #define ZGEMM_DEFAULT_Q 256
  929. #define XGEMM_DEFAULT_Q 128
  930. #define GETRF_FACTOR 0.75
  931. #endif
  /* DUNNINGTON: tuning constants compiled in only when DUNNINGTON is
   * defined.  The two ARCH_X86 branches set identical UNROLL_N values and
   * differ only in UNROLL_M.  Q values are literals; P and R expand to
   * lower-case identifiers (sgemm_p, sgemm_r, ...) that are not defined in
   * this block.  Besides SWITCH_RATIO and GETRF_FACTOR, this block maps
   * GEMM_THREAD to the identifier gemm_thread_mn (declared elsewhere). */
  932. #ifdef DUNNINGTON
  933. #define SNUMOPT 8
  934. #define DNUMOPT 4
  935. #define GEMM_DEFAULT_OFFSET_A 128
  936. #define GEMM_DEFAULT_OFFSET_B 0
  937. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  938. #define SYMV_P 8
  939. #define SWITCH_RATIO 4
  940. #ifdef ARCH_X86
  941. #define SGEMM_DEFAULT_UNROLL_M 4
  942. #define DGEMM_DEFAULT_UNROLL_M 2
  943. #define QGEMM_DEFAULT_UNROLL_M 2
  944. #define CGEMM_DEFAULT_UNROLL_M 2
  945. #define ZGEMM_DEFAULT_UNROLL_M 1
  946. #define XGEMM_DEFAULT_UNROLL_M 1
  947. #define SGEMM_DEFAULT_UNROLL_N 4
  948. #define DGEMM_DEFAULT_UNROLL_N 4
  949. #define QGEMM_DEFAULT_UNROLL_N 2
  950. #define CGEMM_DEFAULT_UNROLL_N 2
  951. #define ZGEMM_DEFAULT_UNROLL_N 2
  952. #define XGEMM_DEFAULT_UNROLL_N 1
  953. #else
  954. #define SGEMM_DEFAULT_UNROLL_M 8
  955. #define DGEMM_DEFAULT_UNROLL_M 4
  956. #define QGEMM_DEFAULT_UNROLL_M 2
  957. #define CGEMM_DEFAULT_UNROLL_M 4
  958. #define ZGEMM_DEFAULT_UNROLL_M 2
  959. #define XGEMM_DEFAULT_UNROLL_M 1
  960. #define SGEMM_DEFAULT_UNROLL_N 4
  961. #define DGEMM_DEFAULT_UNROLL_N 4
  962. #define QGEMM_DEFAULT_UNROLL_N 2
  963. #define CGEMM_DEFAULT_UNROLL_N 2
  964. #define ZGEMM_DEFAULT_UNROLL_N 2
  965. #define XGEMM_DEFAULT_UNROLL_N 1
  966. #endif
  967. #define SGEMM_DEFAULT_P sgemm_p
  968. #define SGEMM_DEFAULT_R sgemm_r
  969. #define DGEMM_DEFAULT_P dgemm_p
  970. #define DGEMM_DEFAULT_R dgemm_r
  971. #define QGEMM_DEFAULT_P qgemm_p
  972. #define QGEMM_DEFAULT_R qgemm_r
  973. #define CGEMM_DEFAULT_P cgemm_p
  974. #define CGEMM_DEFAULT_R cgemm_r
  975. #define ZGEMM_DEFAULT_P zgemm_p
  976. #define ZGEMM_DEFAULT_R zgemm_r
  977. #define XGEMM_DEFAULT_P xgemm_p
  978. #define XGEMM_DEFAULT_R xgemm_r
  979. #define SGEMM_DEFAULT_Q 768
  980. #define DGEMM_DEFAULT_Q 384
  981. #define QGEMM_DEFAULT_Q 192
  982. #define CGEMM_DEFAULT_Q 768
  983. #define ZGEMM_DEFAULT_Q 384
  984. #define XGEMM_DEFAULT_Q 192
  985. #define GETRF_FACTOR 0.75
  986. #define GEMM_THREAD gemm_thread_mn
  987. #endif
  /* NEHALEM: tuning constants compiled in only when NEHALEM is defined.
   * The two ARCH_X86 branches set identical UNROLL_M values and differ
   * only in UNROLL_N.  P and Q are literal integers here; only R expands
   * to lower-case identifiers (sgemm_r, ...) that are not defined in this
   * block.  This block also defines SWITCH_RATIO and GETRF_FACTOR. */
  988. #ifdef NEHALEM
  989. #define SNUMOPT 8
  990. #define DNUMOPT 4
  991. #define GEMM_DEFAULT_OFFSET_A 32
  992. #define GEMM_DEFAULT_OFFSET_B 0
  993. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  994. #define SYMV_P 8
  995. #define SWITCH_RATIO 4
  996. #ifdef ARCH_X86
  997. #define SGEMM_DEFAULT_UNROLL_M 4
  998. #define DGEMM_DEFAULT_UNROLL_M 2
  999. #define QGEMM_DEFAULT_UNROLL_M 2
  1000. #define CGEMM_DEFAULT_UNROLL_M 2
  1001. #define ZGEMM_DEFAULT_UNROLL_M 1
  1002. #define XGEMM_DEFAULT_UNROLL_M 1
  1003. #define SGEMM_DEFAULT_UNROLL_N 4
  1004. #define DGEMM_DEFAULT_UNROLL_N 4
  1005. #define QGEMM_DEFAULT_UNROLL_N 2
  1006. #define CGEMM_DEFAULT_UNROLL_N 2
  1007. #define ZGEMM_DEFAULT_UNROLL_N 2
  1008. #define XGEMM_DEFAULT_UNROLL_N 1
  1009. #else
  1010. #define SGEMM_DEFAULT_UNROLL_M 4
  1011. #define DGEMM_DEFAULT_UNROLL_M 2
  1012. #define QGEMM_DEFAULT_UNROLL_M 2
  1013. #define CGEMM_DEFAULT_UNROLL_M 2
  1014. #define ZGEMM_DEFAULT_UNROLL_M 1
  1015. #define XGEMM_DEFAULT_UNROLL_M 1
  1016. #define SGEMM_DEFAULT_UNROLL_N 8
  1017. #define DGEMM_DEFAULT_UNROLL_N 8
  1018. #define QGEMM_DEFAULT_UNROLL_N 2
  1019. #define CGEMM_DEFAULT_UNROLL_N 4
  1020. #define ZGEMM_DEFAULT_UNROLL_N 4
  1021. #define XGEMM_DEFAULT_UNROLL_N 1
  1022. #endif
  1023. #define SGEMM_DEFAULT_P 504
  1024. #define SGEMM_DEFAULT_R sgemm_r
  1025. #define DGEMM_DEFAULT_P 504
  1026. #define DGEMM_DEFAULT_R dgemm_r
  1027. #define QGEMM_DEFAULT_P 504
  1028. #define QGEMM_DEFAULT_R qgemm_r
  1029. #define CGEMM_DEFAULT_P 252
  1030. #define CGEMM_DEFAULT_R cgemm_r
  1031. #define ZGEMM_DEFAULT_P 252
  1032. #define ZGEMM_DEFAULT_R zgemm_r
  1033. #define XGEMM_DEFAULT_P 252
  1034. #define XGEMM_DEFAULT_R xgemm_r
  1035. #define SGEMM_DEFAULT_Q 512
  1036. #define DGEMM_DEFAULT_Q 256
  1037. #define QGEMM_DEFAULT_Q 128
  1038. #define CGEMM_DEFAULT_Q 512
  1039. #define ZGEMM_DEFAULT_Q 256
  1040. #define XGEMM_DEFAULT_Q 128
  1041. #define GETRF_FACTOR 0.72
  1042. #endif
  /* SANDYBRIDGE: tuning constants compiled in only when SANDYBRIDGE is
   * defined.  ARCH_X86 selects the unroll factors; P, Q, R and the
   * CGEMM3M/ZGEMM3M/XGEMM3M constants that follow are shared by both
   * branches.  R expands to lower-case identifiers (sgemm_r, ...) that are
   * not defined in this block; the commented-out lines keep a literal 1024
   * as a disabled alternative.  The 3M constants are all literals,
   * including their R values. */
  1043. #ifdef SANDYBRIDGE
  1044. #define SNUMOPT 8
  1045. #define DNUMOPT 4
  1046. #define GEMM_DEFAULT_OFFSET_A 0
  1047. #define GEMM_DEFAULT_OFFSET_B 0
  1048. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1049. #define SYMV_P 8
  1050. #define SWITCH_RATIO 4
  1051. #ifdef ARCH_X86
  1052. #define SGEMM_DEFAULT_UNROLL_M 4
  1053. #define DGEMM_DEFAULT_UNROLL_M 2
  1054. #define QGEMM_DEFAULT_UNROLL_M 2
  1055. #define CGEMM_DEFAULT_UNROLL_M 2
  1056. #define ZGEMM_DEFAULT_UNROLL_M 1
  1057. #define XGEMM_DEFAULT_UNROLL_M 1
  1058. #define SGEMM_DEFAULT_UNROLL_N 4
  1059. #define DGEMM_DEFAULT_UNROLL_N 4
  1060. #define QGEMM_DEFAULT_UNROLL_N 2
  1061. #define CGEMM_DEFAULT_UNROLL_N 2
  1062. #define ZGEMM_DEFAULT_UNROLL_N 2
  1063. #define XGEMM_DEFAULT_UNROLL_N 1
  1064. #else
  1065. #define SGEMM_DEFAULT_UNROLL_M 16
  1066. #define DGEMM_DEFAULT_UNROLL_M 8
  1067. #define QGEMM_DEFAULT_UNROLL_M 2
  1068. #define CGEMM_DEFAULT_UNROLL_M 8
  1069. #define ZGEMM_DEFAULT_UNROLL_M 1
  1070. #define XGEMM_DEFAULT_UNROLL_M 1
  1071. #define SGEMM_DEFAULT_UNROLL_N 4
  1072. #define DGEMM_DEFAULT_UNROLL_N 4
  1073. #define QGEMM_DEFAULT_UNROLL_N 2
  1074. #define CGEMM_DEFAULT_UNROLL_N 2
  1075. #define ZGEMM_DEFAULT_UNROLL_N 4
  1076. #define XGEMM_DEFAULT_UNROLL_N 1
  1077. #endif
  1078. #define SGEMM_DEFAULT_P 768
  1079. #define SGEMM_DEFAULT_R sgemm_r
  1080. //#define SGEMM_DEFAULT_R 1024
  1081. #define DGEMM_DEFAULT_P 512
  1082. #define DGEMM_DEFAULT_R dgemm_r
  1083. //#define DGEMM_DEFAULT_R 1024
  1084. #define QGEMM_DEFAULT_P 504
  1085. #define QGEMM_DEFAULT_R qgemm_r
  1086. #define CGEMM_DEFAULT_P 768
  1087. #define CGEMM_DEFAULT_R cgemm_r
  1088. //#define CGEMM_DEFAULT_R 1024
  1089. #define ZGEMM_DEFAULT_P 512
  1090. #define ZGEMM_DEFAULT_R zgemm_r
  1091. //#define ZGEMM_DEFAULT_R 1024
  1092. #define XGEMM_DEFAULT_P 252
  1093. #define XGEMM_DEFAULT_R xgemm_r
  1094. #define SGEMM_DEFAULT_Q 384
  1095. #define DGEMM_DEFAULT_Q 256
  1096. #define QGEMM_DEFAULT_Q 128
  1097. #define CGEMM_DEFAULT_Q 512
  1098. #define ZGEMM_DEFAULT_Q 192
  1099. #define XGEMM_DEFAULT_Q 128
  /* Separate constants for the *GEMM3M names.  Unroll factors are given
   * for C and Z only; P, Q and R are given for C, Z and X. */
  1100. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1101. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1102. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1103. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1104. #define CGEMM3M_DEFAULT_P 448
  1105. #define ZGEMM3M_DEFAULT_P 224
  1106. #define XGEMM3M_DEFAULT_P 112
  1107. #define CGEMM3M_DEFAULT_Q 224
  1108. #define ZGEMM3M_DEFAULT_Q 224
  1109. #define XGEMM3M_DEFAULT_Q 224
  1110. #define CGEMM3M_DEFAULT_R 12288
  1111. #define ZGEMM3M_DEFAULT_R 12288
  1112. #define XGEMM3M_DEFAULT_R 12288
  1113. #define GETRF_FACTOR 0.72
  1114. #endif
  /* HASWELL: tuning constants compiled in only when HASWELL is defined.
   * ARCH_X86 is tested twice: once for the unroll factors and once for the
   * P/Q/R values.  Differences visible between the two settings:
   *   - SGEMM/DGEMM_DEFAULT_UNROLL_MN are defined only without ARCH_X86;
   *   - with ARCH_X86, CGEMM_DEFAULT_R is the literal 1024, the other R
   *     macros expand to identifiers (sgemm_r, ...);
   *   - without ARCH_X86, DGEMM_DEFAULT_R is the literal 13824, the other
   *     R macros expand to identifiers, WINDOWS_ABI selects smaller
   *     SGEMM/DGEMM Q values, and the *GEMM3M constants are defined.
   * No GETRF_FACTOR is defined in this block. */
  1115. #ifdef HASWELL
  1116. #define SNUMOPT 16
  1117. #define DNUMOPT 8
  1118. #define GEMM_DEFAULT_OFFSET_A 0
  1119. #define GEMM_DEFAULT_OFFSET_B 0
  1120. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1121. #define SYMV_P 8
  1122. #define SWITCH_RATIO 4
  /* First ARCH_X86 test: unroll factors. */
  1123. #ifdef ARCH_X86
  1124. #define SGEMM_DEFAULT_UNROLL_M 4
  1125. #define DGEMM_DEFAULT_UNROLL_M 2
  1126. #define QGEMM_DEFAULT_UNROLL_M 2
  1127. #define CGEMM_DEFAULT_UNROLL_M 2
  1128. #define ZGEMM_DEFAULT_UNROLL_M 1
  1129. #define XGEMM_DEFAULT_UNROLL_M 1
  1130. #define SGEMM_DEFAULT_UNROLL_N 4
  1131. #define DGEMM_DEFAULT_UNROLL_N 4
  1132. #define QGEMM_DEFAULT_UNROLL_N 2
  1133. #define CGEMM_DEFAULT_UNROLL_N 2
  1134. #define ZGEMM_DEFAULT_UNROLL_N 2
  1135. #define XGEMM_DEFAULT_UNROLL_N 1
  1136. #else
  1137. #define SGEMM_DEFAULT_UNROLL_M 16
  1138. #define DGEMM_DEFAULT_UNROLL_M 4
  1139. #define QGEMM_DEFAULT_UNROLL_M 2
  1140. #define CGEMM_DEFAULT_UNROLL_M 8
  1141. #define ZGEMM_DEFAULT_UNROLL_M 4
  1142. #define XGEMM_DEFAULT_UNROLL_M 1
  1143. #define SGEMM_DEFAULT_UNROLL_N 4
  1144. #define DGEMM_DEFAULT_UNROLL_N 8
  1145. #define QGEMM_DEFAULT_UNROLL_N 2
  1146. #define CGEMM_DEFAULT_UNROLL_N 2
  1147. #define ZGEMM_DEFAULT_UNROLL_N 2
  1148. #define XGEMM_DEFAULT_UNROLL_N 1
  1149. #define SGEMM_DEFAULT_UNROLL_MN 32
  1150. #define DGEMM_DEFAULT_UNROLL_MN 32
  1151. #endif
  /* Second ARCH_X86 test: P, Q and R values. */
  1152. #ifdef ARCH_X86
  1153. #define SGEMM_DEFAULT_P 512
  1154. #define SGEMM_DEFAULT_R sgemm_r
  1155. #define DGEMM_DEFAULT_P 512
  1156. #define DGEMM_DEFAULT_R dgemm_r
  1157. #define QGEMM_DEFAULT_P 504
  1158. #define QGEMM_DEFAULT_R qgemm_r
  1159. #define CGEMM_DEFAULT_P 128
  1160. #define CGEMM_DEFAULT_R 1024
  1161. #define ZGEMM_DEFAULT_P 512
  1162. #define ZGEMM_DEFAULT_R zgemm_r
  1163. #define XGEMM_DEFAULT_P 252
  1164. #define XGEMM_DEFAULT_R xgemm_r
  1165. #define SGEMM_DEFAULT_Q 256
  1166. #define DGEMM_DEFAULT_Q 256
  1167. #define QGEMM_DEFAULT_Q 128
  1168. #define CGEMM_DEFAULT_Q 256
  1169. #define ZGEMM_DEFAULT_Q 192
  1170. #define XGEMM_DEFAULT_Q 128
  1171. #else
  1172. #define SGEMM_DEFAULT_P 768
  1173. #define DGEMM_DEFAULT_P 512
  1174. #define CGEMM_DEFAULT_P 384
  1175. #define ZGEMM_DEFAULT_P 256
  /* Only the SGEMM and DGEMM Q values depend on WINDOWS_ABI. */
  1176. #ifdef WINDOWS_ABI
  1177. #define SGEMM_DEFAULT_Q 320
  1178. #define DGEMM_DEFAULT_Q 128
  1179. #else
  1180. #define SGEMM_DEFAULT_Q 384
  1181. #define DGEMM_DEFAULT_Q 256
  1182. #endif
  1183. #define CGEMM_DEFAULT_Q 192
  1184. #define ZGEMM_DEFAULT_Q 128
  1185. #define SGEMM_DEFAULT_R sgemm_r
  1186. #define DGEMM_DEFAULT_R 13824
  1187. #define CGEMM_DEFAULT_R cgemm_r
  1188. #define ZGEMM_DEFAULT_R zgemm_r
  1189. #define QGEMM_DEFAULT_Q 128
  1190. #define QGEMM_DEFAULT_P 504
  1191. #define QGEMM_DEFAULT_R qgemm_r
  1192. #define XGEMM_DEFAULT_P 252
  1193. #define XGEMM_DEFAULT_R xgemm_r
  1194. #define XGEMM_DEFAULT_Q 128
  1195. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1196. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1197. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1198. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1199. #define CGEMM3M_DEFAULT_P 448
  1200. #define ZGEMM3M_DEFAULT_P 224
  1201. #define XGEMM3M_DEFAULT_P 112
  1202. #define CGEMM3M_DEFAULT_Q 224
  1203. #define ZGEMM3M_DEFAULT_Q 224
  1204. #define XGEMM3M_DEFAULT_Q 224
  1205. #define CGEMM3M_DEFAULT_R 12288
  1206. #define ZGEMM3M_DEFAULT_R 12288
  1207. #define XGEMM3M_DEFAULT_R 12288
  1208. #endif
  1209. #endif
  /* ATOM: tuning constants compiled in only when ATOM is defined.
   * ARCH_X86 selects the UNROLL_M values; UNROLL_N and Q (256 for every
   * variant) are shared literals.  P and R expand to lower-case
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this
   * block. */
  1210. #ifdef ATOM
  1211. #define SNUMOPT 2
  1212. #define DNUMOPT 1
  1213. #define GEMM_DEFAULT_OFFSET_A 64
  1214. #define GEMM_DEFAULT_OFFSET_B 0
  1215. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1216. #define SYMV_P 8
  1217. #ifdef ARCH_X86
  1218. #define SGEMM_DEFAULT_UNROLL_M 4
  1219. #define DGEMM_DEFAULT_UNROLL_M 2
  1220. #define QGEMM_DEFAULT_UNROLL_M 2
  1221. #define CGEMM_DEFAULT_UNROLL_M 2
  1222. #define ZGEMM_DEFAULT_UNROLL_M 1
  1223. #define XGEMM_DEFAULT_UNROLL_M 1
  1224. #else
  1225. #define SGEMM_DEFAULT_UNROLL_M 8
  1226. #define DGEMM_DEFAULT_UNROLL_M 4
  1227. #define QGEMM_DEFAULT_UNROLL_M 2
  1228. #define CGEMM_DEFAULT_UNROLL_M 4
  1229. #define ZGEMM_DEFAULT_UNROLL_M 2
  1230. #define XGEMM_DEFAULT_UNROLL_M 1
  1231. #endif
  1232. #define SGEMM_DEFAULT_UNROLL_N 4
  1233. #define DGEMM_DEFAULT_UNROLL_N 2
  1234. #define QGEMM_DEFAULT_UNROLL_N 2
  1235. #define CGEMM_DEFAULT_UNROLL_N 2
  1236. #define ZGEMM_DEFAULT_UNROLL_N 1
  1237. #define XGEMM_DEFAULT_UNROLL_N 1
  1238. #define SGEMM_DEFAULT_P sgemm_p
  1239. #define SGEMM_DEFAULT_R sgemm_r
  1240. #define DGEMM_DEFAULT_P dgemm_p
  1241. #define DGEMM_DEFAULT_R dgemm_r
  1242. #define QGEMM_DEFAULT_P qgemm_p
  1243. #define QGEMM_DEFAULT_R qgemm_r
  1244. #define CGEMM_DEFAULT_P cgemm_p
  1245. #define CGEMM_DEFAULT_R cgemm_r
  1246. #define ZGEMM_DEFAULT_P zgemm_p
  1247. #define ZGEMM_DEFAULT_R zgemm_r
  1248. #define XGEMM_DEFAULT_P xgemm_p
  1249. #define XGEMM_DEFAULT_R xgemm_r
  1250. #define SGEMM_DEFAULT_Q 256
  1251. #define DGEMM_DEFAULT_Q 256
  1252. #define QGEMM_DEFAULT_Q 256
  1253. #define CGEMM_DEFAULT_Q 256
  1254. #define ZGEMM_DEFAULT_Q 256
  1255. #define XGEMM_DEFAULT_Q 256
  1256. #endif
  /* ITANIUM2: tuning constants compiled in only when ITANIUM2 is defined.
   * No nested conditionals.  Unroll factors are 8 for S, D, Q and 4 for
   * C, Z, X; Q is 1024 for every variant.  P and R expand to lower-case
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this
   * block.  This block also defines GETRF_FACTOR. */
  1257. #ifdef ITANIUM2
  1258. #define SNUMOPT 4
  1259. #define DNUMOPT 4
  1260. #define GEMM_DEFAULT_OFFSET_A 0
  1261. #define GEMM_DEFAULT_OFFSET_B 128
  1262. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1263. #define SGEMM_DEFAULT_UNROLL_M 8
  1264. #define SGEMM_DEFAULT_UNROLL_N 8
  1265. #define DGEMM_DEFAULT_UNROLL_M 8
  1266. #define DGEMM_DEFAULT_UNROLL_N 8
  1267. #define QGEMM_DEFAULT_UNROLL_M 8
  1268. #define QGEMM_DEFAULT_UNROLL_N 8
  1269. #define CGEMM_DEFAULT_UNROLL_M 4
  1270. #define CGEMM_DEFAULT_UNROLL_N 4
  1271. #define ZGEMM_DEFAULT_UNROLL_M 4
  1272. #define ZGEMM_DEFAULT_UNROLL_N 4
  1273. #define XGEMM_DEFAULT_UNROLL_M 4
  1274. #define XGEMM_DEFAULT_UNROLL_N 4
  1275. #define SGEMM_DEFAULT_P sgemm_p
  1276. #define DGEMM_DEFAULT_P dgemm_p
  1277. #define QGEMM_DEFAULT_P qgemm_p
  1278. #define CGEMM_DEFAULT_P cgemm_p
  1279. #define ZGEMM_DEFAULT_P zgemm_p
  1280. #define XGEMM_DEFAULT_P xgemm_p
  1281. #define SGEMM_DEFAULT_Q 1024
  1282. #define DGEMM_DEFAULT_Q 1024
  1283. #define QGEMM_DEFAULT_Q 1024
  1284. #define CGEMM_DEFAULT_Q 1024
  1285. #define ZGEMM_DEFAULT_Q 1024
  1286. #define XGEMM_DEFAULT_Q 1024
  1287. #define SGEMM_DEFAULT_R sgemm_r
  1288. #define DGEMM_DEFAULT_R dgemm_r
  1289. #define QGEMM_DEFAULT_R qgemm_r
  1290. #define CGEMM_DEFAULT_R cgemm_r
  1291. #define ZGEMM_DEFAULT_R zgemm_r
  1292. #define XGEMM_DEFAULT_R xgemm_r
  1293. #define SYMV_P 16
  1294. #define GETRF_FACTOR 0.65
  1295. #endif
  /* EV4 / EV5 / EV6: constants compiled in when any of the three macros
   * is defined.  Offsets, alignment, unroll factors and SYMV_P are shared;
   * SNUMOPT/DNUMOPT are 1 for EV4 and 2 otherwise.  P and Q are then set
   * per CPU in three separate #ifdef sections.  Only the S, D, C and Z
   * variants are configured (no Q or X macros).  Only the EV4 section
   * defines *_DEFAULT_R; EV5 and EV6 leave R undefined in this block. */
  1296. #if defined(EV4) || defined(EV5) || defined(EV6)
  1297. #ifdef EV4
  1298. #define SNUMOPT 1
  1299. #define DNUMOPT 1
  1300. #else
  1301. #define SNUMOPT 2
  1302. #define DNUMOPT 2
  1303. #endif
  1304. #define GEMM_DEFAULT_OFFSET_A 512
  1305. #define GEMM_DEFAULT_OFFSET_B 512
  1306. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1307. #define SGEMM_DEFAULT_UNROLL_M 4
  1308. #define SGEMM_DEFAULT_UNROLL_N 4
  1309. #define DGEMM_DEFAULT_UNROLL_M 4
  1310. #define DGEMM_DEFAULT_UNROLL_N 4
  1311. #define CGEMM_DEFAULT_UNROLL_M 2
  1312. #define CGEMM_DEFAULT_UNROLL_N 2
  1313. #define ZGEMM_DEFAULT_UNROLL_M 2
  1314. #define ZGEMM_DEFAULT_UNROLL_N 2
  1315. #define SYMV_P 8
  /* EV4: literal P, Q and R. */
  1316. #ifdef EV4
  1317. #define SGEMM_DEFAULT_P 32
  1318. #define SGEMM_DEFAULT_Q 112
  1319. #define SGEMM_DEFAULT_R 256
  1320. #define DGEMM_DEFAULT_P 32
  1321. #define DGEMM_DEFAULT_Q 56
  1322. #define DGEMM_DEFAULT_R 256
  1323. #define CGEMM_DEFAULT_P 32
  1324. #define CGEMM_DEFAULT_Q 64
  1325. #define CGEMM_DEFAULT_R 240
  1326. #define ZGEMM_DEFAULT_P 32
  1327. #define ZGEMM_DEFAULT_Q 32
  1328. #define ZGEMM_DEFAULT_R 240
  1329. #endif
  /* EV5: literal P and Q only. */
  1330. #ifdef EV5
  1331. #define SGEMM_DEFAULT_P 64
  1332. #define SGEMM_DEFAULT_Q 256
  1333. #define DGEMM_DEFAULT_P 64
  1334. #define DGEMM_DEFAULT_Q 128
  1335. #define CGEMM_DEFAULT_P 64
  1336. #define CGEMM_DEFAULT_Q 128
  1337. #define ZGEMM_DEFAULT_P 64
  1338. #define ZGEMM_DEFAULT_Q 64
  1339. #endif
  /* EV6: literal P and Q only. */
  1340. #ifdef EV6
  1341. #define SGEMM_DEFAULT_P 256
  1342. #define SGEMM_DEFAULT_Q 512
  1343. #define DGEMM_DEFAULT_P 256
  1344. #define DGEMM_DEFAULT_Q 256
  1345. #define CGEMM_DEFAULT_P 256
  1346. #define CGEMM_DEFAULT_Q 256
  1347. #define ZGEMM_DEFAULT_P 128
  1348. #define ZGEMM_DEFAULT_Q 256
  1349. #endif
  1350. #endif
  /* CELL: tuning constants compiled in only when CELL is defined.
   * No nested conditionals; every value is a literal.  Only the S, D, C
   * and Z variants are configured, and no *_DEFAULT_R macro is defined in
   * this block. */
  1351. #ifdef CELL
  1352. #define SNUMOPT 2
  1353. #define DNUMOPT 2
  1354. #define GEMM_DEFAULT_OFFSET_A 0
  1355. #define GEMM_DEFAULT_OFFSET_B 8192
  1356. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1357. #define SGEMM_DEFAULT_UNROLL_M 16
  1358. #define SGEMM_DEFAULT_UNROLL_N 4
  1359. #define DGEMM_DEFAULT_UNROLL_M 4
  1360. #define DGEMM_DEFAULT_UNROLL_N 4
  1361. #define CGEMM_DEFAULT_UNROLL_M 8
  1362. #define CGEMM_DEFAULT_UNROLL_N 2
  1363. #define ZGEMM_DEFAULT_UNROLL_M 2
  1364. #define ZGEMM_DEFAULT_UNROLL_N 2
  1365. #define SGEMM_DEFAULT_P 128
  1366. #define DGEMM_DEFAULT_P 128
  1367. #define CGEMM_DEFAULT_P 128
  1368. #define ZGEMM_DEFAULT_P 128
  1369. #define SGEMM_DEFAULT_Q 512
  1370. #define DGEMM_DEFAULT_Q 256
  1371. #define CGEMM_DEFAULT_Q 256
  1372. #define ZGEMM_DEFAULT_Q 128
  1373. #define SYMV_P 4
  1374. #endif
  /* PPCG4: tuning constants compiled in only when PPCG4 is defined.
   * No nested conditionals; every value is a literal.  Only the S, D, C
   * and Z variants are configured.  This block defines neither
   * SNUMOPT/DNUMOPT nor any *_DEFAULT_R macro. */
  1375. #ifdef PPCG4
  1376. #define GEMM_DEFAULT_OFFSET_A 0
  1377. #define GEMM_DEFAULT_OFFSET_B 1024
  1378. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1379. #define SGEMM_DEFAULT_UNROLL_M 16
  1380. #define SGEMM_DEFAULT_UNROLL_N 4
  1381. #define DGEMM_DEFAULT_UNROLL_M 4
  1382. #define DGEMM_DEFAULT_UNROLL_N 4
  1383. #define CGEMM_DEFAULT_UNROLL_M 8
  1384. #define CGEMM_DEFAULT_UNROLL_N 2
  1385. #define ZGEMM_DEFAULT_UNROLL_M 2
  1386. #define ZGEMM_DEFAULT_UNROLL_N 2
  1387. #define SGEMM_DEFAULT_P 256
  1388. #define DGEMM_DEFAULT_P 128
  1389. #define CGEMM_DEFAULT_P 128
  1390. #define ZGEMM_DEFAULT_P 64
  1391. #define SGEMM_DEFAULT_Q 256
  1392. #define DGEMM_DEFAULT_Q 256
  1393. #define CGEMM_DEFAULT_Q 256
  1394. #define ZGEMM_DEFAULT_Q 256
  1395. #define SYMV_P 4
  1396. #endif
  /* PPC970: tuning constants compiled in only when PPC970 is defined.
   * Only the S, D, C and Z variants are configured, and no *_DEFAULT_R
   * macro is defined in this block.  P is chosen from L2_SIZE, but only
   * inside the OS_LINUX guard. */
  1397. #ifdef PPC970
  1398. #define SNUMOPT 4
  1399. #define DNUMOPT 4
  1400. #define GEMM_DEFAULT_OFFSET_A 2688
  1401. #define GEMM_DEFAULT_OFFSET_B 3072
  1402. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1403. #define SGEMM_DEFAULT_UNROLL_M 16
  1404. #define SGEMM_DEFAULT_UNROLL_N 4
  1405. #define DGEMM_DEFAULT_UNROLL_M 4
  1406. #define DGEMM_DEFAULT_UNROLL_N 4
  1407. #define CGEMM_DEFAULT_UNROLL_M 8
  1408. #define CGEMM_DEFAULT_UNROLL_N 2
  1409. #define ZGEMM_DEFAULT_UNROLL_M 2
  1410. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* NOTE(review): when OS_LINUX is not defined this block defines no
   * *_DEFAULT_P at all -- confirm a fallback exists elsewhere. */
  1411. #ifdef OS_LINUX
  /* NOTE(review): 1024976 is not 1024*1024 (1048576).  The test only
   * matches if L2_SIZE is supplied with exactly this value -- confirm
   * against wherever L2_SIZE is defined before changing it. */
  1412. #if L2_SIZE == 1024976
  1413. #define SGEMM_DEFAULT_P 320
  1414. #define DGEMM_DEFAULT_P 256
  1415. #define CGEMM_DEFAULT_P 256
  1416. #define ZGEMM_DEFAULT_P 256
  1417. #else
  1418. #define SGEMM_DEFAULT_P 176
  1419. #define DGEMM_DEFAULT_P 176
  1420. #define CGEMM_DEFAULT_P 176
  1421. #define ZGEMM_DEFAULT_P 176
  1422. #endif
  1423. #endif
  1424. #define SGEMM_DEFAULT_Q 512
  1425. #define DGEMM_DEFAULT_Q 256
  1426. #define CGEMM_DEFAULT_Q 256
  1427. #define ZGEMM_DEFAULT_Q 128
  1428. #define SYMV_P 4
  1429. #endif
  /* PPC440: tuning constants compiled in only when PPC440 is defined.
   * No nested conditionals.  Only the S, D, C and Z variants are
   * configured.  Both offsets are written as (32 * 0), i.e. zero.
   * Each *_DEFAULT_R is defined as the matching *_DEFAULT_P macro, so R
   * evaluates to the same value as P (512 for all four variants). */
  1430. #ifdef PPC440
  1431. #define SNUMOPT 2
  1432. #define DNUMOPT 2
  1433. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1434. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1435. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1436. #define SGEMM_DEFAULT_UNROLL_M 4
  1437. #define SGEMM_DEFAULT_UNROLL_N 4
  1438. #define DGEMM_DEFAULT_UNROLL_M 4
  1439. #define DGEMM_DEFAULT_UNROLL_N 4
  1440. #define CGEMM_DEFAULT_UNROLL_M 2
  1441. #define CGEMM_DEFAULT_UNROLL_N 2
  1442. #define ZGEMM_DEFAULT_UNROLL_M 2
  1443. #define ZGEMM_DEFAULT_UNROLL_N 2
  1444. #define SGEMM_DEFAULT_P 512
  1445. #define DGEMM_DEFAULT_P 512
  1446. #define CGEMM_DEFAULT_P 512
  1447. #define ZGEMM_DEFAULT_P 512
  1448. #define SGEMM_DEFAULT_Q 1024
  1449. #define DGEMM_DEFAULT_Q 512
  1450. #define CGEMM_DEFAULT_Q 512
  1451. #define ZGEMM_DEFAULT_Q 256
  1452. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1453. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1454. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1455. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1456. #define SYMV_P 4
  1457. #endif
  /* Default GEMM / SYMV parameters selected when PPC440FP2 is defined. */
  #if defined(PPC440FP2)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /*
   * The Q values below were the always-enabled ("#if 1") choice.  The
   * disabled alternative set was S=512, D=256, C=256, Z=128.
   */

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 4096

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 3072

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 2048

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 1024

  #define SYMV_P 4
  #endif /* PPC440FP2 */
  /*
   * Shared defaults for POWER3, POWER4 and POWER5, followed by one
   * sub-section per CPU with its own P/Q/R choices.
   */
  #if defined(POWER3) || defined(POWER4) || defined(POWER5)

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Unroll factors common to all three CPUs. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /*
   * POWER3: fixed P/Q/R for S, D and Z.
   * NOTE(review): no CGEMM_DEFAULT_P/Q/R is set in this branch -- confirm
   * whether that is intentional or supplied elsewhere.
   */
  #if defined(POWER3)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define SGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 256

  #define SGEMM_DEFAULT_Q 432
  #define DGEMM_DEFAULT_Q 216
  #define ZGEMM_DEFAULT_Q 104

  #define SGEMM_DEFAULT_R 1012
  #define DGEMM_DEFAULT_R 1012
  #define ZGEMM_DEFAULT_R 1012
  #endif /* POWER3 */

  /*
   * POWER4: only P is chosen here, depending on ALLOC_HUGETLB.
   * NOTE(review): no *_DEFAULT_Q and no SNUMOPT/DNUMOPT are set in this
   * branch -- confirm against the rest of the file.
   */
  #ifdef POWER4
  #ifndef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_P 144
  #define DGEMM_DEFAULT_P 144
  #define CGEMM_DEFAULT_P 144
  #define ZGEMM_DEFAULT_P 144
  #else
  #define SGEMM_DEFAULT_P 184
  #define DGEMM_DEFAULT_P 184
  #define CGEMM_DEFAULT_P 184
  #define ZGEMM_DEFAULT_P 184
  #endif /* ALLOC_HUGETLB */
  #endif /* POWER4 */

  /*
   * POWER5: P depends on ALLOC_HUGETLB, Q is 256 for every routine.
   * NOTE(review): SNUMOPT/DNUMOPT are not set in this branch either.
   */
  #ifdef POWER5
  #ifndef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 160
  #define CGEMM_DEFAULT_P 160
  #define ZGEMM_DEFAULT_P 80
  #else
  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 128
  #endif /* ALLOC_HUGETLB */

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #endif /* POWER5 */

  #define SYMV_P 8
  #endif /* POWER3 || POWER4 || POWER5 */
  /* Default GEMM / SYMV parameters selected when POWER6 is defined. */
  #ifdef POWER6
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 384
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 992
  #define SGEMM_DEFAULT_Q 504

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 480
  #define DGEMM_DEFAULT_Q 504

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 488
  #define CGEMM_DEFAULT_Q 400

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 248
  #define ZGEMM_DEFAULT_Q 400

  #define SYMV_P 8
  #endif /* POWER6 */
  /* Default GEMM / SYMV parameters selected when POWER8 is defined. */
  #ifdef POWER8
  #define SNUMOPT 16
  #define DNUMOPT 8

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 65536
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 1280
  #define SGEMM_DEFAULT_Q 640

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 16
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 640
  #define DGEMM_DEFAULT_Q 720

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 640
  #define CGEMM_DEFAULT_Q 640

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 8
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 320
  #define ZGEMM_DEFAULT_Q 640

  #define SYMV_P 8
  #endif /* POWER8 */
  /* Default GEMM / SYMV parameters selected when both SPARC and V7 are defined. */
  #if defined(V7) && defined(SPARC)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 512

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 256

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_Q 256

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 8

  /* This is the only configuration in this part of the file that sets GEMM_THREAD. */
  #define GEMM_THREAD gemm_thread_mn
  #endif /* SPARC && V7 */
  /*
   * Default GEMM / SYMV parameters selected when __sparc_v9__ is defined,
   * or when both SPARC and V9 are defined.
   */
  #if defined(__sparc_v9__) || (defined(SPARC) && defined(V9))
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_Q 1024

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_Q 512

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_Q 512

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_Q 256

  #define SYMV_P 8
  #endif /* SPARC V9 */
  /* Default GEMM / SYMV parameters selected when SICORTEX is defined. */
  #if defined(SICORTEX)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 108
  #define SGEMM_DEFAULT_Q 288
  #define SGEMM_DEFAULT_R 2000

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P 112
  #define DGEMM_DEFAULT_Q 144
  #define DGEMM_DEFAULT_R 2000

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 108
  #define CGEMM_DEFAULT_Q 144
  #define CGEMM_DEFAULT_R 2000

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 112
  #define ZGEMM_DEFAULT_Q 72
  #define ZGEMM_DEFAULT_R 2000

  #define SYMV_P 16
  #endif /* SICORTEX */
  /*
   * Default GEMM / SYMV parameters selected when LOONGSON3A is defined.
   * The original note says this section was copied from SICORTEX; the
   * unroll and P/Q/R values here differ from that section.
   */
  #if defined(LOONGSON3A)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 64
  #define SGEMM_DEFAULT_Q 192
  #define SGEMM_DEFAULT_R 640

  /* DGEMM -- R is the identifier dgemm_r rather than a literal. */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 44
  #define DGEMM_DEFAULT_Q 92
  #define DGEMM_DEFAULT_R dgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 64
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R 640

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 32
  #define ZGEMM_DEFAULT_Q 80
  #define ZGEMM_DEFAULT_R 640

  /* Additional offsets defined only for the LOONGSON3A/3B sections. */
  #define GEMM_OFFSET_A1 0x10000
  #define GEMM_OFFSET_B1 0x100000

  #define SYMV_P 16
  #endif /* LOONGSON3A */
  /* Default GEMM / SYMV parameters selected when LOONGSON3B is defined. */
  #if defined(LOONGSON3B)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P 64
  #define SGEMM_DEFAULT_Q 192
  #define SGEMM_DEFAULT_R 512

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 24
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R 512

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 24
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R 512

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 20
  #define ZGEMM_DEFAULT_Q 64
  #define ZGEMM_DEFAULT_R 512

  /* Additional offsets defined only for the LOONGSON3A/3B sections. */
  #define GEMM_OFFSET_A1 0x10000
  #define GEMM_OFFSET_B1 0x100000

  #define SYMV_P 16
  #endif /* LOONGSON3B */
  /*
   * Default GEMM / SYMV parameters shared by P5600, I6400 and P6600.
   * Only the unroll factors depend on HAVE_MSA; everything else is fixed.
   */
  #if defined(P5600) || defined(I6400) || defined(P6600)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #ifndef HAVE_MSA
  /* Without HAVE_MSA every unroll factor is 2. */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #else
  /* With HAVE_MSA larger unroll factors are selected. */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #endif /* HAVE_MSA */

  /* SGEMM */
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* P5600 || I6400 || P6600 */
  /* Default GEMM / SYMV parameters selected when ARMV7 is defined. */
  #if defined(ARMV7)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* ARMV7 */
  /* Default GEMM / SYMV parameters selected when ARMV6 is defined. */
  #ifdef ARMV6
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* ARMV6 */
  /* Default GEMM / SYMV parameters selected when CORTEXA57 is defined. */
  #ifdef CORTEXA57
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_Q 1024
  #define SGEMM_DEFAULT_R 4096

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_R 4096

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_Q 512
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_R 2048

  #define SYMV_P 16
  #endif /* CORTEXA57 */
  /* Default GEMM / SYMV parameters selected when ARMV8 is defined. */
  #ifdef ARMV8
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* ARMV8 */
  /* Default GEMM / SYMV parameters selected when THUNDERX is defined. */
  #ifdef THUNDERX
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* THUNDERX */
  /*
   * Default GEMM / SYMV parameters selected when THUNDERX2T99 or VULCAN is
   * defined.  For S, D and C the P/Q/R defaults expand to identifiers
   * (sgemm_p, dgemm_q, ...) that are not defined in this section; only
   * ZGEMM uses literal values.
   */
  #if defined(VULCAN) || defined(THUNDERX2T99)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q sgemm_q
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q dgemm_q
  #define DGEMM_DEFAULT_R dgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q cgemm_q
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_R 2048

  #define SYMV_P 16
  #endif /* THUNDERX2T99 || VULCAN */
  /* Default GEMM / SYMV parameters selected when ARMV5 is defined. */
  #ifdef ARMV5
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* ARMV5 */
  /* Default GEMM / SYMV parameters selected when CORTEXA9 is defined. */
  #if defined(CORTEXA9)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* CORTEXA9 */
  /* Default GEMM / SYMV parameters selected when CORTEXA15 is defined. */
  #if defined(CORTEXA15)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* CORTEXA15 */
  /* Default GEMM / SYMV parameters selected when ZARCH_GENERIC is defined. */
  #ifdef ZARCH_GENERIC
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* ZARCH_GENERIC */
  /*
   * Default GEMM / SYMV parameters selected when Z13 is defined.
   * DNUMOPT and the DGEMM unroll/P/Q/R values are the ones that differ
   * from the ZARCH_GENERIC section; the S, C and Z settings match it.
   */
  #ifdef Z13
  #define SNUMOPT 2
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_Q 384
  #define DGEMM_DEFAULT_R 4096

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16
  #endif /* Z13 */
  /*
   * Default GEMM / SYMV parameters selected when GENERIC is defined.
   * Unlike the CPU-specific sections this one also covers QGEMM and XGEMM.
   * P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
   * defined in this section; Q is the literal 128 for every routine.
   */
  #if defined(GENERIC)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /*
   * The UNROLL_M values used to sit in an "#ifdef ARCH_X86 / #else" pair
   * whose two branches were identical, so they are listed once here.
   */

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 128
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM -- the only routine here with unroll factors of 1. */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 16
  #endif /* GENERIC */
  /*
   * Fallback unroll factors for QGEMM and XGEMM: each is set to 2 only if
   * no earlier section has already defined it.
   */
  #if !defined(QGEMM_DEFAULT_UNROLL_M)
  #define QGEMM_DEFAULT_UNROLL_M 2
  #endif

  #if !defined(QGEMM_DEFAULT_UNROLL_N)
  #define QGEMM_DEFAULT_UNROLL_N 2
  #endif

  #if !defined(XGEMM_DEFAULT_UNROLL_M)
  #define XGEMM_DEFAULT_UNROLL_M 2
  #endif

  #if !defined(XGEMM_DEFAULT_UNROLL_N)
  #define XGEMM_DEFAULT_UNROLL_N 2
  #endif
  /*
   * Instruction-prefix helper macros.  Each expands to a mnemonic, an
   * immediate and a trailing comma.
   *
   * When HAVE_SSE2 is not defined, SHUFPD_0..SHUFPD_3 are mapped onto
   * shufps with the immediates 0x44, 0x4e, 0xe4 and 0xee.
   */
  #if !defined(HAVE_SSE2)
  #define SHUFPD_0 shufps $0x44,
  #define SHUFPD_1 shufps $0x4e,
  #define SHUFPD_2 shufps $0xe4,
  #define SHUFPD_3 shufps $0xee,
  #endif

  /* Otherwise (or for any name still undefined) use shufpd with $0..$3. */
  #if !defined(SHUFPD_0)
  #define SHUFPD_0 shufpd $0,
  #endif

  #if !defined(SHUFPD_1)
  #define SHUFPD_1 shufpd $1,
  #endif

  #if !defined(SHUFPD_2)
  #define SHUFPD_2 shufpd $2,
  #endif

  #if !defined(SHUFPD_3)
  #define SHUFPD_3 shufpd $3,
  #endif

  /* SHUFPS_39 defaults to shufps with immediate 0x39 unless already defined. */
  #if !defined(SHUFPS_39)
  #define SHUFPS_39 shufps $0x39,
  #endif
  2159. #endif