You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 52 kB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217
  1. /*****************************************************************************
  2. Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the ISCAS nor the names of its contributors may
  14. be used to endorse or promote products derived from this software
  15. without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************************/
  27. /*********************************************************************/
  28. /* Copyright 2009, 2010 The University of Texas at Austin. */
  29. /* All rights reserved. */
  30. /* */
  31. /* Redistribution and use in source and binary forms, with or */
  32. /* without modification, are permitted provided that the following */
  33. /* conditions are met: */
  34. /* */
  35. /* 1. Redistributions of source code must retain the above */
  36. /* copyright notice, this list of conditions and the following */
  37. /* disclaimer. */
  38. /* */
  39. /* 2. Redistributions in binary form must reproduce the above */
  40. /* copyright notice, this list of conditions and the following */
  41. /* disclaimer in the documentation and/or other materials */
  42. /* provided with the distribution. */
  43. /* */
  44. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  45. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  46. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  47. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  48. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  49. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  50. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  51. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  52. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  53. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  54. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  55. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  56. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  57. /* POSSIBILITY OF SUCH DAMAGE. */
  58. /* */
  59. /* The views and conclusions contained in the software and */
  60. /* documentation are those of the authors and should not be */
  61. /* interpreted as representing official policies, either expressed */
  62. /* or implied, of The University of Texas at Austin. */
  63. /*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  /*
   * Parameter set selected when OPTERON is defined.
   *
   *  - UNROLL_N is fixed; UNROLL_M for the S/D/C/Z variants depends on
   *    ARCH_X86, while the Q and X variants use the same UNROLL_M either way.
   *  - The P and R defaults expand to the identifiers <x>gemm_p / <x>gemm_r,
   *    which are not defined in this section.
   *  - Every Q default is 248 when ALLOC_HUGETLB is defined and 240 otherwise.
   */
  #if defined(OPTERON)

  /* Scalar settings and flags. */
  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256

  /* Per-variant values that do not depend on any other macro. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* UNROLL_M values that differ between ARCH_X86 and other builds. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* Q is uniform across variants; only ALLOC_HUGETLB changes it. */
  #if !defined(ALLOC_HUGETLB)
    #define SGEMM_DEFAULT_Q 240
    #define DGEMM_DEFAULT_Q 240
    #define QGEMM_DEFAULT_Q 240
    #define CGEMM_DEFAULT_Q 240
    #define ZGEMM_DEFAULT_Q 240
    #define XGEMM_DEFAULT_Q 240
  #else
    #define SGEMM_DEFAULT_Q 248
    #define DGEMM_DEFAULT_Q 248
    #define QGEMM_DEFAULT_Q 248
    #define CGEMM_DEFAULT_Q 248
    #define ZGEMM_DEFAULT_Q 248
    #define XGEMM_DEFAULT_Q 248
  #endif

  #endif /* OPTERON */
  /*
   * Parameter set shared by BARCELONA, SHANGHAI and BOBCAT builds.
   *
   *  - UNROLL_M for the S/D/C/Z variants depends on ARCH_X86; Q and X do not.
   *  - P and Q are literal values here; R expands to the identifiers
   *    <x>gemm_r, which are not defined in this section.
   *  - The original source also carried a compiled-out (#if 0) alternative:
   *      P: S=496 D=248 Q=124 C=248 Z=124 X=62, Q: 248 for every variant.
   *    Those values are recorded here for reference only and are not active.
   */
  #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)

  /* Scalar settings and flags. */
  #define SNUMOPT 8
  #define DNUMOPT 4
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE
  #define GEMM_THREAD gemm_thread_mn

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0fffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832

  /* Per-variant values that do not depend on any other macro. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 448
  #define SGEMM_DEFAULT_Q 224
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 224
  #define DGEMM_DEFAULT_Q 224
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_P 112
  #define QGEMM_DEFAULT_Q 224
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 224
  #define CGEMM_DEFAULT_Q 224
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 112
  #define ZGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_P 56
  #define XGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_R xgemm_r

  /* UNROLL_M values that differ between ARCH_X86 and other builds. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  #endif /* BARCELONA || SHANGHAI || BOBCAT */
  /*
   * Parameter set selected when BULLDOZER is defined.
   *
   *  - ARCH_X86 selects the S/D UNROLL_N values and the S/D/C/Z UNROLL_M
   *    values. The CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL are defined
   *    only when ARCH_X86 is NOT defined.
   *  - ARCH_X86_64 selects the S/D values of P and Q; every other P/Q value
   *    is unconditional.
   *  - R for the plain GEMM variants expands to the identifiers <x>gemm_r
   *    (not defined in this section); the 3M variants use the literal 12288.
   */
  #if defined(BULLDOZER)

  /* Scalar settings and flags. */
  #define SNUMOPT 8
  #define DNUMOPT 4
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE
  #define GEMM_THREAD gemm_thread_mn

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0fffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832

  /* Per-variant values that do not depend on any other macro. */
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_P 112
  #define QGEMM_DEFAULT_Q 224
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 224
  #define CGEMM_DEFAULT_Q 224
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 112
  #define ZGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_P 56
  #define XGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_R xgemm_r

  /* 3M variants: P/Q/R are unconditional. */
  #define CGEMM3M_DEFAULT_P 448
  #define CGEMM3M_DEFAULT_Q 224
  #define CGEMM3M_DEFAULT_R 12288

  #define ZGEMM3M_DEFAULT_P 224
  #define ZGEMM3M_DEFAULT_Q 224
  #define ZGEMM3M_DEFAULT_R 12288

  #define XGEMM3M_DEFAULT_P 112
  #define XGEMM3M_DEFAULT_Q 224
  #define XGEMM3M_DEFAULT_R 12288

  /* Unroll factors that differ between ARCH_X86 and other builds. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_N 2
    #define SGEMM_DEFAULT_UNROLL_M 16
    #define DGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_M 8
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2

    /* Defined only on this (non-ARCH_X86) path. */
    #define CGEMM3M_DEFAULT_UNROLL_N 4
    #define CGEMM3M_DEFAULT_UNROLL_M 8
    #define ZGEMM3M_DEFAULT_UNROLL_N 4
    #define ZGEMM3M_DEFAULT_UNROLL_M 4
    #define GEMV_UNROLL 8
  #else
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* S/D values of P and Q depend on ARCH_X86_64. */
  #if !defined(ARCH_X86_64)
    #define SGEMM_DEFAULT_P 448
    #define SGEMM_DEFAULT_Q 224
    #define DGEMM_DEFAULT_P 224
    #define DGEMM_DEFAULT_Q 224
  #else
    #define SGEMM_DEFAULT_P 768
    #define SGEMM_DEFAULT_Q 168
    #define DGEMM_DEFAULT_P 384
    #define DGEMM_DEFAULT_Q 168
  #endif

  #endif /* BULLDOZER */
  /*
   * Parameter set selected when PILEDRIVER is defined.
   *
   *  - ARCH_X86 selects the S/D UNROLL_N values and the S/D/C/Z UNROLL_M
   *    values. The CGEMM3M/ZGEMM3M unroll factors and GEMV_UNROLL are defined
   *    only when ARCH_X86 is NOT defined.
   *  - ARCH_X86_64 selects the S/D/C/Z values of P and Q; the Q and X variants
   *    use unconditional P/Q values.
   *  - R is the literal 12288 for SGEMM, DGEMM and all 3M variants; the
   *    Q/C/Z/X variants expand to the identifiers <x>gemm_r, which are not
   *    defined in this section.
   */
  #if defined(PILEDRIVER)

  /* Scalar settings and flags. */
  #define SNUMOPT 8
  #define DNUMOPT 4
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE
  #define GEMM_THREAD gemm_thread_mn

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0fffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832

  /* Per-variant values that do not depend on any other macro. */
  #define SGEMM_DEFAULT_R 12288

  #define DGEMM_DEFAULT_R 12288

  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_P 112
  #define QGEMM_DEFAULT_Q 224
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_P 56
  #define XGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_R xgemm_r

  /* 3M variants: P/Q/R are unconditional. */
  #define CGEMM3M_DEFAULT_P 448
  #define CGEMM3M_DEFAULT_Q 224
  #define CGEMM3M_DEFAULT_R 12288

  #define ZGEMM3M_DEFAULT_P 224
  #define ZGEMM3M_DEFAULT_Q 224
  #define ZGEMM3M_DEFAULT_R 12288

  #define XGEMM3M_DEFAULT_P 112
  #define XGEMM3M_DEFAULT_Q 224
  #define XGEMM3M_DEFAULT_R 12288

  /* Unroll factors that differ between ARCH_X86 and other builds. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_N 2
    #define SGEMM_DEFAULT_UNROLL_M 16
    #define DGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_M 8
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2

    /* Defined only on this (non-ARCH_X86) path. */
    #define CGEMM3M_DEFAULT_UNROLL_N 4
    #define CGEMM3M_DEFAULT_UNROLL_M 8
    #define ZGEMM3M_DEFAULT_UNROLL_N 4
    #define ZGEMM3M_DEFAULT_UNROLL_M 4
    #define GEMV_UNROLL 8
  #else
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* S/D/C/Z values of P and Q depend on ARCH_X86_64. */
  #if !defined(ARCH_X86_64)
    #define SGEMM_DEFAULT_P 448
    #define SGEMM_DEFAULT_Q 224
    #define DGEMM_DEFAULT_P 480
    #define DGEMM_DEFAULT_Q 224
    #define CGEMM_DEFAULT_P 224
    #define CGEMM_DEFAULT_Q 224
    #define ZGEMM_DEFAULT_P 112
    #define ZGEMM_DEFAULT_Q 224
  #else
    #define SGEMM_DEFAULT_P 768
    #define SGEMM_DEFAULT_Q 192
    #define DGEMM_DEFAULT_P 768
    #define DGEMM_DEFAULT_Q 168
    #define CGEMM_DEFAULT_P 768
    #define CGEMM_DEFAULT_Q 168
    #define ZGEMM_DEFAULT_P 384
    #define ZGEMM_DEFAULT_Q 168
  #endif

  #endif /* PILEDRIVER */
  /*
   * Parameter set selected when ATHLON is defined.
   *
   * Nothing in this set depends on another macro. P and Q are literals; R
   * expands to the identifiers <x>gemm_r, which are not defined in this
   * section. Q is 208 for every variant.
   */
  #if defined(ATHLON)

  /* Scalar settings and flags. */
  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 384

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 208
  #define SGEMM_DEFAULT_Q 208
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 104
  #define DGEMM_DEFAULT_Q 208
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P 56
  #define QGEMM_DEFAULT_Q 208
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 104
  #define CGEMM_DEFAULT_Q 208
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 56
  #define ZGEMM_DEFAULT_Q 208
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P 28
  #define XGEMM_DEFAULT_Q 208
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* ATHLON */
  /*
   * Parameter set selected when VIAC3 is defined.
   *
   * Nothing in this set depends on another macro. P is 128 for every
   * variant; Q varies per variant; R expands to the identifiers <x>gemm_r,
   * which are not defined in this section. This set does not define
   * HAVE_EXCLUSIVE_CACHE.
   */
  #if defined(VIAC3)

  /* Scalar settings. */
  #define SNUMOPT 2
  #define DNUMOPT 1
  #define SYMV_P 16

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 256

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 512
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P 128
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P 128
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* VIAC3 */
  /*
   * Parameter set selected when NANO is defined.
   *
   *  - Only four unroll factors depend on ARCH_X86: SGEMM/CGEMM UNROLL_N and
   *    DGEMM/ZGEMM UNROLL_M. All other unroll factors are the same in both
   *    configurations and are defined once below.
   *  - P is 288 for every variant; Q varies per variant; R expands to the
   *    identifiers <x>gemm_r, which are not defined in this section.
   */
  #if defined(NANO)

  /* Scalar settings and flags. */
  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256

  /* Per-variant values that do not depend on any other macro. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_P 288
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 288
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P 288
  #define QGEMM_DEFAULT_Q 64
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_P 288
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 288
  #define ZGEMM_DEFAULT_Q 64
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P 288
  #define XGEMM_DEFAULT_Q 32
  #define XGEMM_DEFAULT_R xgemm_r

  /* Unroll factors that differ between ARCH_X86 and other builds. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_N 8
    #define CGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define CGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  #endif /* NANO */
  /*
   * Parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3 builds.
   *
   *  - HAVE_SSE selects SNUMOPT and the SGEMM/CGEMM UNROLL_M values; nothing
   *    else in this set is conditional.
   *  - P and R expand to the identifiers <x>gemm_p / <x>gemm_r, which are not
   *    defined in this section. Q is 256 for every variant.
   */
  #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)

  /* Scalar settings. */
  #define DNUMOPT 1
  #define SYMV_P 4

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0

  /* Values that depend on HAVE_SSE. */
  #if !defined(HAVE_SSE)
    #define SNUMOPT 1
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SNUMOPT 2
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define CGEMM_DEFAULT_UNROLL_M 4
  #endif

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  /*
   * Parameter set selected when PENTIUMM is defined.
   *
   *  - CORE_YONAH selects six unroll factors (SGEMM M/N, DGEMM N, CGEMM M/N,
   *    ZGEMM N); the remaining unroll factors are the same either way and are
   *    defined once below.
   *  - P and R expand to the identifiers <x>gemm_p / <x>gemm_r, which are not
   *    defined in this section. Q is 256 for every variant.
   */
  #if defined(PENTIUMM)

  /* Scalar settings. */
  #define SNUMOPT 2
  #define DNUMOPT 1
  #define SYMV_P 4

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0

  /* Unroll factors that depend on CORE_YONAH. */
  #if !defined(CORE_YONAH)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define SGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_N 1
    #define ZGEMM_DEFAULT_UNROLL_N 1
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_N 2
    #define ZGEMM_DEFAULT_UNROLL_N 2
  #endif

  /* SGEMM */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* PENTIUMM */
  /*
   * Parameter set selected when CORE_NORTHWOOD is defined.
   *
   * Nothing in this set depends on another macro. P and R expand to the
   * identifiers <x>gemm_p / <x>gemm_r, which are not defined in this
   * section. Q is 128 for every variant.
   */
  #if defined(CORE_NORTHWOOD)

  /* Scalar settings. */
  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 8

  /* Buffer alignment mask and offsets. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 32

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 128
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* CORE_NORTHWOOD */
  /*
   * Parameter set selected when CORE_PRESCOTT is defined.
   *
   *  - __64BIT__ selects the pair of buffer offsets.
   *  - ARCH_X86 selects UNROLL_M for the S/D/C/Z variants; Q and X use the
   *    same UNROLL_M either way. UNROLL_N is unconditional.
   *  - P and R expand to the identifiers <x>gemm_p / <x>gemm_r, which are not
   *    defined in this section. Q is 128 for every variant.
   */
  #if defined(CORE_PRESCOTT)

  /* Scalar settings. */
  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 8

  /* Buffer alignment mask; offsets depend on __64BIT__. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #if defined(__64BIT__)
    #define GEMM_DEFAULT_OFFSET_A 0
    #define GEMM_DEFAULT_OFFSET_B 256
  #else
    #define GEMM_DEFAULT_OFFSET_A 128
    #define GEMM_DEFAULT_OFFSET_B 192
  #endif

  /* UNROLL_M values that differ between ARCH_X86 and other builds. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 128
  #define SGEMM_DEFAULT_R sgemm_r

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  /* QGEMM */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_R qgemm_r

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  /* XGEMM */
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* CORE_PRESCOTT */
  // Default parameter macros selected when CORE2 is defined.
  // The ARCH_X86 conditional picks a full UNROLL_M + UNROLL_N set. The
  // UNROLL_M values are the same in both branches; only UNROLL_N differs
  // (S/D: 2 with ARCH_X86 vs 4 without, C/Z: 1 vs 2).
  // The _P and _R macros expand to the identifiers sgemm_p, sgemm_r, ...
  // which this block does not define; every _Q is the literal 256.
  662. #ifdef CORE2
  663. #define SNUMOPT 8
  664. #define DNUMOPT 4
  665. #define GEMM_DEFAULT_OFFSET_A 448
  666. #define GEMM_DEFAULT_OFFSET_B 128
  667. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  668. #define SYMV_P 8
  669. #define SWITCH_RATIO 4
  670. #ifdef ARCH_X86
  671. #define SGEMM_DEFAULT_UNROLL_M 8
  672. #define DGEMM_DEFAULT_UNROLL_M 4
  673. #define QGEMM_DEFAULT_UNROLL_M 2
  674. #define CGEMM_DEFAULT_UNROLL_M 4
  675. #define ZGEMM_DEFAULT_UNROLL_M 2
  676. #define XGEMM_DEFAULT_UNROLL_M 1
  677. #define SGEMM_DEFAULT_UNROLL_N 2
  678. #define DGEMM_DEFAULT_UNROLL_N 2
  679. #define QGEMM_DEFAULT_UNROLL_N 2
  680. #define CGEMM_DEFAULT_UNROLL_N 1
  681. #define ZGEMM_DEFAULT_UNROLL_N 1
  682. #define XGEMM_DEFAULT_UNROLL_N 1
  // MASK(a, b) evaluates to a rounded up to a multiple of b using integer
  // division (for positive integer operands). It is defined only in this
  // ARCH_X86 branch; both arguments are evaluated more than once.
  683. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  684. #else
  685. #define SGEMM_DEFAULT_UNROLL_M 8
  686. #define DGEMM_DEFAULT_UNROLL_M 4
  687. #define QGEMM_DEFAULT_UNROLL_M 2
  688. #define CGEMM_DEFAULT_UNROLL_M 4
  689. #define ZGEMM_DEFAULT_UNROLL_M 2
  690. #define XGEMM_DEFAULT_UNROLL_M 1
  691. #define SGEMM_DEFAULT_UNROLL_N 4
  692. #define DGEMM_DEFAULT_UNROLL_N 4
  693. #define QGEMM_DEFAULT_UNROLL_N 2
  694. #define CGEMM_DEFAULT_UNROLL_N 2
  695. #define ZGEMM_DEFAULT_UNROLL_N 2
  696. #define XGEMM_DEFAULT_UNROLL_N 1
  697. #endif
  698. #define SGEMM_DEFAULT_P sgemm_p
  699. #define SGEMM_DEFAULT_R sgemm_r
  700. #define DGEMM_DEFAULT_P dgemm_p
  701. #define DGEMM_DEFAULT_R dgemm_r
  702. #define QGEMM_DEFAULT_P qgemm_p
  703. #define QGEMM_DEFAULT_R qgemm_r
  704. #define CGEMM_DEFAULT_P cgemm_p
  705. #define CGEMM_DEFAULT_R cgemm_r
  706. #define ZGEMM_DEFAULT_P zgemm_p
  707. #define ZGEMM_DEFAULT_R zgemm_r
  708. #define XGEMM_DEFAULT_P xgemm_p
  709. #define XGEMM_DEFAULT_R xgemm_r
  710. #define SGEMM_DEFAULT_Q 256
  711. #define DGEMM_DEFAULT_Q 256
  712. #define QGEMM_DEFAULT_Q 256
  713. #define CGEMM_DEFAULT_Q 256
  714. #define ZGEMM_DEFAULT_Q 256
  715. #define XGEMM_DEFAULT_Q 256
  716. #endif
  // Default parameter macros selected when PENRYN is defined.
  // The ARCH_X86 conditional picks a full UNROLL_M + UNROLL_N set. Here the
  // UNROLL_N values are the same in both branches; only UNROLL_M differs
  // (4/2/2/2/1/1 with ARCH_X86 vs 8/4/2/4/2/1 without).
  // The _P and _R macros expand to the identifiers sgemm_p, sgemm_r, ...
  // which this block does not define. The _Q literals vary by precision
  // (512 for S/C, 256 for D/Z, 128 for Q/X). GETRF_FACTOR is 0.75.
  717. #ifdef PENRYN
  718. #define SNUMOPT 8
  719. #define DNUMOPT 4
  720. #define GEMM_DEFAULT_OFFSET_A 128
  721. #define GEMM_DEFAULT_OFFSET_B 0
  722. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  723. #define SYMV_P 8
  724. #define SWITCH_RATIO 4
  725. #ifdef ARCH_X86
  726. #define SGEMM_DEFAULT_UNROLL_M 4
  727. #define DGEMM_DEFAULT_UNROLL_M 2
  728. #define QGEMM_DEFAULT_UNROLL_M 2
  729. #define CGEMM_DEFAULT_UNROLL_M 2
  730. #define ZGEMM_DEFAULT_UNROLL_M 1
  731. #define XGEMM_DEFAULT_UNROLL_M 1
  732. #define SGEMM_DEFAULT_UNROLL_N 4
  733. #define DGEMM_DEFAULT_UNROLL_N 4
  734. #define QGEMM_DEFAULT_UNROLL_N 2
  735. #define CGEMM_DEFAULT_UNROLL_N 2
  736. #define ZGEMM_DEFAULT_UNROLL_N 2
  737. #define XGEMM_DEFAULT_UNROLL_N 1
  738. #else
  739. #define SGEMM_DEFAULT_UNROLL_M 8
  740. #define DGEMM_DEFAULT_UNROLL_M 4
  741. #define QGEMM_DEFAULT_UNROLL_M 2
  742. #define CGEMM_DEFAULT_UNROLL_M 4
  743. #define ZGEMM_DEFAULT_UNROLL_M 2
  744. #define XGEMM_DEFAULT_UNROLL_M 1
  745. #define SGEMM_DEFAULT_UNROLL_N 4
  746. #define DGEMM_DEFAULT_UNROLL_N 4
  747. #define QGEMM_DEFAULT_UNROLL_N 2
  748. #define CGEMM_DEFAULT_UNROLL_N 2
  749. #define ZGEMM_DEFAULT_UNROLL_N 2
  750. #define XGEMM_DEFAULT_UNROLL_N 1
  751. #endif
  752. #define SGEMM_DEFAULT_P sgemm_p
  753. #define SGEMM_DEFAULT_R sgemm_r
  754. #define DGEMM_DEFAULT_P dgemm_p
  755. #define DGEMM_DEFAULT_R dgemm_r
  756. #define QGEMM_DEFAULT_P qgemm_p
  757. #define QGEMM_DEFAULT_R qgemm_r
  758. #define CGEMM_DEFAULT_P cgemm_p
  759. #define CGEMM_DEFAULT_R cgemm_r
  760. #define ZGEMM_DEFAULT_P zgemm_p
  761. #define ZGEMM_DEFAULT_R zgemm_r
  762. #define XGEMM_DEFAULT_P xgemm_p
  763. #define XGEMM_DEFAULT_R xgemm_r
  764. #define SGEMM_DEFAULT_Q 512
  765. #define DGEMM_DEFAULT_Q 256
  766. #define QGEMM_DEFAULT_Q 128
  767. #define CGEMM_DEFAULT_Q 512
  768. #define ZGEMM_DEFAULT_Q 256
  769. #define XGEMM_DEFAULT_Q 128
  770. #define GETRF_FACTOR 0.75
  771. #endif
  // Default parameter macros selected when DUNNINGTON is defined.
  // The ARCH_X86 conditional picks a full UNROLL_M + UNROLL_N set; the
  // UNROLL_N values are the same in both branches and only UNROLL_M differs
  // (4/2/2/2/1/1 with ARCH_X86 vs 8/4/2/4/2/1 without).
  // The _P and _R macros expand to the identifiers sgemm_p, sgemm_r, ...
  // which this block does not define. The _Q literals are 768 for S/C,
  // 384 for D/Z and 192 for Q/X. This block also sets GETRF_FACTOR to 0.75
  // and maps GEMM_THREAD to the identifier gemm_thread_mn (not defined here).
  772. #ifdef DUNNINGTON
  773. #define SNUMOPT 8
  774. #define DNUMOPT 4
  775. #define GEMM_DEFAULT_OFFSET_A 128
  776. #define GEMM_DEFAULT_OFFSET_B 0
  777. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  778. #define SYMV_P 8
  779. #define SWITCH_RATIO 4
  780. #ifdef ARCH_X86
  781. #define SGEMM_DEFAULT_UNROLL_M 4
  782. #define DGEMM_DEFAULT_UNROLL_M 2
  783. #define QGEMM_DEFAULT_UNROLL_M 2
  784. #define CGEMM_DEFAULT_UNROLL_M 2
  785. #define ZGEMM_DEFAULT_UNROLL_M 1
  786. #define XGEMM_DEFAULT_UNROLL_M 1
  787. #define SGEMM_DEFAULT_UNROLL_N 4
  788. #define DGEMM_DEFAULT_UNROLL_N 4
  789. #define QGEMM_DEFAULT_UNROLL_N 2
  790. #define CGEMM_DEFAULT_UNROLL_N 2
  791. #define ZGEMM_DEFAULT_UNROLL_N 2
  792. #define XGEMM_DEFAULT_UNROLL_N 1
  793. #else
  794. #define SGEMM_DEFAULT_UNROLL_M 8
  795. #define DGEMM_DEFAULT_UNROLL_M 4
  796. #define QGEMM_DEFAULT_UNROLL_M 2
  797. #define CGEMM_DEFAULT_UNROLL_M 4
  798. #define ZGEMM_DEFAULT_UNROLL_M 2
  799. #define XGEMM_DEFAULT_UNROLL_M 1
  800. #define SGEMM_DEFAULT_UNROLL_N 4
  801. #define DGEMM_DEFAULT_UNROLL_N 4
  802. #define QGEMM_DEFAULT_UNROLL_N 2
  803. #define CGEMM_DEFAULT_UNROLL_N 2
  804. #define ZGEMM_DEFAULT_UNROLL_N 2
  805. #define XGEMM_DEFAULT_UNROLL_N 1
  806. #endif
  807. #define SGEMM_DEFAULT_P sgemm_p
  808. #define SGEMM_DEFAULT_R sgemm_r
  809. #define DGEMM_DEFAULT_P dgemm_p
  810. #define DGEMM_DEFAULT_R dgemm_r
  811. #define QGEMM_DEFAULT_P qgemm_p
  812. #define QGEMM_DEFAULT_R qgemm_r
  813. #define CGEMM_DEFAULT_P cgemm_p
  814. #define CGEMM_DEFAULT_R cgemm_r
  815. #define ZGEMM_DEFAULT_P zgemm_p
  816. #define ZGEMM_DEFAULT_R zgemm_r
  817. #define XGEMM_DEFAULT_P xgemm_p
  818. #define XGEMM_DEFAULT_R xgemm_r
  819. #define SGEMM_DEFAULT_Q 768
  820. #define DGEMM_DEFAULT_Q 384
  821. #define QGEMM_DEFAULT_Q 192
  822. #define CGEMM_DEFAULT_Q 768
  823. #define ZGEMM_DEFAULT_Q 384
  824. #define XGEMM_DEFAULT_Q 192
  825. #define GETRF_FACTOR 0.75
  826. #define GEMM_THREAD gemm_thread_mn
  827. #endif
  // Default parameter macros selected when NEHALEM is defined.
  // The ARCH_X86 conditional picks a full UNROLL_M + UNROLL_N set. UNROLL_M
  // is the same in both branches (4/2/2/2/1/1); only UNROLL_N differs
  // (S/D: 4 with ARCH_X86 vs 8 without, C/Z: 2 vs 4).
  // Unlike the blocks that map _P to an identifier, _P here is a literal
  // (504 for S/D/Q, 252 for C/Z/X). The _R macros still expand to the
  // identifiers sgemm_r, dgemm_r, ... which this block does not define.
  // GETRF_FACTOR is 0.72.
  828. #ifdef NEHALEM
  829. #define SNUMOPT 8
  830. #define DNUMOPT 4
  831. #define GEMM_DEFAULT_OFFSET_A 32
  832. #define GEMM_DEFAULT_OFFSET_B 0
  833. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  834. #define SYMV_P 8
  835. #define SWITCH_RATIO 4
  836. #ifdef ARCH_X86
  837. #define SGEMM_DEFAULT_UNROLL_M 4
  838. #define DGEMM_DEFAULT_UNROLL_M 2
  839. #define QGEMM_DEFAULT_UNROLL_M 2
  840. #define CGEMM_DEFAULT_UNROLL_M 2
  841. #define ZGEMM_DEFAULT_UNROLL_M 1
  842. #define XGEMM_DEFAULT_UNROLL_M 1
  843. #define SGEMM_DEFAULT_UNROLL_N 4
  844. #define DGEMM_DEFAULT_UNROLL_N 4
  845. #define QGEMM_DEFAULT_UNROLL_N 2
  846. #define CGEMM_DEFAULT_UNROLL_N 2
  847. #define ZGEMM_DEFAULT_UNROLL_N 2
  848. #define XGEMM_DEFAULT_UNROLL_N 1
  849. #else
  850. #define SGEMM_DEFAULT_UNROLL_M 4
  851. #define DGEMM_DEFAULT_UNROLL_M 2
  852. #define QGEMM_DEFAULT_UNROLL_M 2
  853. #define CGEMM_DEFAULT_UNROLL_M 2
  854. #define ZGEMM_DEFAULT_UNROLL_M 1
  855. #define XGEMM_DEFAULT_UNROLL_M 1
  856. #define SGEMM_DEFAULT_UNROLL_N 8
  857. #define DGEMM_DEFAULT_UNROLL_N 8
  858. #define QGEMM_DEFAULT_UNROLL_N 2
  859. #define CGEMM_DEFAULT_UNROLL_N 4
  860. #define ZGEMM_DEFAULT_UNROLL_N 4
  861. #define XGEMM_DEFAULT_UNROLL_N 1
  862. #endif
  863. #define SGEMM_DEFAULT_P 504
  864. #define SGEMM_DEFAULT_R sgemm_r
  865. #define DGEMM_DEFAULT_P 504
  866. #define DGEMM_DEFAULT_R dgemm_r
  867. #define QGEMM_DEFAULT_P 504
  868. #define QGEMM_DEFAULT_R qgemm_r
  869. #define CGEMM_DEFAULT_P 252
  870. #define CGEMM_DEFAULT_R cgemm_r
  871. #define ZGEMM_DEFAULT_P 252
  872. #define ZGEMM_DEFAULT_R zgemm_r
  873. #define XGEMM_DEFAULT_P 252
  874. #define XGEMM_DEFAULT_R xgemm_r
  875. #define SGEMM_DEFAULT_Q 512
  876. #define DGEMM_DEFAULT_Q 256
  877. #define QGEMM_DEFAULT_Q 128
  878. #define CGEMM_DEFAULT_Q 512
  879. #define ZGEMM_DEFAULT_Q 256
  880. #define XGEMM_DEFAULT_Q 128
  881. #define GETRF_FACTOR 0.72
  882. #endif
  // Default parameter macros selected when SANDYBRIDGE is defined.
  // The ARCH_X86 conditional picks a full UNROLL_M + UNROLL_N set; the
  // non-ARCH_X86 branch uses larger UNROLL_M values (16/8/2/8/4/1) and
  // ZGEMM UNROLL_N of 4 instead of 2.
  // The _P and _Q macros are literals; the _R macros expand to the
  // identifiers sgemm_r, dgemm_r, ... which this block does not define.
  // The commented-out "_R 1024" lines are disabled alternatives kept in
  // place. The block also defines the CGEMM3M/ZGEMM3M/XGEMM3M defaults
  // (all literals) and sets GETRF_FACTOR to 0.72.
  883. #ifdef SANDYBRIDGE
  884. #define SNUMOPT 8
  885. #define DNUMOPT 4
  886. #define GEMM_DEFAULT_OFFSET_A 0
  887. #define GEMM_DEFAULT_OFFSET_B 0
  888. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  889. #define SYMV_P 8
  890. #define SWITCH_RATIO 4
  891. #ifdef ARCH_X86
  892. #define SGEMM_DEFAULT_UNROLL_M 4
  893. #define DGEMM_DEFAULT_UNROLL_M 2
  894. #define QGEMM_DEFAULT_UNROLL_M 2
  895. #define CGEMM_DEFAULT_UNROLL_M 2
  896. #define ZGEMM_DEFAULT_UNROLL_M 1
  897. #define XGEMM_DEFAULT_UNROLL_M 1
  898. #define SGEMM_DEFAULT_UNROLL_N 4
  899. #define DGEMM_DEFAULT_UNROLL_N 4
  900. #define QGEMM_DEFAULT_UNROLL_N 2
  901. #define CGEMM_DEFAULT_UNROLL_N 2
  902. #define ZGEMM_DEFAULT_UNROLL_N 2
  903. #define XGEMM_DEFAULT_UNROLL_N 1
  904. #else
  905. #define SGEMM_DEFAULT_UNROLL_M 16
  906. #define DGEMM_DEFAULT_UNROLL_M 8
  907. #define QGEMM_DEFAULT_UNROLL_M 2
  908. #define CGEMM_DEFAULT_UNROLL_M 8
  909. #define ZGEMM_DEFAULT_UNROLL_M 4
  910. #define XGEMM_DEFAULT_UNROLL_M 1
  911. #define SGEMM_DEFAULT_UNROLL_N 4
  912. #define DGEMM_DEFAULT_UNROLL_N 4
  913. #define QGEMM_DEFAULT_UNROLL_N 2
  914. #define CGEMM_DEFAULT_UNROLL_N 2
  915. #define ZGEMM_DEFAULT_UNROLL_N 4
  916. #define XGEMM_DEFAULT_UNROLL_N 1
  917. #endif
  918. #define SGEMM_DEFAULT_P 768
  919. #define SGEMM_DEFAULT_R sgemm_r
  920. //#define SGEMM_DEFAULT_R 1024
  921. #define DGEMM_DEFAULT_P 512
  922. #define DGEMM_DEFAULT_R dgemm_r
  923. //#define DGEMM_DEFAULT_R 1024
  924. #define QGEMM_DEFAULT_P 504
  925. #define QGEMM_DEFAULT_R qgemm_r
  926. #define CGEMM_DEFAULT_P 768
  927. #define CGEMM_DEFAULT_R cgemm_r
  928. //#define CGEMM_DEFAULT_R 1024
  929. #define ZGEMM_DEFAULT_P 512
  930. #define ZGEMM_DEFAULT_R zgemm_r
  931. //#define ZGEMM_DEFAULT_R 1024
  932. #define XGEMM_DEFAULT_P 252
  933. #define XGEMM_DEFAULT_R xgemm_r
  934. #define SGEMM_DEFAULT_Q 384
  935. #define DGEMM_DEFAULT_Q 256
  936. #define QGEMM_DEFAULT_Q 128
  937. #define CGEMM_DEFAULT_Q 512
  938. #define ZGEMM_DEFAULT_Q 192
  939. #define XGEMM_DEFAULT_Q 128
  940. #define CGEMM3M_DEFAULT_UNROLL_N 8
  941. #define CGEMM3M_DEFAULT_UNROLL_M 4
  942. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  943. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  944. #define CGEMM3M_DEFAULT_P 448
  945. #define ZGEMM3M_DEFAULT_P 224
  946. #define XGEMM3M_DEFAULT_P 112
  947. #define CGEMM3M_DEFAULT_Q 224
  948. #define ZGEMM3M_DEFAULT_Q 224
  949. #define XGEMM3M_DEFAULT_Q 224
  950. #define CGEMM3M_DEFAULT_R 12288
  951. #define ZGEMM3M_DEFAULT_R 12288
  952. #define XGEMM3M_DEFAULT_R 12288
  953. #define GETRF_FACTOR 0.72
  954. #endif
  // Default parameter macros selected when HASWELL is defined.
  // ARCH_X86 is tested twice: the first conditional chooses the UNROLL_M /
  // UNROLL_N set, the second chooses the P/Q/R set.
  // Settings that exist only in the non-ARCH_X86 branches:
  //   - DGEMM_DEFAULT_UNROLL_MN (16)
  //   - the CGEMM3M/ZGEMM3M/XGEMM3M defaults
  //   - a WINDOWS_ABI-dependent choice of SGEMM/DGEMM _Q (320/128 with
  //     WINDOWS_ABI, 384/256 without)
  //   - a literal DGEMM_DEFAULT_R (13824) instead of the dgemm_r identifier
  // In the ARCH_X86 branch CGEMM_DEFAULT_R is the literal 1024 while the
  // other _R macros expand to identifiers not defined in this block.
  // NOTE(review): no GETRF_FACTOR is defined in this block, unlike the
  // NEHALEM and SANDYBRIDGE blocks - confirm that is intentional.
  955. #ifdef HASWELL
  956. #define SNUMOPT 16
  957. #define DNUMOPT 8
  958. #define GEMM_DEFAULT_OFFSET_A 0
  959. #define GEMM_DEFAULT_OFFSET_B 0
  960. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  961. #define SYMV_P 8
  962. #define SWITCH_RATIO 4
  963. #ifdef ARCH_X86
  964. #define SGEMM_DEFAULT_UNROLL_M 4
  965. #define DGEMM_DEFAULT_UNROLL_M 2
  966. #define QGEMM_DEFAULT_UNROLL_M 2
  967. #define CGEMM_DEFAULT_UNROLL_M 2
  968. #define ZGEMM_DEFAULT_UNROLL_M 1
  969. #define XGEMM_DEFAULT_UNROLL_M 1
  970. #define SGEMM_DEFAULT_UNROLL_N 4
  971. #define DGEMM_DEFAULT_UNROLL_N 4
  972. #define QGEMM_DEFAULT_UNROLL_N 2
  973. #define CGEMM_DEFAULT_UNROLL_N 2
  974. #define ZGEMM_DEFAULT_UNROLL_N 2
  975. #define XGEMM_DEFAULT_UNROLL_N 1
  976. #else
  977. #define SGEMM_DEFAULT_UNROLL_M 16
  978. #define DGEMM_DEFAULT_UNROLL_M 4
  979. #define QGEMM_DEFAULT_UNROLL_M 2
  980. #define CGEMM_DEFAULT_UNROLL_M 8
  981. #define ZGEMM_DEFAULT_UNROLL_M 4
  982. #define XGEMM_DEFAULT_UNROLL_M 1
  983. #define SGEMM_DEFAULT_UNROLL_N 4
  984. #define DGEMM_DEFAULT_UNROLL_N 4
  985. #define QGEMM_DEFAULT_UNROLL_N 2
  986. #define CGEMM_DEFAULT_UNROLL_N 2
  987. #define ZGEMM_DEFAULT_UNROLL_N 2
  988. #define XGEMM_DEFAULT_UNROLL_N 1
  989. #define DGEMM_DEFAULT_UNROLL_MN 16
  990. #endif
  // Second ARCH_X86 split: P/Q/R values.
  991. #ifdef ARCH_X86
  992. #define SGEMM_DEFAULT_P 512
  993. #define SGEMM_DEFAULT_R sgemm_r
  994. #define DGEMM_DEFAULT_P 512
  995. #define DGEMM_DEFAULT_R dgemm_r
  996. #define QGEMM_DEFAULT_P 504
  997. #define QGEMM_DEFAULT_R qgemm_r
  998. #define CGEMM_DEFAULT_P 128
  999. #define CGEMM_DEFAULT_R 1024
  1000. #define ZGEMM_DEFAULT_P 512
  1001. #define ZGEMM_DEFAULT_R zgemm_r
  1002. #define XGEMM_DEFAULT_P 252
  1003. #define XGEMM_DEFAULT_R xgemm_r
  1004. #define SGEMM_DEFAULT_Q 256
  1005. #define DGEMM_DEFAULT_Q 256
  1006. #define QGEMM_DEFAULT_Q 128
  1007. #define CGEMM_DEFAULT_Q 256
  1008. #define ZGEMM_DEFAULT_Q 192
  1009. #define XGEMM_DEFAULT_Q 128
  1010. #else
  1011. #define SGEMM_DEFAULT_P 768
  1012. #define DGEMM_DEFAULT_P 512
  1013. #define CGEMM_DEFAULT_P 384
  1014. #define ZGEMM_DEFAULT_P 256
  1015. #ifdef WINDOWS_ABI
  1016. #define SGEMM_DEFAULT_Q 320
  1017. #define DGEMM_DEFAULT_Q 128
  1018. #else
  1019. #define SGEMM_DEFAULT_Q 384
  1020. #define DGEMM_DEFAULT_Q 256
  1021. #endif
  1022. #define CGEMM_DEFAULT_Q 192
  1023. #define ZGEMM_DEFAULT_Q 128
  1024. #define SGEMM_DEFAULT_R sgemm_r
  1025. #define DGEMM_DEFAULT_R 13824
  1026. #define CGEMM_DEFAULT_R cgemm_r
  1027. #define ZGEMM_DEFAULT_R zgemm_r
  1028. #define QGEMM_DEFAULT_Q 128
  1029. #define QGEMM_DEFAULT_P 504
  1030. #define QGEMM_DEFAULT_R qgemm_r
  1031. #define XGEMM_DEFAULT_P 252
  1032. #define XGEMM_DEFAULT_R xgemm_r
  1033. #define XGEMM_DEFAULT_Q 128
  1034. #define CGEMM3M_DEFAULT_UNROLL_N 8
  1035. #define CGEMM3M_DEFAULT_UNROLL_M 4
  1036. #define ZGEMM3M_DEFAULT_UNROLL_N 8
  1037. #define ZGEMM3M_DEFAULT_UNROLL_M 2
  1038. #define CGEMM3M_DEFAULT_P 448
  1039. #define ZGEMM3M_DEFAULT_P 224
  1040. #define XGEMM3M_DEFAULT_P 112
  1041. #define CGEMM3M_DEFAULT_Q 224
  1042. #define ZGEMM3M_DEFAULT_Q 224
  1043. #define XGEMM3M_DEFAULT_Q 224
  1044. #define CGEMM3M_DEFAULT_R 12288
  1045. #define ZGEMM3M_DEFAULT_R 12288
  1046. #define XGEMM3M_DEFAULT_R 12288
  1047. #endif
  1048. #endif
  // Default parameter macros selected when ATOM is defined.
  // Only UNROLL_M depends on ARCH_X86 (4/2/2/2/1/1 with it, 8/4/2/4/2/1
  // without); UNROLL_N is shared by both cases.
  // The _P and _R macros expand to the identifiers sgemm_p, sgemm_r, ...
  // which this block does not define; every _Q is the literal 256.
  1049. #ifdef ATOM
  1050. #define SNUMOPT 2
  1051. #define DNUMOPT 1
  1052. #define GEMM_DEFAULT_OFFSET_A 64
  1053. #define GEMM_DEFAULT_OFFSET_B 0
  1054. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1055. #define SYMV_P 8
  1056. #ifdef ARCH_X86
  1057. #define SGEMM_DEFAULT_UNROLL_M 4
  1058. #define DGEMM_DEFAULT_UNROLL_M 2
  1059. #define QGEMM_DEFAULT_UNROLL_M 2
  1060. #define CGEMM_DEFAULT_UNROLL_M 2
  1061. #define ZGEMM_DEFAULT_UNROLL_M 1
  1062. #define XGEMM_DEFAULT_UNROLL_M 1
  1063. #else
  1064. #define SGEMM_DEFAULT_UNROLL_M 8
  1065. #define DGEMM_DEFAULT_UNROLL_M 4
  1066. #define QGEMM_DEFAULT_UNROLL_M 2
  1067. #define CGEMM_DEFAULT_UNROLL_M 4
  1068. #define ZGEMM_DEFAULT_UNROLL_M 2
  1069. #define XGEMM_DEFAULT_UNROLL_M 1
  1070. #endif
  1071. #define SGEMM_DEFAULT_UNROLL_N 4
  1072. #define DGEMM_DEFAULT_UNROLL_N 2
  1073. #define QGEMM_DEFAULT_UNROLL_N 2
  1074. #define CGEMM_DEFAULT_UNROLL_N 2
  1075. #define ZGEMM_DEFAULT_UNROLL_N 1
  1076. #define XGEMM_DEFAULT_UNROLL_N 1
  1077. #define SGEMM_DEFAULT_P sgemm_p
  1078. #define SGEMM_DEFAULT_R sgemm_r
  1079. #define DGEMM_DEFAULT_P dgemm_p
  1080. #define DGEMM_DEFAULT_R dgemm_r
  1081. #define QGEMM_DEFAULT_P qgemm_p
  1082. #define QGEMM_DEFAULT_R qgemm_r
  1083. #define CGEMM_DEFAULT_P cgemm_p
  1084. #define CGEMM_DEFAULT_R cgemm_r
  1085. #define ZGEMM_DEFAULT_P zgemm_p
  1086. #define ZGEMM_DEFAULT_R zgemm_r
  1087. #define XGEMM_DEFAULT_P xgemm_p
  1088. #define XGEMM_DEFAULT_R xgemm_r
  1089. #define SGEMM_DEFAULT_Q 256
  1090. #define DGEMM_DEFAULT_Q 256
  1091. #define QGEMM_DEFAULT_Q 256
  1092. #define CGEMM_DEFAULT_Q 256
  1093. #define ZGEMM_DEFAULT_Q 256
  1094. #define XGEMM_DEFAULT_Q 256
  1095. #endif
  // Default parameter macros selected when ITANIUM2 is defined.
  // There are no nested conditionals. Unroll factors are 8x8 for S/D/Q and
  // 4x4 for C/Z/X; every _Q is the literal 1024. The _P and _R macros
  // expand to the identifiers sgemm_p, sgemm_r, ... which this block does
  // not define. SYMV_P is 16 and GETRF_FACTOR is 0.65.
  1096. #ifdef ITANIUM2
  1097. #define SNUMOPT 4
  1098. #define DNUMOPT 4
  1099. #define GEMM_DEFAULT_OFFSET_A 0
  1100. #define GEMM_DEFAULT_OFFSET_B 128
  1101. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1102. #define SGEMM_DEFAULT_UNROLL_M 8
  1103. #define SGEMM_DEFAULT_UNROLL_N 8
  1104. #define DGEMM_DEFAULT_UNROLL_M 8
  1105. #define DGEMM_DEFAULT_UNROLL_N 8
  1106. #define QGEMM_DEFAULT_UNROLL_M 8
  1107. #define QGEMM_DEFAULT_UNROLL_N 8
  1108. #define CGEMM_DEFAULT_UNROLL_M 4
  1109. #define CGEMM_DEFAULT_UNROLL_N 4
  1110. #define ZGEMM_DEFAULT_UNROLL_M 4
  1111. #define ZGEMM_DEFAULT_UNROLL_N 4
  1112. #define XGEMM_DEFAULT_UNROLL_M 4
  1113. #define XGEMM_DEFAULT_UNROLL_N 4
  1114. #define SGEMM_DEFAULT_P sgemm_p
  1115. #define DGEMM_DEFAULT_P dgemm_p
  1116. #define QGEMM_DEFAULT_P qgemm_p
  1117. #define CGEMM_DEFAULT_P cgemm_p
  1118. #define ZGEMM_DEFAULT_P zgemm_p
  1119. #define XGEMM_DEFAULT_P xgemm_p
  1120. #define SGEMM_DEFAULT_Q 1024
  1121. #define DGEMM_DEFAULT_Q 1024
  1122. #define QGEMM_DEFAULT_Q 1024
  1123. #define CGEMM_DEFAULT_Q 1024
  1124. #define ZGEMM_DEFAULT_Q 1024
  1125. #define XGEMM_DEFAULT_Q 1024
  1126. #define SGEMM_DEFAULT_R sgemm_r
  1127. #define DGEMM_DEFAULT_R dgemm_r
  1128. #define QGEMM_DEFAULT_R qgemm_r
  1129. #define CGEMM_DEFAULT_R cgemm_r
  1130. #define ZGEMM_DEFAULT_R zgemm_r
  1131. #define XGEMM_DEFAULT_R xgemm_r
  1132. #define SYMV_P 16
  1133. #define GETRF_FACTOR 0.65
  1134. #endif
  // Default parameter macros shared by EV4, EV5 and EV6 (active when any
  // of the three is defined).
  // SNUMOPT/DNUMOPT are 1 for EV4 and 2 otherwise. Offsets, alignment,
  // unroll factors and SYMV_P are common to all three. Only the S, D, C
  // and Z precisions are configured here (no Q/X macros).
  // Each of EV4/EV5/EV6 then gets its own literal P/Q values. Only the EV4
  // section defines _R; the EV5 and EV6 sections leave _R undefined in this
  // block.
  // NOTE(review): the three per-CPU sections are independent #ifdef blocks,
  // so defining more than one of EV4/EV5/EV6 would redefine the P/Q macros;
  // presumably exactly one is ever defined - confirm.
  1135. #if defined(EV4) || defined(EV5) || defined(EV6)
  1136. #ifdef EV4
  1137. #define SNUMOPT 1
  1138. #define DNUMOPT 1
  1139. #else
  1140. #define SNUMOPT 2
  1141. #define DNUMOPT 2
  1142. #endif
  1143. #define GEMM_DEFAULT_OFFSET_A 512
  1144. #define GEMM_DEFAULT_OFFSET_B 512
  1145. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1146. #define SGEMM_DEFAULT_UNROLL_M 4
  1147. #define SGEMM_DEFAULT_UNROLL_N 4
  1148. #define DGEMM_DEFAULT_UNROLL_M 4
  1149. #define DGEMM_DEFAULT_UNROLL_N 4
  1150. #define CGEMM_DEFAULT_UNROLL_M 2
  1151. #define CGEMM_DEFAULT_UNROLL_N 2
  1152. #define ZGEMM_DEFAULT_UNROLL_M 2
  1153. #define ZGEMM_DEFAULT_UNROLL_N 2
  1154. #define SYMV_P 8
  1155. #ifdef EV4
  1156. #define SGEMM_DEFAULT_P 32
  1157. #define SGEMM_DEFAULT_Q 112
  1158. #define SGEMM_DEFAULT_R 256
  1159. #define DGEMM_DEFAULT_P 32
  1160. #define DGEMM_DEFAULT_Q 56
  1161. #define DGEMM_DEFAULT_R 256
  1162. #define CGEMM_DEFAULT_P 32
  1163. #define CGEMM_DEFAULT_Q 64
  1164. #define CGEMM_DEFAULT_R 240
  1165. #define ZGEMM_DEFAULT_P 32
  1166. #define ZGEMM_DEFAULT_Q 32
  1167. #define ZGEMM_DEFAULT_R 240
  1168. #endif
  1169. #ifdef EV5
  1170. #define SGEMM_DEFAULT_P 64
  1171. #define SGEMM_DEFAULT_Q 256
  1172. #define DGEMM_DEFAULT_P 64
  1173. #define DGEMM_DEFAULT_Q 128
  1174. #define CGEMM_DEFAULT_P 64
  1175. #define CGEMM_DEFAULT_Q 128
  1176. #define ZGEMM_DEFAULT_P 64
  1177. #define ZGEMM_DEFAULT_Q 64
  1178. #endif
  1179. #ifdef EV6
  1180. #define SGEMM_DEFAULT_P 256
  1181. #define SGEMM_DEFAULT_Q 512
  1182. #define DGEMM_DEFAULT_P 256
  1183. #define DGEMM_DEFAULT_Q 256
  1184. #define CGEMM_DEFAULT_P 256
  1185. #define CGEMM_DEFAULT_Q 256
  1186. #define ZGEMM_DEFAULT_P 128
  1187. #define ZGEMM_DEFAULT_Q 256
  1188. #endif
  1189. #endif
  // Default parameter macros selected when CELL is defined.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. No _R macro is defined in this block. SYMV_P is 4.
  1190. #ifdef CELL
  1191. #define SNUMOPT 2
  1192. #define DNUMOPT 2
  1193. #define GEMM_DEFAULT_OFFSET_A 0
  1194. #define GEMM_DEFAULT_OFFSET_B 8192
  1195. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1196. #define SGEMM_DEFAULT_UNROLL_M 16
  1197. #define SGEMM_DEFAULT_UNROLL_N 4
  1198. #define DGEMM_DEFAULT_UNROLL_M 4
  1199. #define DGEMM_DEFAULT_UNROLL_N 4
  1200. #define CGEMM_DEFAULT_UNROLL_M 8
  1201. #define CGEMM_DEFAULT_UNROLL_N 2
  1202. #define ZGEMM_DEFAULT_UNROLL_M 2
  1203. #define ZGEMM_DEFAULT_UNROLL_N 2
  1204. #define SGEMM_DEFAULT_P 128
  1205. #define DGEMM_DEFAULT_P 128
  1206. #define CGEMM_DEFAULT_P 128
  1207. #define ZGEMM_DEFAULT_P 128
  1208. #define SGEMM_DEFAULT_Q 512
  1209. #define DGEMM_DEFAULT_Q 256
  1210. #define CGEMM_DEFAULT_Q 256
  1211. #define ZGEMM_DEFAULT_Q 128
  1212. #define SYMV_P 4
  1213. #endif
  // Default parameter macros selected when PPCG4 is defined.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. Unlike most sibling blocks, this one defines neither
  // SNUMOPT/DNUMOPT nor any _R macro.
  // NOTE(review): confirm the missing SNUMOPT/DNUMOPT is intentional.
  1214. #ifdef PPCG4
  1215. #define GEMM_DEFAULT_OFFSET_A 0
  1216. #define GEMM_DEFAULT_OFFSET_B 1024
  1217. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1218. #define SGEMM_DEFAULT_UNROLL_M 16
  1219. #define SGEMM_DEFAULT_UNROLL_N 4
  1220. #define DGEMM_DEFAULT_UNROLL_M 4
  1221. #define DGEMM_DEFAULT_UNROLL_N 4
  1222. #define CGEMM_DEFAULT_UNROLL_M 8
  1223. #define CGEMM_DEFAULT_UNROLL_N 2
  1224. #define ZGEMM_DEFAULT_UNROLL_M 2
  1225. #define ZGEMM_DEFAULT_UNROLL_N 2
  1226. #define SGEMM_DEFAULT_P 256
  1227. #define DGEMM_DEFAULT_P 128
  1228. #define CGEMM_DEFAULT_P 128
  1229. #define ZGEMM_DEFAULT_P 64
  1230. #define SGEMM_DEFAULT_Q 256
  1231. #define DGEMM_DEFAULT_Q 256
  1232. #define CGEMM_DEFAULT_Q 256
  1233. #define ZGEMM_DEFAULT_Q 256
  1234. #define SYMV_P 4
  1235. #endif
  // Default parameter macros selected when PPC970 is defined.
  // Only the S, D, C and Z precisions are configured. The _P values are
  // defined only when OS_LINUX is defined, and are chosen by comparing
  // L2_SIZE against a fixed constant. No _R macro is defined in this block.
  1236. #ifdef PPC970
  1237. #define SNUMOPT 4
  1238. #define DNUMOPT 4
  1239. #define GEMM_DEFAULT_OFFSET_A 2688
  1240. #define GEMM_DEFAULT_OFFSET_B 3072
  1241. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1242. #define SGEMM_DEFAULT_UNROLL_M 16
  1243. #define SGEMM_DEFAULT_UNROLL_N 4
  1244. #define DGEMM_DEFAULT_UNROLL_M 4
  1245. #define DGEMM_DEFAULT_UNROLL_N 4
  1246. #define CGEMM_DEFAULT_UNROLL_M 8
  1247. #define CGEMM_DEFAULT_UNROLL_N 2
  1248. #define ZGEMM_DEFAULT_UNROLL_M 2
  1249. #define ZGEMM_DEFAULT_UNROLL_N 2
  // NOTE(review): without OS_LINUX none of the _P macros below are defined
  // by this block - confirm other operating systems get them elsewhere.
  1250. #ifdef OS_LINUX
  // NOTE(review): 1024976 is not 1 MiB (1048576). It may be a sentinel that
  // must match the L2_SIZE value emitted by the build configuration, so do
  // not "correct" it without checking where L2_SIZE is set.
  1251. #if L2_SIZE == 1024976
  1252. #define SGEMM_DEFAULT_P 320
  1253. #define DGEMM_DEFAULT_P 256
  1254. #define CGEMM_DEFAULT_P 256
  1255. #define ZGEMM_DEFAULT_P 256
  1256. #else
  1257. #define SGEMM_DEFAULT_P 176
  1258. #define DGEMM_DEFAULT_P 176
  1259. #define CGEMM_DEFAULT_P 176
  1260. #define ZGEMM_DEFAULT_P 176
  1261. #endif
  1262. #endif
  1263. #define SGEMM_DEFAULT_Q 512
  1264. #define DGEMM_DEFAULT_Q 256
  1265. #define CGEMM_DEFAULT_Q 256
  1266. #define ZGEMM_DEFAULT_Q 128
  1267. #define SYMV_P 4
  1268. #endif
  // Default parameter macros selected when PPC440 is defined.
  // Only the S, D, C and Z precisions are configured. Both GEMM offsets are
  // written as (32 * 0), i.e. zero. Each _R macro expands to the matching
  // _P macro, so R equals P (512) for every precision.
  1269. #ifdef PPC440
  1270. #define SNUMOPT 2
  1271. #define DNUMOPT 2
  1272. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1273. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1274. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1275. #define SGEMM_DEFAULT_UNROLL_M 4
  1276. #define SGEMM_DEFAULT_UNROLL_N 4
  1277. #define DGEMM_DEFAULT_UNROLL_M 4
  1278. #define DGEMM_DEFAULT_UNROLL_N 4
  1279. #define CGEMM_DEFAULT_UNROLL_M 2
  1280. #define CGEMM_DEFAULT_UNROLL_N 2
  1281. #define ZGEMM_DEFAULT_UNROLL_M 2
  1282. #define ZGEMM_DEFAULT_UNROLL_N 2
  1283. #define SGEMM_DEFAULT_P 512
  1284. #define DGEMM_DEFAULT_P 512
  1285. #define CGEMM_DEFAULT_P 512
  1286. #define ZGEMM_DEFAULT_P 512
  1287. #define SGEMM_DEFAULT_Q 1024
  1288. #define DGEMM_DEFAULT_Q 512
  1289. #define CGEMM_DEFAULT_Q 512
  1290. #define ZGEMM_DEFAULT_Q 256
  1291. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1292. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1293. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1294. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1295. #define SYMV_P 4
  1296. #endif
  // Default parameter macros selected when PPC440FP2 is defined.
  // Only the S, D, C and Z precisions are configured. Both GEMM offsets are
  // written as (32 * 0), i.e. zero. No _R macro is defined in this block.
  1297. #ifdef PPC440FP2
  1298. #define SNUMOPT 4
  1299. #define DNUMOPT 4
  1300. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1301. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1302. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1303. #define SGEMM_DEFAULT_UNROLL_M 8
  1304. #define SGEMM_DEFAULT_UNROLL_N 4
  1305. #define DGEMM_DEFAULT_UNROLL_M 8
  1306. #define DGEMM_DEFAULT_UNROLL_N 4
  1307. #define CGEMM_DEFAULT_UNROLL_M 4
  1308. #define CGEMM_DEFAULT_UNROLL_N 2
  1309. #define ZGEMM_DEFAULT_UNROLL_M 4
  1310. #define ZGEMM_DEFAULT_UNROLL_N 2
  1311. #define SGEMM_DEFAULT_P 128
  1312. #define DGEMM_DEFAULT_P 128
  1313. #define CGEMM_DEFAULT_P 128
  1314. #define ZGEMM_DEFAULT_P 128
  // The "#if 1" hard-selects the larger Q set (4096/3072/2048/1024); the
  // #else set (512/256/256/128) is never compiled unless the 1 is changed.
  1315. #if 1
  1316. #define SGEMM_DEFAULT_Q 4096
  1317. #define DGEMM_DEFAULT_Q 3072
  1318. #define CGEMM_DEFAULT_Q 2048
  1319. #define ZGEMM_DEFAULT_Q 1024
  1320. #else
  1321. #define SGEMM_DEFAULT_Q 512
  1322. #define DGEMM_DEFAULT_Q 256
  1323. #define CGEMM_DEFAULT_Q 256
  1324. #define ZGEMM_DEFAULT_Q 128
  1325. #endif
  1326. #define SYMV_P 4
  1327. #endif
  // Default parameter macros shared by POWER3, POWER4 and POWER5 (active
  // when any of the three is defined).
  // Offsets, alignment, unroll factors (4x4 for S/D, 2x2 for C/Z) and
  // SYMV_P are common. The per-CPU sections below are uneven:
  //   - POWER3 defines SNUMOPT/DNUMOPT and P/Q/R for S, D and Z only.
  //   - POWER4 defines only _P, chosen by ALLOC_HUGETLB.
  //   - POWER5 defines _P (chosen by ALLOC_HUGETLB) and _Q.
  // NOTE(review): POWER3 has no CGEMM_DEFAULT_P/Q/R, POWER4 has no _Q, and
  // neither POWER4 nor POWER5 sets SNUMOPT/DNUMOPT in this block - confirm
  // these are supplied elsewhere or are intentional omissions.
  1328. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1329. #define GEMM_DEFAULT_OFFSET_A 0
  1330. #define GEMM_DEFAULT_OFFSET_B 2048
  1331. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1332. #define SGEMM_DEFAULT_UNROLL_M 4
  1333. #define SGEMM_DEFAULT_UNROLL_N 4
  1334. #define DGEMM_DEFAULT_UNROLL_M 4
  1335. #define DGEMM_DEFAULT_UNROLL_N 4
  1336. #define CGEMM_DEFAULT_UNROLL_M 2
  1337. #define CGEMM_DEFAULT_UNROLL_N 2
  1338. #define ZGEMM_DEFAULT_UNROLL_M 2
  1339. #define ZGEMM_DEFAULT_UNROLL_N 2
  1340. #ifdef POWER3
  1341. #define SNUMOPT 4
  1342. #define DNUMOPT 4
  1343. #define SGEMM_DEFAULT_P 256
  1344. #define SGEMM_DEFAULT_Q 432
  1345. #define SGEMM_DEFAULT_R 1012
  1346. #define DGEMM_DEFAULT_P 256
  1347. #define DGEMM_DEFAULT_Q 216
  1348. #define DGEMM_DEFAULT_R 1012
  1349. #define ZGEMM_DEFAULT_P 256
  1350. #define ZGEMM_DEFAULT_Q 104
  1351. #define ZGEMM_DEFAULT_R 1012
  1352. #endif
  1353. #if defined(POWER4)
  1354. #ifdef ALLOC_HUGETLB
  1355. #define SGEMM_DEFAULT_P 184
  1356. #define DGEMM_DEFAULT_P 184
  1357. #define CGEMM_DEFAULT_P 184
  1358. #define ZGEMM_DEFAULT_P 184
  1359. #else
  1360. #define SGEMM_DEFAULT_P 144
  1361. #define DGEMM_DEFAULT_P 144
  1362. #define CGEMM_DEFAULT_P 144
  1363. #define ZGEMM_DEFAULT_P 144
  1364. #endif
  1365. #endif
  1366. #if defined(POWER5)
  1367. #ifdef ALLOC_HUGETLB
  1368. #define SGEMM_DEFAULT_P 512
  1369. #define DGEMM_DEFAULT_P 256
  1370. #define CGEMM_DEFAULT_P 256
  1371. #define ZGEMM_DEFAULT_P 128
  1372. #else
  1373. #define SGEMM_DEFAULT_P 320
  1374. #define DGEMM_DEFAULT_P 160
  1375. #define CGEMM_DEFAULT_P 160
  1376. #define ZGEMM_DEFAULT_P 80
  1377. #endif
  1378. #define SGEMM_DEFAULT_Q 256
  1379. #define CGEMM_DEFAULT_Q 256
  1380. #define DGEMM_DEFAULT_Q 256
  1381. #define ZGEMM_DEFAULT_Q 256
  1382. #endif
  1383. #define SYMV_P 8
  1384. #endif
  // Default parameter macros selected when POWER6 is defined.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. No _R macro is defined in this block. SYMV_P is 8.
  1385. #if defined(POWER6)
  1386. #define SNUMOPT 4
  1387. #define DNUMOPT 4
  1388. #define GEMM_DEFAULT_OFFSET_A 384
  1389. #define GEMM_DEFAULT_OFFSET_B 1024
  1390. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1391. #define SGEMM_DEFAULT_UNROLL_M 4
  1392. #define SGEMM_DEFAULT_UNROLL_N 4
  1393. #define DGEMM_DEFAULT_UNROLL_M 4
  1394. #define DGEMM_DEFAULT_UNROLL_N 4
  1395. #define CGEMM_DEFAULT_UNROLL_M 2
  1396. #define CGEMM_DEFAULT_UNROLL_N 4
  1397. #define ZGEMM_DEFAULT_UNROLL_M 2
  1398. #define ZGEMM_DEFAULT_UNROLL_N 4
  1399. #define SGEMM_DEFAULT_P 992
  1400. #define DGEMM_DEFAULT_P 480
  1401. #define CGEMM_DEFAULT_P 488
  1402. #define ZGEMM_DEFAULT_P 248
  1403. #define SGEMM_DEFAULT_Q 504
  1404. #define DGEMM_DEFAULT_Q 504
  1405. #define CGEMM_DEFAULT_Q 400
  1406. #define ZGEMM_DEFAULT_Q 400
  1407. #define SYMV_P 8
  1408. #endif
  // Default parameter macros selected when both SPARC and V7 are defined.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. No _R macro is defined in this block. GEMM_THREAD is mapped to
  // the identifier gemm_thread_mn, which this block does not define.
  1409. #if defined(SPARC) && defined(V7)
  1410. #define SNUMOPT 4
  1411. #define DNUMOPT 4
  1412. #define GEMM_DEFAULT_OFFSET_A 0
  1413. #define GEMM_DEFAULT_OFFSET_B 2048
  1414. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1415. #define SGEMM_DEFAULT_UNROLL_M 2
  1416. #define SGEMM_DEFAULT_UNROLL_N 8
  1417. #define DGEMM_DEFAULT_UNROLL_M 2
  1418. #define DGEMM_DEFAULT_UNROLL_N 8
  1419. #define CGEMM_DEFAULT_UNROLL_M 1
  1420. #define CGEMM_DEFAULT_UNROLL_N 4
  1421. #define ZGEMM_DEFAULT_UNROLL_M 1
  1422. #define ZGEMM_DEFAULT_UNROLL_N 4
  1423. #define SGEMM_DEFAULT_P 256
  1424. #define DGEMM_DEFAULT_P 256
  1425. #define CGEMM_DEFAULT_P 256
  1426. #define ZGEMM_DEFAULT_P 256
  1427. #define SGEMM_DEFAULT_Q 512
  1428. #define DGEMM_DEFAULT_Q 256
  1429. #define CGEMM_DEFAULT_Q 256
  1430. #define ZGEMM_DEFAULT_Q 128
  1431. #define SYMV_P 8
  1432. #define GEMM_THREAD gemm_thread_mn
  1433. #endif
  // Default parameter macros selected when SPARC and V9 are both defined,
  // or when __sparc_v9__ is defined on its own.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. No _R macro is defined in this block.
  // NOTE(review): if __sparc_v9__ is defined together with SPARC and V7,
  // this block and the SPARC/V7 block would both be active and redefine
  // the same macros - presumably that combination never occurs; confirm.
  1434. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1435. #define SNUMOPT 2
  1436. #define DNUMOPT 2
  1437. #define GEMM_DEFAULT_OFFSET_A 0
  1438. #define GEMM_DEFAULT_OFFSET_B 2048
  1439. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1440. #define SGEMM_DEFAULT_UNROLL_M 4
  1441. #define SGEMM_DEFAULT_UNROLL_N 4
  1442. #define DGEMM_DEFAULT_UNROLL_M 4
  1443. #define DGEMM_DEFAULT_UNROLL_N 4
  1444. #define CGEMM_DEFAULT_UNROLL_M 2
  1445. #define CGEMM_DEFAULT_UNROLL_N 2
  1446. #define ZGEMM_DEFAULT_UNROLL_M 2
  1447. #define ZGEMM_DEFAULT_UNROLL_N 2
  1448. #define SGEMM_DEFAULT_P 512
  1449. #define DGEMM_DEFAULT_P 512
  1450. #define CGEMM_DEFAULT_P 512
  1451. #define ZGEMM_DEFAULT_P 512
  1452. #define SGEMM_DEFAULT_Q 1024
  1453. #define DGEMM_DEFAULT_Q 512
  1454. #define CGEMM_DEFAULT_Q 512
  1455. #define ZGEMM_DEFAULT_Q 256
  1456. #define SYMV_P 8
  1457. #endif
  // Default parameter macros selected when SICORTEX is defined.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. Every _R is 2000 and SYMV_P is 16.
  1458. #ifdef SICORTEX
  1459. #define SNUMOPT 2
  1460. #define DNUMOPT 2
  1461. #define GEMM_DEFAULT_OFFSET_A 0
  1462. #define GEMM_DEFAULT_OFFSET_B 0
  1463. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1464. #define SGEMM_DEFAULT_UNROLL_M 2
  1465. #define SGEMM_DEFAULT_UNROLL_N 8
  1466. #define DGEMM_DEFAULT_UNROLL_M 2
  1467. #define DGEMM_DEFAULT_UNROLL_N 8
  1468. #define CGEMM_DEFAULT_UNROLL_M 1
  1469. #define CGEMM_DEFAULT_UNROLL_N 4
  1470. #define ZGEMM_DEFAULT_UNROLL_M 1
  1471. #define ZGEMM_DEFAULT_UNROLL_N 4
  1472. #define SGEMM_DEFAULT_P 108
  1473. #define DGEMM_DEFAULT_P 112
  1474. #define CGEMM_DEFAULT_P 108
  1475. #define ZGEMM_DEFAULT_P 112
  1476. #define SGEMM_DEFAULT_Q 288
  1477. #define DGEMM_DEFAULT_Q 144
  1478. #define CGEMM_DEFAULT_Q 144
  1479. #define ZGEMM_DEFAULT_Q 72
  1480. #define SGEMM_DEFAULT_R 2000
  1481. #define DGEMM_DEFAULT_R 2000
  1482. #define CGEMM_DEFAULT_R 2000
  1483. #define ZGEMM_DEFAULT_R 2000
  1484. #define SYMV_P 16
  1485. #endif
  // Default parameter macros selected when LOONGSON3A is defined.
  // Only the S, D, C and Z precisions are configured. DGEMM_DEFAULT_R
  // expands to the identifier dgemm_r (not defined in this block) while the
  // other three _R macros are the literal 640. The block also defines
  // GEMM_OFFSET_A1 and GEMM_OFFSET_B1.
  1486. #ifdef LOONGSON3A
  1487. // Started as a copy of the SICORTEX block; the unroll, P, Q and R values below now differ from it.
  1488. #define SNUMOPT 2
  1489. #define DNUMOPT 2
  1490. #define GEMM_DEFAULT_OFFSET_A 0
  1491. #define GEMM_DEFAULT_OFFSET_B 0
  1492. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1493. #define SGEMM_DEFAULT_UNROLL_M 8
  1494. #define SGEMM_DEFAULT_UNROLL_N 4
  1495. #define DGEMM_DEFAULT_UNROLL_M 4
  1496. #define DGEMM_DEFAULT_UNROLL_N 4
  1497. #define CGEMM_DEFAULT_UNROLL_M 4
  1498. #define CGEMM_DEFAULT_UNROLL_N 2
  1499. #define ZGEMM_DEFAULT_UNROLL_M 2
  1500. #define ZGEMM_DEFAULT_UNROLL_N 2
  1501. #define SGEMM_DEFAULT_P 64
  1502. #define DGEMM_DEFAULT_P 44
  1503. #define CGEMM_DEFAULT_P 64
  1504. #define ZGEMM_DEFAULT_P 32
  1505. #define SGEMM_DEFAULT_Q 192
  1506. #define DGEMM_DEFAULT_Q 92
  1507. #define CGEMM_DEFAULT_Q 128
  1508. #define ZGEMM_DEFAULT_Q 80
  1509. #define SGEMM_DEFAULT_R 640
  1510. #define DGEMM_DEFAULT_R dgemm_r
  1511. #define CGEMM_DEFAULT_R 640
  1512. #define ZGEMM_DEFAULT_R 640
  1513. #define GEMM_OFFSET_A1 0x10000
  1514. #define GEMM_OFFSET_B1 0x100000
  1515. #define SYMV_P 16
  1516. #endif
  // Default parameter macros selected when LOONGSON3B is defined.
  // Only the S, D, C and Z precisions are configured, all with literal
  // values. Every unroll factor is 2 and every _R is 512. The block also
  // defines GEMM_OFFSET_A1 and GEMM_OFFSET_B1 with the same values as the
  // LOONGSON3A block.
  1517. #ifdef LOONGSON3B
  1518. #define SNUMOPT 2
  1519. #define DNUMOPT 2
  1520. #define GEMM_DEFAULT_OFFSET_A 0
  1521. #define GEMM_DEFAULT_OFFSET_B 0
  1522. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1523. #define SGEMM_DEFAULT_UNROLL_M 2
  1524. #define SGEMM_DEFAULT_UNROLL_N 2
  1525. #define DGEMM_DEFAULT_UNROLL_M 2
  1526. #define DGEMM_DEFAULT_UNROLL_N 2
  1527. #define CGEMM_DEFAULT_UNROLL_M 2
  1528. #define CGEMM_DEFAULT_UNROLL_N 2
  1529. #define ZGEMM_DEFAULT_UNROLL_M 2
  1530. #define ZGEMM_DEFAULT_UNROLL_N 2
  1531. #define SGEMM_DEFAULT_P 64
  1532. #define DGEMM_DEFAULT_P 24
  1533. #define CGEMM_DEFAULT_P 24
  1534. #define ZGEMM_DEFAULT_P 20
  1535. #define SGEMM_DEFAULT_Q 192
  1536. #define DGEMM_DEFAULT_Q 128
  1537. #define CGEMM_DEFAULT_Q 128
  1538. #define ZGEMM_DEFAULT_Q 64
  1539. #define SGEMM_DEFAULT_R 512
  1540. #define DGEMM_DEFAULT_R 512
  1541. #define CGEMM_DEFAULT_R 512
  1542. #define ZGEMM_DEFAULT_R 512
  1543. #define GEMM_OFFSET_A1 0x10000
  1544. #define GEMM_OFFSET_B1 0x100000
  1545. #define SYMV_P 16
  1546. #endif
  #if defined(ARMV7)
  /*
   * Default tuning constants used when ARMV7 is defined.
   * The real kernels (S, D) use a 4x4 unroll, the complex ones (C, Z) 2x2.
   */
  #define SNUMOPT                 2
  #define DNUMOPT                 2

  /* Offset and alignment settings. */
  #define GEMM_DEFAULT_OFFSET_A   0
  #define GEMM_DEFAULT_OFFSET_B   0
  #define GEMM_DEFAULT_ALIGN      0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M  4
  #define SGEMM_DEFAULT_UNROLL_N  4
  #define SGEMM_DEFAULT_P         128
  #define SGEMM_DEFAULT_Q         240
  #define SGEMM_DEFAULT_R         12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M  4
  #define DGEMM_DEFAULT_UNROLL_N  4
  #define DGEMM_DEFAULT_P         128
  #define DGEMM_DEFAULT_Q         120
  #define DGEMM_DEFAULT_R         8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M  2
  #define CGEMM_DEFAULT_UNROLL_N  2
  #define CGEMM_DEFAULT_P         96
  #define CGEMM_DEFAULT_Q         120
  #define CGEMM_DEFAULT_R         4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M  2
  #define ZGEMM_DEFAULT_UNROLL_N  2
  #define ZGEMM_DEFAULT_P         64
  #define ZGEMM_DEFAULT_Q         120
  #define ZGEMM_DEFAULT_R         4096

  #define SYMV_P                  16
  #endif /* ARMV7 */
  #ifdef ARMV6
  /*
   * Default tuning constants used when ARMV6 is defined.
   * The real kernels (S, D) use a 4x2 unroll, the complex ones (C, Z) 2x2.
   */
  #define SNUMOPT                 2
  #define DNUMOPT                 2

  /* Offset and alignment settings. */
  #define GEMM_DEFAULT_OFFSET_A   0
  #define GEMM_DEFAULT_OFFSET_B   0
  #define GEMM_DEFAULT_ALIGN      0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M  4
  #define SGEMM_DEFAULT_UNROLL_N  2
  #define SGEMM_DEFAULT_P         128
  #define SGEMM_DEFAULT_Q         240
  #define SGEMM_DEFAULT_R         12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M  4
  #define DGEMM_DEFAULT_UNROLL_N  2
  #define DGEMM_DEFAULT_P         128
  #define DGEMM_DEFAULT_Q         120
  #define DGEMM_DEFAULT_R         8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M  2
  #define CGEMM_DEFAULT_UNROLL_N  2
  #define CGEMM_DEFAULT_P         96
  #define CGEMM_DEFAULT_Q         120
  #define CGEMM_DEFAULT_R         4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M  2
  #define ZGEMM_DEFAULT_UNROLL_N  2
  #define ZGEMM_DEFAULT_P         64
  #define ZGEMM_DEFAULT_Q         120
  #define ZGEMM_DEFAULT_R         4096

  #define SYMV_P                  16
  #endif /* ARMV6 */
  #ifdef ARMV8
  /*
   * Default tuning constants used when ARMV8 is defined.
   * All four routine families use a 2x2 unroll.
   */
  #define SNUMOPT                 2
  #define DNUMOPT                 2

  /* Offset and alignment settings. */
  #define GEMM_DEFAULT_OFFSET_A   0
  #define GEMM_DEFAULT_OFFSET_B   0
  #define GEMM_DEFAULT_ALIGN      0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M  2
  #define SGEMM_DEFAULT_UNROLL_N  2
  #define SGEMM_DEFAULT_P         128
  #define SGEMM_DEFAULT_Q         240
  #define SGEMM_DEFAULT_R         12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M  2
  #define DGEMM_DEFAULT_UNROLL_N  2
  #define DGEMM_DEFAULT_P         128
  #define DGEMM_DEFAULT_Q         120
  #define DGEMM_DEFAULT_R         8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M  2
  #define CGEMM_DEFAULT_UNROLL_N  2
  #define CGEMM_DEFAULT_P         96
  #define CGEMM_DEFAULT_Q         120
  #define CGEMM_DEFAULT_R         4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M  2
  #define ZGEMM_DEFAULT_UNROLL_N  2
  #define ZGEMM_DEFAULT_P         64
  #define ZGEMM_DEFAULT_Q         120
  #define ZGEMM_DEFAULT_R         4096

  #define SYMV_P                  16
  #endif /* ARMV8 */
  #ifdef ARMV5
  /*
   * Default tuning constants used when ARMV5 is defined.
   * All four routine families use a 2x2 unroll.
   */
  #define SNUMOPT                 2
  #define DNUMOPT                 2

  /* Offset and alignment settings. */
  #define GEMM_DEFAULT_OFFSET_A   0
  #define GEMM_DEFAULT_OFFSET_B   0
  #define GEMM_DEFAULT_ALIGN      0x03fffUL

  /* SGEMM */
  #define SGEMM_DEFAULT_UNROLL_M  2
  #define SGEMM_DEFAULT_UNROLL_N  2
  #define SGEMM_DEFAULT_P         128
  #define SGEMM_DEFAULT_Q         240
  #define SGEMM_DEFAULT_R         12288

  /* DGEMM */
  #define DGEMM_DEFAULT_UNROLL_M  2
  #define DGEMM_DEFAULT_UNROLL_N  2
  #define DGEMM_DEFAULT_P         128
  #define DGEMM_DEFAULT_Q         120
  #define DGEMM_DEFAULT_R         8192

  /* CGEMM */
  #define CGEMM_DEFAULT_UNROLL_M  2
  #define CGEMM_DEFAULT_UNROLL_N  2
  #define CGEMM_DEFAULT_P         96
  #define CGEMM_DEFAULT_Q         120
  #define CGEMM_DEFAULT_R         4096

  /* ZGEMM */
  #define ZGEMM_DEFAULT_UNROLL_M  2
  #define ZGEMM_DEFAULT_UNROLL_N  2
  #define ZGEMM_DEFAULT_P         64
  #define ZGEMM_DEFAULT_Q         120
  #define ZGEMM_DEFAULT_R         4096

  #define SYMV_P                  16
  #endif /* ARMV5 */
  #if defined(GENERIC)
  /*
   * Default tuning constants used when GENERIC is defined.
   * Besides S/D/C/Z this section also covers the Q and X routine families
   * (presumably the extended-precision real and complex variants -- confirm
   * against the code that consumes these macros).
   */
  #define SNUMOPT                 2
  #define DNUMOPT                 2

  /* Offset and alignment settings. */
  #define GEMM_DEFAULT_OFFSET_A   0
  #define GEMM_DEFAULT_OFFSET_B   0
  #define GEMM_DEFAULT_ALIGN      0x0ffffUL

  /* UNROLL_N does not depend on the architecture. */
  #define SGEMM_DEFAULT_UNROLL_N  4
  #define DGEMM_DEFAULT_UNROLL_N  4
  #define QGEMM_DEFAULT_UNROLL_N  2
  #define CGEMM_DEFAULT_UNROLL_N  2
  #define ZGEMM_DEFAULT_UNROLL_N  2
  #define XGEMM_DEFAULT_UNROLL_N  1

  /* UNROLL_M for Q and X is the same with or without ARCH_X86. */
  #define QGEMM_DEFAULT_UNROLL_M  2
  #define XGEMM_DEFAULT_UNROLL_M  1

  /* UNROLL_M for S/D/C/Z: ARCH_X86 gets half the value used elsewhere. */
  #if defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M  4
    #define DGEMM_DEFAULT_UNROLL_M  2
    #define CGEMM_DEFAULT_UNROLL_M  2
    #define ZGEMM_DEFAULT_UNROLL_M  1
  #else
    #define SGEMM_DEFAULT_UNROLL_M  8
    #define DGEMM_DEFAULT_UNROLL_M  4
    #define CGEMM_DEFAULT_UNROLL_M  4
    #define ZGEMM_DEFAULT_UNROLL_M  2
  #endif

  /*
   * P and R expand to identifiers (sgemm_p, sgemm_r, ...) rather than
   * literals; those names must be visible wherever the macros are used.
   */
  #define SGEMM_DEFAULT_P         sgemm_p
  #define SGEMM_DEFAULT_R         sgemm_r
  #define DGEMM_DEFAULT_P         dgemm_p
  #define DGEMM_DEFAULT_R         dgemm_r
  #define QGEMM_DEFAULT_P         qgemm_p
  #define QGEMM_DEFAULT_R         qgemm_r
  #define CGEMM_DEFAULT_P         cgemm_p
  #define CGEMM_DEFAULT_R         cgemm_r
  #define ZGEMM_DEFAULT_P         zgemm_p
  #define ZGEMM_DEFAULT_R         zgemm_r
  #define XGEMM_DEFAULT_P         xgemm_p
  #define XGEMM_DEFAULT_R         xgemm_r

  /* Q is a fixed literal, identical for every routine family. */
  #define SGEMM_DEFAULT_Q         128
  #define DGEMM_DEFAULT_Q         128
  #define QGEMM_DEFAULT_Q         128
  #define CGEMM_DEFAULT_Q         128
  #define ZGEMM_DEFAULT_Q         128
  #define XGEMM_DEFAULT_Q         128

  #define SYMV_P                  16
  #endif /* GENERIC */
  /*
   * Fallback unroll factors for the Q and X routine families.
   * A value that an earlier section already defined is left untouched;
   * anything still undefined at this point becomes 2.
   */
  #if !defined(QGEMM_DEFAULT_UNROLL_M)
    #define QGEMM_DEFAULT_UNROLL_M 2
  #endif

  #if !defined(QGEMM_DEFAULT_UNROLL_N)
    #define QGEMM_DEFAULT_UNROLL_N 2
  #endif

  #if !defined(XGEMM_DEFAULT_UNROLL_M)
    #define XGEMM_DEFAULT_UNROLL_M 2
  #endif

  #if !defined(XGEMM_DEFAULT_UNROLL_N)
    #define XGEMM_DEFAULT_UNROLL_N 2
  #endif
  /*
   * Shuffle mnemonic helpers for assembly sources.  Each macro expands to a
   * mnemonic plus its immediate and ends in a comma, so the operands are
   * written after the macro at the use site.
   *
   * Without HAVE_SSE2, SHUFPD_n is spelled with shufps; the immediates
   * 0x44/0x4e/0xe4/0xee pick the same 64-bit halves that shufpd $0..$3
   * would.  With HAVE_SSE2, the real shufpd form is used for any SHUFPD_n
   * that has not been defined already.
   */
  #if !defined(HAVE_SSE2)
    #define SHUFPD_0 shufps $0x44,
    #define SHUFPD_1 shufps $0x4e,
    #define SHUFPD_2 shufps $0xe4,
    #define SHUFPD_3 shufps $0xee,
  #else
    #if !defined(SHUFPD_0)
      #define SHUFPD_0 shufpd $0,
    #endif
    #if !defined(SHUFPD_1)
      #define SHUFPD_1 shufpd $1,
    #endif
    #if !defined(SHUFPD_2)
      #define SHUFPD_2 shufpd $2,
    #endif
    #if !defined(SHUFPD_3)
      #define SHUFPD_3 shufpd $3,
    #endif
  #endif

  /* SHUFPS_39 is provided in every configuration unless already defined. */
  #if !defined(SHUFPS_39)
    #define SHUFPS_39 shufps $0x39,
  #endif
  1739. #endif