You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 51 kB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178
  1. /*****************************************************************************
  2. Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the ISCAS nor the names of its contributors may
  14. be used to endorse or promote products derived from this software
  15. without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************************/
  27. /*********************************************************************/
  28. /* Copyright 2009, 2010 The University of Texas at Austin. */
  29. /* All rights reserved. */
  30. /* */
  31. /* Redistribution and use in source and binary forms, with or */
  32. /* without modification, are permitted provided that the following */
  33. /* conditions are met: */
  34. /* */
  35. /* 1. Redistributions of source code must retain the above */
  36. /* copyright notice, this list of conditions and the following */
  37. /* disclaimer. */
  38. /* */
  39. /* 2. Redistributions in binary form must reproduce the above */
  40. /* copyright notice, this list of conditions and the following */
  41. /* disclaimer in the documentation and/or other materials */
  42. /* provided with the distribution. */
  43. /* */
  44. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  45. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  46. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  47. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  48. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  49. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  50. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  51. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  52. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  53. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  54. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  55. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  56. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  57. /* POSSIBILITY OF SUCH DAMAGE. */
  58. /* */
  59. /* The views and conclusions contained in the software and */
  60. /* documentation are those of the authors and should not be */
  61. /* interpreted as representing official policies, either expressed */
  62. /* or implied, of The University of Texas at Austin. */
  63. /*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  /*
   * Defaults that take effect only when OPTERON is defined.
   * The lowercase names used for the P and R defaults (sgemm_p, sgemm_r, ...)
   * are not defined in this section; presumably they are provided
   * elsewhere in the project -- confirm.
   */
  #if defined(OPTERON)

  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256

  /* UNROLL_M: the Q and X values do not depend on ARCH_X86. */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* UNROLL_N: identical for both ARCH_X86 settings. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* P and R defaults forward to same-named lowercase identifiers. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a fixed constant; it is 248 with ALLOC_HUGETLB, 240 without. */
  #if defined(ALLOC_HUGETLB)
    #define SGEMM_DEFAULT_Q 248
    #define DGEMM_DEFAULT_Q 248
    #define QGEMM_DEFAULT_Q 248
    #define CGEMM_DEFAULT_Q 248
    #define ZGEMM_DEFAULT_Q 248
    #define XGEMM_DEFAULT_Q 248
  #else
    #define SGEMM_DEFAULT_Q 240
    #define DGEMM_DEFAULT_Q 240
    #define QGEMM_DEFAULT_Q 240
    #define CGEMM_DEFAULT_Q 240
    #define ZGEMM_DEFAULT_Q 240
    #define XGEMM_DEFAULT_Q 240
  #endif

  #endif /* OPTERON */
  /*
   * Defaults shared by three selectors: this section is active when any of
   * BARCELONA, SHANGHAI or BOBCAT is defined.
   * gemm_thread_mn and the lowercase *_r names are not defined in this
   * section; presumably they are provided elsewhere -- confirm.
   */
  #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)

  #define SNUMOPT 8
  #define DNUMOPT 4
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE
  #define GEMM_THREAD gemm_thread_mn

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0fffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832

  /* UNROLL_N: identical for both ARCH_X86 settings. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* UNROLL_M: the Q and X values do not depend on ARCH_X86. */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /*
   * P and Q constants. The first group is compiled out by "#if 0" and is
   * kept only as a recorded alternative; the #else group is the live one.
   */
  #if 0
    #define SGEMM_DEFAULT_P 496
    #define DGEMM_DEFAULT_P 248
    #define QGEMM_DEFAULT_P 124
    #define CGEMM_DEFAULT_P 248
    #define ZGEMM_DEFAULT_P 124
    #define XGEMM_DEFAULT_P 62

    #define SGEMM_DEFAULT_Q 248
    #define DGEMM_DEFAULT_Q 248
    #define QGEMM_DEFAULT_Q 248
    #define CGEMM_DEFAULT_Q 248
    #define ZGEMM_DEFAULT_Q 248
    #define XGEMM_DEFAULT_Q 248
  #else
    #define SGEMM_DEFAULT_P 448
    #define DGEMM_DEFAULT_P 224
    #define QGEMM_DEFAULT_P 112
    #define CGEMM_DEFAULT_P 224
    #define ZGEMM_DEFAULT_P 112
    #define XGEMM_DEFAULT_P 56

    #define SGEMM_DEFAULT_Q 224
    #define DGEMM_DEFAULT_Q 224
    #define QGEMM_DEFAULT_Q 224
    #define CGEMM_DEFAULT_Q 224
    #define ZGEMM_DEFAULT_Q 224
    #define XGEMM_DEFAULT_Q 224
  #endif

  /* R defaults forward to same-named lowercase identifiers. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* BARCELONA || SHANGHAI || BOBCAT */
  /*
   * Defaults that take effect only when BULLDOZER is defined.
   * Two further switches are consulted: ARCH_X86 picks the unroll factors,
   * ARCH_X86_64 picks the S/D values of P and Q.
   * gemm_thread_mn and the lowercase *_r names are not defined in this
   * section; presumably they are provided elsewhere -- confirm.
   */
  #if defined(BULLDOZER)

  #define SNUMOPT 8
  #define DNUMOPT 4
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE
  #define GEMM_THREAD gemm_thread_mn

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0fffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832

  /* Unroll factors that do not depend on ARCH_X86. */
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* Unroll factors that do depend on ARCH_X86. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 16
    #define SGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2

    /* Only this branch sets the 3M unroll factors and GEMV_UNROLL. */
    #define CGEMM3M_DEFAULT_UNROLL_M 8
    #define CGEMM3M_DEFAULT_UNROLL_N 4
    #define ZGEMM3M_DEFAULT_UNROLL_M 4
    #define ZGEMM3M_DEFAULT_UNROLL_N 4
    #define GEMV_UNROLL 8
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* S/D values of P and Q differ between ARCH_X86_64 and everything else. */
  #if defined(ARCH_X86_64)
    #define SGEMM_DEFAULT_P 768
    #define DGEMM_DEFAULT_P 384
    #define SGEMM_DEFAULT_Q 168
    #define DGEMM_DEFAULT_Q 168
  #else
    #define SGEMM_DEFAULT_P 448
    #define DGEMM_DEFAULT_P 224
    #define SGEMM_DEFAULT_Q 224
    #define DGEMM_DEFAULT_Q 224
  #endif

  /* Remaining P and Q values are unconditional. */
  #define QGEMM_DEFAULT_P 112
  #define CGEMM_DEFAULT_P 224
  #define ZGEMM_DEFAULT_P 112
  #define XGEMM_DEFAULT_P 56

  #define QGEMM_DEFAULT_Q 224
  #define CGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_Q 224

  /* R defaults forward to same-named lowercase identifiers. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* BULLDOZER */
  /*
   * Defaults that take effect only when PILEDRIVER is defined.
   * ARCH_X86 picks the unroll factors; ARCH_X86_64 picks the S/D/C/Z values
   * of P and Q. Unlike the neighbouring sections, the S and D values of R
   * are literal constants here rather than lowercase identifiers.
   * gemm_thread_mn and the remaining lowercase *_r names are not defined in
   * this section; presumably they are provided elsewhere -- confirm.
   */
  #if defined(PILEDRIVER)

  #define SNUMOPT 8
  #define DNUMOPT 4
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE
  #define GEMM_THREAD gemm_thread_mn

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0fffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832

  /* Unroll factors that do not depend on ARCH_X86. */
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* Unroll factors that do depend on ARCH_X86. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 16
    #define SGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2

    /* Only this branch sets the 3M unroll factors and GEMV_UNROLL. */
    #define CGEMM3M_DEFAULT_UNROLL_M 8
    #define CGEMM3M_DEFAULT_UNROLL_N 4
    #define ZGEMM3M_DEFAULT_UNROLL_M 4
    #define ZGEMM3M_DEFAULT_UNROLL_N 4
    #define GEMV_UNROLL 8
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* S/D/C/Z values of P and Q differ between ARCH_X86_64 and the rest. */
  #if defined(ARCH_X86_64)
    #define SGEMM_DEFAULT_P 768
    #define DGEMM_DEFAULT_P 768
    #define CGEMM_DEFAULT_P 768
    #define ZGEMM_DEFAULT_P 384

    #define SGEMM_DEFAULT_Q 192
    #define DGEMM_DEFAULT_Q 168
    #define CGEMM_DEFAULT_Q 168
    #define ZGEMM_DEFAULT_Q 168
  #else
    #define SGEMM_DEFAULT_P 448
    #define DGEMM_DEFAULT_P 480
    #define CGEMM_DEFAULT_P 224
    #define ZGEMM_DEFAULT_P 112

    #define SGEMM_DEFAULT_Q 224
    #define DGEMM_DEFAULT_Q 224
    #define CGEMM_DEFAULT_Q 224
    #define ZGEMM_DEFAULT_Q 224
  #endif

  /* Q and X values of P and Q are unconditional. */
  #define QGEMM_DEFAULT_P 112
  #define XGEMM_DEFAULT_P 56
  #define QGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_Q 224

  /* R: literal 12288 for S and D, lowercase identifiers for the rest. */
  #define SGEMM_DEFAULT_R 12288
  #define DGEMM_DEFAULT_R 12288
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* PILEDRIVER */
  /*
   * Defaults that take effect only when ATHLON is defined.
   * This section has no nested conditionals: every value is fixed.
   * The lowercase *_r names are not defined in this section; presumably
   * they are provided elsewhere -- confirm.
   */
  #if defined(ATHLON)

  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 384

  /* Per-routine unroll factors (M, then N). */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Per-routine P, Q (constants) and R (lowercase identifier). */
  #define SGEMM_DEFAULT_P 208
  #define SGEMM_DEFAULT_Q 208
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P 104
  #define DGEMM_DEFAULT_Q 208
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P 56
  #define QGEMM_DEFAULT_Q 208
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P 104
  #define CGEMM_DEFAULT_Q 208
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P 56
  #define ZGEMM_DEFAULT_Q 208
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P 28
  #define XGEMM_DEFAULT_Q 208
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* ATHLON */
  /*
   * Defaults that take effect only when VIAC3 is defined.
   * This section has no nested conditionals and, unlike several neighbours,
   * does not define HAVE_EXCLUSIVE_CACHE.
   * The lowercase *_r names are not defined in this section; presumably
   * they are provided elsewhere -- confirm.
   */
  #if defined(VIAC3)

  #define SNUMOPT 2
  #define DNUMOPT 1
  #define SYMV_P 16

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 256

  /* Per-routine unroll factors (M, then N). */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Per-routine P, Q (constants) and R (lowercase identifier). */
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 512
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P 128
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P 128
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* VIAC3 */
  /*
   * Defaults that take effect only when NANO is defined.
   * ARCH_X86 selects between two complete sets of unroll factors.
   * The lowercase *_r names are not defined in this section; presumably
   * they are provided elsewhere -- confirm.
   */
  #if defined(NANO)

  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 16
  #define HAVE_EXCLUSIVE_CACHE

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256

  /* Per-routine unroll factors (M, then N), chosen by ARCH_X86. */
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define SGEMM_DEFAULT_UNROLL_N 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define QGEMM_DEFAULT_UNROLL_M 2
    #define QGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_N 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_N 2
    #define XGEMM_DEFAULT_UNROLL_M 1
    #define XGEMM_DEFAULT_UNROLL_N 1
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define QGEMM_DEFAULT_UNROLL_M 2
    #define QGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_N 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
    #define ZGEMM_DEFAULT_UNROLL_N 2
    #define XGEMM_DEFAULT_UNROLL_M 1
    #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* Per-routine P, Q (constants) and R (lowercase identifier). */
  #define SGEMM_DEFAULT_P 288
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P 288
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P 288
  #define QGEMM_DEFAULT_Q 64
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P 288
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P 288
  #define ZGEMM_DEFAULT_Q 64
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P 288
  #define XGEMM_DEFAULT_Q 32
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* NANO */
  /*
   * Defaults shared by three selectors: this section is active when any of
   * PENTIUM, PENTIUM2 or PENTIUM3 is defined.
   * HAVE_SSE changes SNUMOPT and the S/C values of UNROLL_M.
   * The lowercase *_p / *_r names are not defined in this section;
   * presumably they are provided elsewhere -- confirm.
   */
  #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)

  /* Everything that depends on HAVE_SSE, gathered in one conditional. */
  #if defined(HAVE_SSE)
    #define SNUMOPT 2
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define CGEMM_DEFAULT_UNROLL_M 4
  #else
    #define SNUMOPT 1
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 2
  #endif

  #define DNUMOPT 1
  #define SYMV_P 4

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0

  /* Unroll factors that do not depend on HAVE_SSE. */
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* P and R forward to lowercase identifiers. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the constant 256 for every routine. */
  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256

  #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  /*
   * Defaults that take effect only when PENTIUMM is defined.
   * CORE_YONAH selects between two complete sets of unroll factors.
   * The lowercase *_p / *_r names are not defined in this section;
   * presumably they are provided elsewhere -- confirm.
   */
  #if defined(PENTIUMM)

  #define SNUMOPT 2
  #define DNUMOPT 1
  #define SYMV_P 4

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0

  /* Per-routine unroll factors (M, then N), chosen by CORE_YONAH. */
  #if !defined(CORE_YONAH)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define SGEMM_DEFAULT_UNROLL_N 2
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define DGEMM_DEFAULT_UNROLL_N 2
    #define QGEMM_DEFAULT_UNROLL_M 2
    #define QGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_N 1
    #define ZGEMM_DEFAULT_UNROLL_M 1
    #define ZGEMM_DEFAULT_UNROLL_N 1
    #define XGEMM_DEFAULT_UNROLL_M 1
    #define XGEMM_DEFAULT_UNROLL_N 1
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define SGEMM_DEFAULT_UNROLL_N 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define DGEMM_DEFAULT_UNROLL_N 4
    #define QGEMM_DEFAULT_UNROLL_M 2
    #define QGEMM_DEFAULT_UNROLL_N 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_N 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
    #define ZGEMM_DEFAULT_UNROLL_N 2
    #define XGEMM_DEFAULT_UNROLL_M 1
    #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P and R forward to lowercase identifiers. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the constant 256 for every routine. */
  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256

  #endif /* PENTIUMM */
  /*
   * Defaults that take effect only when CORE_NORTHWOOD is defined.
   * This section has no nested conditionals: every value is fixed.
   * The lowercase *_p / *_r names are not defined in this section;
   * presumably they are provided elsewhere -- confirm.
   */
  #if defined(CORE_NORTHWOOD)

  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 8

  /* GEMM offset and alignment defaults. */
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 32

  /* Per-routine unroll factors (M, then N). */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Per-routine P and R (lowercase identifiers) and Q (constant 128). */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 128
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* CORE_NORTHWOOD */
  /*
   * Defaults that take effect only when CORE_PRESCOTT is defined.
   * __64BIT__ selects the A/B offsets; ARCH_X86 selects the UNROLL_M set.
   * The lowercase *_p / *_r names are not defined in this section;
   * presumably they are provided elsewhere -- confirm.
   */
  #if defined(CORE_PRESCOTT)

  #define SNUMOPT 4
  #define DNUMOPT 2
  #define SYMV_P 8

  /* GEMM offsets depend on __64BIT__; the alignment value does not. */
  #if defined(__64BIT__)
    #define GEMM_DEFAULT_OFFSET_A 0
    #define GEMM_DEFAULT_OFFSET_B 256
  #else
    #define GEMM_DEFAULT_OFFSET_A 128
    #define GEMM_DEFAULT_OFFSET_B 192
  #endif
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* UNROLL_M: the Q and X values do not depend on ARCH_X86. */
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #if !defined(ARCH_X86)
    #define SGEMM_DEFAULT_UNROLL_M 8
    #define DGEMM_DEFAULT_UNROLL_M 4
    #define CGEMM_DEFAULT_UNROLL_M 4
    #define ZGEMM_DEFAULT_UNROLL_M 2
  #else
    #define SGEMM_DEFAULT_UNROLL_M 4
    #define DGEMM_DEFAULT_UNROLL_M 2
    #define CGEMM_DEFAULT_UNROLL_M 2
    #define ZGEMM_DEFAULT_UNROLL_M 1
  #endif

  /* UNROLL_N: identical for both ARCH_X86 settings. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Per-routine P and R (lowercase identifiers) and Q (constant 128). */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 128
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_R xgemm_r

  #endif /* CORE_PRESCOTT */
  /* Tuning parameters selected when CORE2 is defined. */
  #if defined(CORE2)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 448
  #define GEMM_DEFAULT_OFFSET_B 128
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors: UNROLL_M is identical in both branches, UNROLL_N is not.
     MASK is only provided when ARCH_X86 is defined. */
  #if defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Integer expression: a rounded up to the next multiple of b
     (for positive integer operands). */
  #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P and R expand to identifiers that are not defined in this block. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same constant for every variant. */
  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256

  #endif /* CORE2 */
  /* Tuning parameters selected when PENRYN is defined. */
  #if defined(PENRYN)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors: only UNROLL_M differs between the two branches. */
  #if defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P and R expand to identifiers that are not defined in this block. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a per-variant constant. */
  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 128

  #define GETRF_FACTOR 0.75

  #endif /* PENRYN */
  /* Tuning parameters selected when DUNNINGTON is defined. */
  #if defined(DUNNINGTON)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors: only UNROLL_M differs between the two branches. */
  #if defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P and R expand to identifiers that are not defined in this block. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a per-variant constant. */
  #define SGEMM_DEFAULT_Q 768
  #define DGEMM_DEFAULT_Q 384
  #define QGEMM_DEFAULT_Q 192
  #define CGEMM_DEFAULT_Q 768
  #define ZGEMM_DEFAULT_Q 384
  #define XGEMM_DEFAULT_Q 192

  #define GETRF_FACTOR 0.75

  /* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this
     block. */
  #define GEMM_THREAD gemm_thread_mn

  #endif /* DUNNINGTON */
  /* Tuning parameters selected when NEHALEM is defined. */
  #if defined(NEHALEM)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 32
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors: UNROLL_M is identical in both branches; UNROLL_N is
     larger for S/D/C/Z when ARCH_X86 is not defined. */
  #if defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P is a constant here; R expands to identifiers that are not defined
     in this block. */
  #define SGEMM_DEFAULT_P 504
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P 504
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P 252
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P 252
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a per-variant constant. */
  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 128

  #define GETRF_FACTOR 0.72

  /* Separate unroll factors for the 3M macros. */
  #define CGEMM3M_DEFAULT_UNROLL_N 4
  #define CGEMM3M_DEFAULT_UNROLL_M 8
  #define ZGEMM3M_DEFAULT_UNROLL_N 2
  #define ZGEMM3M_DEFAULT_UNROLL_M 8

  #endif /* NEHALEM */
  /* Tuning parameters selected when SANDYBRIDGE is defined. */
  #if defined(SANDYBRIDGE)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors, chosen per ARCH_X86. */
  #if defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P is a constant; R expands to identifiers that are not defined in this
     block. A fixed R of 1024 is kept as a commented-out alternative for the
     S, D, C and Z variants. */
  #define SGEMM_DEFAULT_P 768
  #define SGEMM_DEFAULT_R sgemm_r
  /* #define SGEMM_DEFAULT_R 1024 */

  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_R dgemm_r
  /* #define DGEMM_DEFAULT_R 1024 */

  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P 768
  #define CGEMM_DEFAULT_R cgemm_r
  /* #define CGEMM_DEFAULT_R 1024 */

  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_R zgemm_r
  /* #define ZGEMM_DEFAULT_R 1024 */

  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a per-variant constant. */
  #define SGEMM_DEFAULT_Q 384
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 192
  #define XGEMM_DEFAULT_Q 128

  /* Separate unroll factors for the 3M macros. */
  #define CGEMM3M_DEFAULT_UNROLL_N 4
  #define CGEMM3M_DEFAULT_UNROLL_M 8
  #define ZGEMM3M_DEFAULT_UNROLL_N 2
  #define ZGEMM3M_DEFAULT_UNROLL_M 8

  #define GETRF_FACTOR 0.72

  #endif /* SANDYBRIDGE */
  /* Tuning parameters selected when HASWELL is defined. */
  #if defined(HASWELL)

  #define SNUMOPT 16
  #define DNUMOPT 8

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  #if defined(ARCH_X86)

  /* --- ARCH_X86: unroll factors --- */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* --- ARCH_X86: P/R/Q. Only CGEMM uses a constant R; the other R macros
     expand to identifiers that are not defined in this block. --- */
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_R 1024
  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 192
  #define XGEMM_DEFAULT_Q 128

  #else /* !ARCH_X86 */

  /* --- not ARCH_X86: unroll factors --- */
  #define SGEMM_DEFAULT_UNROLL_M 16
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Only defined in this branch. */
  #define DGEMM_DEFAULT_UNROLL_MN 16

  /* --- not ARCH_X86: P/Q/R --- */
  #define SGEMM_DEFAULT_P 768
  #define DGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_P 384
  #define ZGEMM_DEFAULT_P 256

  /* S and D use smaller Q values when WINDOWS_ABI is defined. */
  #if defined(WINDOWS_ABI)
  #define SGEMM_DEFAULT_Q 320
  #define DGEMM_DEFAULT_Q 128
  #else
  #define SGEMM_DEFAULT_Q 384
  #define DGEMM_DEFAULT_Q 256
  #endif

  #define CGEMM_DEFAULT_Q 192
  #define ZGEMM_DEFAULT_Q 128

  /* DGEMM uses a constant R; the other R macros expand to identifiers that
     are not defined in this block. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R 13824
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r

  #define QGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r

  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r
  #define XGEMM_DEFAULT_Q 128

  /* 3M unroll factors are only defined in this branch. */
  #define CGEMM3M_DEFAULT_UNROLL_N 4
  #define CGEMM3M_DEFAULT_UNROLL_M 8
  #define ZGEMM3M_DEFAULT_UNROLL_N 2
  #define ZGEMM3M_DEFAULT_UNROLL_M 8

  #endif /* ARCH_X86 */

  #endif /* HASWELL */
  /* Tuning parameters selected when ATOM is defined. */
  #if defined(ATOM)

  #define SNUMOPT 2
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 8

  /* UNROLL_M is chosen per ARCH_X86; UNROLL_N is common to both cases. */
  #if defined(ARCH_X86)
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* P and R expand to identifiers that are not defined in this block. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same constant for every variant. */
  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256

  #endif /* ATOM */
  /* Tuning parameters selected when ITANIUM2 is defined. */
  #if defined(ITANIUM2)

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 128
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* Unroll factors: 8x8 for S/D/Q, 4x4 for C/Z/X. */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define QGEMM_DEFAULT_UNROLL_M 8
  #define QGEMM_DEFAULT_UNROLL_N 8
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_M 4
  #define XGEMM_DEFAULT_UNROLL_N 4

  /* P and R expand to identifiers that are not defined in this block;
     Q is the same constant for every variant. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 1024
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 1024
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 1024
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 1024
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 1024
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 1024
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 16
  #define GETRF_FACTOR 0.65

  #endif /* ITANIUM2 */
  /* Tuning parameters selected when any of EV4, EV5 or EV6 is defined. */
  #if defined(EV4) || defined(EV5) || defined(EV6)

  /* EV4 gets 1/1; every other case covered by this block gets 2/2. */
  #if defined(EV4)
  #define SNUMOPT 1
  #define DNUMOPT 1
  #else
  #define SNUMOPT 2
  #define DNUMOPT 2
  #endif

  #define GEMM_DEFAULT_OFFSET_A 512
  #define GEMM_DEFAULT_OFFSET_B 512
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Unroll factors shared by all three variants. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SYMV_P 8

  /* EV4: P, Q and R are all constants. */
  #if defined(EV4)
  #define SGEMM_DEFAULT_P 32
  #define SGEMM_DEFAULT_Q 112
  #define SGEMM_DEFAULT_R 256

  #define DGEMM_DEFAULT_P 32
  #define DGEMM_DEFAULT_Q 56
  #define DGEMM_DEFAULT_R 256

  #define CGEMM_DEFAULT_P 32
  #define CGEMM_DEFAULT_Q 64
  #define CGEMM_DEFAULT_R 240

  #define ZGEMM_DEFAULT_P 32
  #define ZGEMM_DEFAULT_Q 32
  #define ZGEMM_DEFAULT_R 240
  #endif

  /* EV5: only P and Q are set here. */
  #if defined(EV5)
  #define SGEMM_DEFAULT_P 64
  #define SGEMM_DEFAULT_Q 256

  #define DGEMM_DEFAULT_P 64
  #define DGEMM_DEFAULT_Q 128

  #define CGEMM_DEFAULT_P 64
  #define CGEMM_DEFAULT_Q 128

  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 64
  #endif

  /* EV6: only P and Q are set here. */
  #if defined(EV6)
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 512

  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 256
  #endif

  #endif /* EV4 || EV5 || EV6 */
  /* Tuning parameters selected when CELL is defined.
     Settings are grouped per S/D/C/Z variant; no R values are set here. */
  #if defined(CELL)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 8192
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 512

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 4

  #endif /* CELL */
  /* Tuning parameters selected when PPCG4 is defined.
     This block sets neither SNUMOPT/DNUMOPT nor any R value. */
  #if defined(PPCG4)

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 256

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 256

  #define SYMV_P 4

  #endif /* PPCG4 */
  /* Tuning parameters selected when PPC970 is defined. */
  #if defined(PPC970)

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 2688
  #define GEMM_DEFAULT_OFFSET_B 3072
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* P is only set when OS_LINUX is defined; otherwise this block leaves the
     *_DEFAULT_P macros undefined.
     NOTE(review): 1024976 is not 1 MiB (1048576). Presumably it matches the
     exact L2_SIZE value supplied by the build configuration -- confirm
     before changing it. */
  #if defined(OS_LINUX)
  #if L2_SIZE == 1024976
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 256
  #else
  #define SGEMM_DEFAULT_P 176
  #define DGEMM_DEFAULT_P 176
  #define CGEMM_DEFAULT_P 176
  #define ZGEMM_DEFAULT_P 176
  #endif
  #endif /* OS_LINUX */

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 4

  #endif /* PPC970 */
  /* Tuning parameters selected when PPC440 is defined. */
  #if defined(PPC440)

  #define SNUMOPT 2
  #define DNUMOPT 2

  /* Both offsets are written as (32 * 0), i.e. they evaluate to zero. */
  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Per-variant settings; each R macro expands to the matching P macro. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_Q 1024
  #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_Q 512
  #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P

  #define SYMV_P 4

  #endif /* PPC440 */
  /* Tuning parameters selected when PPC440FP2 is defined. */
  #if defined(PPC440FP2)

  #define SNUMOPT 4
  #define DNUMOPT 4

  /* Both offsets are written as (32 * 0), i.e. they evaluate to zero. */
  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128

  /* The first set of Q values is the active one (#if 1); the smaller set
     in the #else branch is never compiled. */
  #if 1
  #define SGEMM_DEFAULT_Q 4096
  #define DGEMM_DEFAULT_Q 3072
  #define CGEMM_DEFAULT_Q 2048
  #define ZGEMM_DEFAULT_Q 1024
  #else
  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128
  #endif

  #define SYMV_P 4

  #endif /* PPC440FP2 */
  /* Tuning parameters selected when any of POWER3, POWER4 or POWER5 is
     defined. */
  #if defined(POWER3) || defined(POWER4) || defined(POWER5)

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Unroll factors shared by all three variants. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* POWER3: the only variant here that sets SNUMOPT/DNUMOPT and R.
     No CGEMM_DEFAULT_* value is set in this branch. */
  #if defined(POWER3)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 432
  #define SGEMM_DEFAULT_R 1012

  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 216
  #define DGEMM_DEFAULT_R 1012

  #define ZGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_Q 104
  #define ZGEMM_DEFAULT_R 1012
  #endif

  /* POWER4: only P is set, chosen by ALLOC_HUGETLB. */
  #ifdef POWER4
  #if defined(ALLOC_HUGETLB)
  #define SGEMM_DEFAULT_P 184
  #define DGEMM_DEFAULT_P 184
  #define CGEMM_DEFAULT_P 184
  #define ZGEMM_DEFAULT_P 184
  #else
  #define SGEMM_DEFAULT_P 144
  #define DGEMM_DEFAULT_P 144
  #define CGEMM_DEFAULT_P 144
  #define ZGEMM_DEFAULT_P 144
  #endif
  #endif

  /* POWER5: P is chosen by ALLOC_HUGETLB; Q is 256 for every variant. */
  #ifdef POWER5
  #if defined(ALLOC_HUGETLB)
  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 128
  #else
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 160
  #define CGEMM_DEFAULT_P 160
  #define ZGEMM_DEFAULT_P 80
  #endif

  #define SGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #endif

  #define SYMV_P 8

  #endif /* POWER3 || POWER4 || POWER5 */
  /* Tuning parameters selected when POWER6 is defined.
     Settings are grouped per S/D/C/Z variant; no R values are set here. */
  #ifdef POWER6

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 384
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 992
  #define SGEMM_DEFAULT_Q 504

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 480
  #define DGEMM_DEFAULT_Q 504

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 488
  #define CGEMM_DEFAULT_Q 400

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 248
  #define ZGEMM_DEFAULT_Q 400

  #define SYMV_P 8

  #endif /* POWER6 */
  /* Tuning parameters selected when both SPARC and V7 are defined.
     Settings are grouped per S/D/C/Z variant; no R values are set here. */
  #if defined(SPARC) && defined(V7)

  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 512

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 8

  /* GEMM_THREAD expands to gemm_thread_mn, which is not declared in this
     block. */
  #define GEMM_THREAD gemm_thread_mn

  #endif /* SPARC && V7 */
  /* Tuning parameters selected when SPARC and V9 are both defined, or when
     __sparc_v9__ is defined.
     Settings are grouped per S/D/C/Z variant; no R values are set here. */
  #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_Q 1024

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_Q 512

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_Q 512

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_Q 256

  #define SYMV_P 8

  #endif /* (SPARC && V9) || __sparc_v9__ */
  /* Tuning parameters selected when SICORTEX is defined.
     Settings are grouped per S/D/C/Z variant; P, Q and R are all constants. */
  #if defined(SICORTEX)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define SGEMM_DEFAULT_P 108
  #define SGEMM_DEFAULT_Q 288
  #define SGEMM_DEFAULT_R 2000

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_P 112
  #define DGEMM_DEFAULT_Q 144
  #define DGEMM_DEFAULT_R 2000

  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_P 108
  #define CGEMM_DEFAULT_Q 144
  #define CGEMM_DEFAULT_R 2000

  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_P 112
  #define ZGEMM_DEFAULT_Q 72
  #define ZGEMM_DEFAULT_R 2000

  #define SYMV_P 16

  #endif /* SICORTEX */
  /* Tuning parameters selected when LOONGSON3A is defined.
     The original note says this block was copied from the SICORTEX one; the
     unroll and P/Q/R values below are not the same as in that block. */
  #if defined(LOONGSON3A)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 64
  #define SGEMM_DEFAULT_Q 192
  #define SGEMM_DEFAULT_R 640

  /* DGEMM is the only variant whose R expands to an identifier (dgemm_r,
     not defined in this block) instead of a constant. */
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 44
  #define DGEMM_DEFAULT_Q 92
  #define DGEMM_DEFAULT_R dgemm_r

  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 64
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R 640

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 32
  #define ZGEMM_DEFAULT_Q 80
  #define ZGEMM_DEFAULT_R 640

  /* Additional offsets (0x10000 and 0x100000). */
  #define GEMM_OFFSET_A1 0x10000
  #define GEMM_OFFSET_B1 0x100000

  #define SYMV_P 16

  #endif /* LOONGSON3A */
  /* Tuning parameters selected when LOONGSON3B is defined.
     Every variant uses 2x2 unrolling and a constant R of 512. */
  #if defined(LOONGSON3B)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define SGEMM_DEFAULT_P 64
  #define SGEMM_DEFAULT_Q 192
  #define SGEMM_DEFAULT_R 512

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_P 24
  #define DGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_R 512

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 24
  #define CGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_R 512

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 20
  #define ZGEMM_DEFAULT_Q 64
  #define ZGEMM_DEFAULT_R 512

  /* Additional offsets (0x10000 and 0x100000). */
  #define GEMM_OFFSET_A1 0x10000
  #define GEMM_OFFSET_B1 0x100000

  #define SYMV_P 16

  #endif /* LOONGSON3B */
  /* Tuning parameters selected when ARMV7 is defined.
     Settings are grouped per S/D/C/Z variant; P, Q and R are all constants. */
  #if defined(ARMV7)

  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define SGEMM_DEFAULT_P 128
  #define SGEMM_DEFAULT_Q 240
  #define SGEMM_DEFAULT_R 12288

  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_Q 120
  #define DGEMM_DEFAULT_R 8192

  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_P 96
  #define CGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_R 4096

  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16

  #endif /* ARMV7 */
  /*
   * Compile-time GEMM defaults used when the build defines ARMV6.
   * The S/D/C/Z prefixes presumably follow the BLAS precision naming
   * (single, double, complex, double complex) -- confirm against the kernels.
   */
  #if defined(ARMV6)

  /* NOTE(review): the meaning of the *NUMOPT values is not visible in this
     section; both are set to 2 for this target. */
  #define SNUMOPT 2
  #define DNUMOPT 2

  /* Both default offsets are zero; the alignment mask has its low 14 bits
     set (0x3fff == 16 KiB - 1). */
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* Real precisions use a 4 x 2 (M x N) unroll, complex ones 2 x 2. */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* P defaults, one per precision. */
  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 96
  #define ZGEMM_DEFAULT_P 64

  /* Q defaults: 240 for SGEMM, 120 for the other three. */
  #define SGEMM_DEFAULT_Q 240
  #define DGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_Q 120

  /* R defaults, one per precision. */
  #define SGEMM_DEFAULT_R 12288
  #define DGEMM_DEFAULT_R 8192
  #define CGEMM_DEFAULT_R 4096
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16

  #endif /* ARMV6 */
  /*
   * Compile-time GEMM defaults used when the build defines ARMV8.
   * The S/D/C/Z prefixes presumably follow the BLAS precision naming
   * (single, double, complex, double complex) -- confirm against the kernels.
   */
  #if defined(ARMV8)

  /* NOTE(review): the meaning of the *NUMOPT values is not visible in this
     section; both are set to 2 for this target. */
  #define SNUMOPT 2
  #define DNUMOPT 2

  /* Both default offsets are zero; the alignment mask has its low 14 bits
     set (0x3fff == 16 KiB - 1). */
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* Every precision uses a 2 x 2 (M x N) unroll. */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* P defaults, one per precision. */
  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 96
  #define ZGEMM_DEFAULT_P 64

  /* Q defaults: 240 for SGEMM, 120 for the other three. */
  #define SGEMM_DEFAULT_Q 240
  #define DGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_Q 120

  /* R defaults, one per precision. */
  #define SGEMM_DEFAULT_R 12288
  #define DGEMM_DEFAULT_R 8192
  #define CGEMM_DEFAULT_R 4096
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16

  #endif /* ARMV8 */
  /*
   * Compile-time GEMM defaults used when the build defines ARMV5.
   * The S/D/C/Z prefixes presumably follow the BLAS precision naming
   * (single, double, complex, double complex) -- confirm against the kernels.
   */
  #if defined(ARMV5)

  /* NOTE(review): the meaning of the *NUMOPT values is not visible in this
     section; both are set to 2 for this target. */
  #define SNUMOPT 2
  #define DNUMOPT 2

  /* Both default offsets are zero; the alignment mask has its low 14 bits
     set (0x3fff == 16 KiB - 1). */
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  /* Every precision uses a 2 x 2 (M x N) unroll. */
  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /* P defaults, one per precision. */
  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 96
  #define ZGEMM_DEFAULT_P 64

  /* Q defaults: 240 for SGEMM, 120 for the other three. */
  #define SGEMM_DEFAULT_Q 240
  #define DGEMM_DEFAULT_Q 120
  #define CGEMM_DEFAULT_Q 120
  #define ZGEMM_DEFAULT_Q 120

  /* R defaults, one per precision. */
  #define SGEMM_DEFAULT_R 12288
  #define DGEMM_DEFAULT_R 8192
  #define CGEMM_DEFAULT_R 4096
  #define ZGEMM_DEFAULT_R 4096

  #define SYMV_P 16

  #endif /* ARMV5 */
  /*
   * Compile-time GEMM defaults used when the build defines GENERIC.
   * This block also covers the Q and X variants in addition to S/D/C/Z.
   * NOTE(review): the prefixes presumably name BLAS precisions (Q/X being
   * the extended-precision real/complex ones) -- confirm against the kernels.
   */
  #ifdef GENERIC

  /* NOTE(review): the meaning of the *NUMOPT values is not visible in this
     section; both are set to 2 for this target. */
  #define SNUMOPT 2
  #define DNUMOPT 2

  /* Both default offsets are zero; the alignment mask has its low 16 bits
     set (0xffff == 64 KiB - 1). */
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* The N unroll does not depend on the architecture branch below. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* The M unroll is halved for S/D/C/Z when ARCH_X86 is defined; the Q and X
     values are the same in both branches. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  /* P expands to an identifier rather than a constant. Those identifiers are
     not declared in this section -- presumably variables set elsewhere at
     run time; confirm before using these macros in constant expressions. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  /* R likewise expands to identifiers declared outside this section. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a fixed 128 for all six variants. */
  #define SGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #define SYMV_P 16

  #endif /* GENERIC */
  /*
   * Fallback unroll factors for the Q and X GEMM variants.
   * Each macro is defined to 2 only if nothing earlier in the header defined
   * it, so a target block that sets its own value (GENERIC sets
   * XGEMM_DEFAULT_UNROLL_M/N to 1, for instance) keeps that value.
   */
  #ifndef QGEMM_DEFAULT_UNROLL_M
  #define QGEMM_DEFAULT_UNROLL_M 2
  #endif

  #ifndef QGEMM_DEFAULT_UNROLL_N
  #define QGEMM_DEFAULT_UNROLL_N 2
  #endif

  #ifndef XGEMM_DEFAULT_UNROLL_M
  #define XGEMM_DEFAULT_UNROLL_M 2
  #endif

  #ifndef XGEMM_DEFAULT_UNROLL_N
  #define XGEMM_DEFAULT_UNROLL_N 2
  #endif
  /*
   * Instruction-text macros for x86 shuffles (AT&T syntax).
   *
   * Each macro expands to a mnemonic, an immediate and a trailing comma, so
   * the text written after the macro supplies the remaining operands
   * (presumably the source/destination registers -- use sites are outside
   * this section).
   *
   * When HAVE_SSE2 is not defined, SHUFPD_n is spelled with shufps. The
   * immediates move 32-bit lanes in pairs, which selects the same 64-bit
   * halves as shufpd $n would:
   *   0x44 -> low half of dst,  low half of src    (shufpd $0)
   *   0x4e -> high half of dst, low half of src    (shufpd $1)
   *   0xe4 -> low half of dst,  high half of src   (shufpd $2)
   *   0xee -> high half of dst, high half of src   (shufpd $3)
   */
  #ifndef HAVE_SSE2
  #define SHUFPD_0 shufps $0x44,
  #define SHUFPD_1 shufps $0x4e,
  #define SHUFPD_2 shufps $0xe4,
  #define SHUFPD_3 shufps $0xee,
  #endif /* !HAVE_SSE2 */

  /* Anything still undefined at this point gets the native shufpd form. */
  #ifndef SHUFPD_0
  #define SHUFPD_0 shufpd $0,
  #endif

  #ifndef SHUFPD_1
  #define SHUFPD_1 shufpd $1,
  #endif

  #ifndef SHUFPD_2
  #define SHUFPD_2 shufpd $2,
  #endif

  #ifndef SHUFPD_3
  #define SHUFPD_3 shufpd $3,
  #endif

  /* Default spelling for SHUFPS_39 unless something earlier defined it. */
  #ifndef SHUFPS_39
  #define SHUFPS_39 shufps $0x39,
  #endif
  1707. #endif