You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

param.h 42 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761
  1. /*****************************************************************************
  2. Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the ISCAS nor the names of its contributors may
  14. be used to endorse or promote products derived from this software
  15. without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************************/
  27. /*********************************************************************/
  28. /* Copyright 2009, 2010 The University of Texas at Austin. */
  29. /* All rights reserved. */
  30. /* */
  31. /* Redistribution and use in source and binary forms, with or */
  32. /* without modification, are permitted provided that the following */
  33. /* conditions are met: */
  34. /* */
  35. /* 1. Redistributions of source code must retain the above */
  36. /* copyright notice, this list of conditions and the following */
  37. /* disclaimer. */
  38. /* */
  39. /* 2. Redistributions in binary form must reproduce the above */
  40. /* copyright notice, this list of conditions and the following */
  41. /* disclaimer in the documentation and/or other materials */
  42. /* provided with the distribution. */
  43. /* */
  44. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  45. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  46. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  47. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  48. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  49. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  50. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  51. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  52. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  53. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  54. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  55. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  56. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  57. /* POSSIBILITY OF SUCH DAMAGE. */
  58. /* */
  59. /* The views and conclusions contained in the software and */
  60. /* documentation are those of the authors and should not be */
  61. /* interpreted as representing official policies, either expressed */
  62. /* or implied, of The University of Texas at Austin. */
  63. /*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  66. #ifdef OPTERON /* Tuning macros that take effect only when OPTERON is defined. */
  67. #define SNUMOPT 4
  68. #define DNUMOPT 2
  69. #define GEMM_DEFAULT_OFFSET_A 64 /* NOTE(review): OFFSET_A/OFFSET_B/ALIGN presumably describe GEMM work-buffer placement - confirm against the code that consumes them. */
  70. #define GEMM_DEFAULT_OFFSET_B 256
  71. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  72. #define SGEMM_DEFAULT_UNROLL_N 4 /* UNROLL_N values are unconditional within this block. */
  73. #define DGEMM_DEFAULT_UNROLL_N 4
  74. #define QGEMM_DEFAULT_UNROLL_N 2
  75. #define CGEMM_DEFAULT_UNROLL_N 2
  76. #define ZGEMM_DEFAULT_UNROLL_N 2
  77. #define XGEMM_DEFAULT_UNROLL_N 1
  78. #ifdef ARCH_X86 /* UNROLL_M values depend on whether ARCH_X86 is defined. */
  79. #define SGEMM_DEFAULT_UNROLL_M 4
  80. #define DGEMM_DEFAULT_UNROLL_M 2
  81. #define QGEMM_DEFAULT_UNROLL_M 2
  82. #define CGEMM_DEFAULT_UNROLL_M 2
  83. #define ZGEMM_DEFAULT_UNROLL_M 1
  84. #define XGEMM_DEFAULT_UNROLL_M 1
  85. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_M are twice the ARCH_X86 values; Q and X are the same. */
  86. #define SGEMM_DEFAULT_UNROLL_M 8
  87. #define DGEMM_DEFAULT_UNROLL_M 4
  88. #define QGEMM_DEFAULT_UNROLL_M 2
  89. #define CGEMM_DEFAULT_UNROLL_M 4
  90. #define ZGEMM_DEFAULT_UNROLL_M 2
  91. #define XGEMM_DEFAULT_UNROLL_M 1
  92. #endif
  93. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined in this block, not to numeric literals. */
  94. #define DGEMM_DEFAULT_P dgemm_p
  95. #define QGEMM_DEFAULT_P qgemm_p
  96. #define CGEMM_DEFAULT_P cgemm_p
  97. #define ZGEMM_DEFAULT_P zgemm_p
  98. #define XGEMM_DEFAULT_P xgemm_p
  99. #define SGEMM_DEFAULT_R sgemm_r
  100. #define DGEMM_DEFAULT_R dgemm_r
  101. #define QGEMM_DEFAULT_R qgemm_r
  102. #define CGEMM_DEFAULT_R cgemm_r
  103. #define ZGEMM_DEFAULT_R zgemm_r
  104. #define XGEMM_DEFAULT_R xgemm_r
  105. #ifdef ALLOC_HUGETLB /* Q is 248 for every precision when ALLOC_HUGETLB is defined, 240 otherwise. */
  106. #define SGEMM_DEFAULT_Q 248
  107. #define DGEMM_DEFAULT_Q 248
  108. #define QGEMM_DEFAULT_Q 248
  109. #define CGEMM_DEFAULT_Q 248
  110. #define ZGEMM_DEFAULT_Q 248
  111. #define XGEMM_DEFAULT_Q 248
  112. #else
  113. #define SGEMM_DEFAULT_Q 240
  114. #define DGEMM_DEFAULT_Q 240
  115. #define QGEMM_DEFAULT_Q 240
  116. #define CGEMM_DEFAULT_Q 240
  117. #define ZGEMM_DEFAULT_Q 240
  118. #define XGEMM_DEFAULT_Q 240
  119. #endif
  120. #define SYMV_P 16
  121. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: usable only through #ifdef / defined(). */
  122. #endif
  123. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) /* One shared set of tuning macros for any of these four targets, with BULLDOZER-specific overrides below. */
  124. #define SNUMOPT 8
  125. #define DNUMOPT 4
  126. #define GEMM_DEFAULT_OFFSET_A 64
  127. #define GEMM_DEFAULT_OFFSET_B 832
  128. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  129. #if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(COMPLEX) /* BULLDOZER + ARCH_X86_64 without COMPLEX: S/D UNROLL_N is 2 instead of 4. */
  130. #define SGEMM_DEFAULT_UNROLL_N 2
  131. #define DGEMM_DEFAULT_UNROLL_N 2
  132. #else
  133. #define SGEMM_DEFAULT_UNROLL_N 4
  134. #define DGEMM_DEFAULT_UNROLL_N 4
  135. #endif
  136. #define QGEMM_DEFAULT_UNROLL_N 2
  137. #define CGEMM_DEFAULT_UNROLL_N 2
  138. #define ZGEMM_DEFAULT_UNROLL_N 2
  139. #define XGEMM_DEFAULT_UNROLL_N 1
  140. #ifdef ARCH_X86
  141. #define SGEMM_DEFAULT_UNROLL_M 4
  142. #define DGEMM_DEFAULT_UNROLL_M 4
  143. #define QGEMM_DEFAULT_UNROLL_M 2
  144. #define CGEMM_DEFAULT_UNROLL_M 2
  145. #define ZGEMM_DEFAULT_UNROLL_M 1
  146. #define XGEMM_DEFAULT_UNROLL_M 1
  147. #else
  148. #if defined(BULLDOZER) && !defined(COMPLEX) /* NOTE(review): unlike the UNROLL_N, P and Q conditions in this block, this one does not test ARCH_X86_64; it only sits inside the non-ARCH_X86 branch. */
  149. #define SGEMM_DEFAULT_UNROLL_M 16
  150. #define DGEMM_DEFAULT_UNROLL_M 8
  151. #else
  152. #define SGEMM_DEFAULT_UNROLL_M 8
  153. #define DGEMM_DEFAULT_UNROLL_M 4
  154. #endif
  155. #define QGEMM_DEFAULT_UNROLL_M 2
  156. #define CGEMM_DEFAULT_UNROLL_M 4
  157. #define ZGEMM_DEFAULT_UNROLL_M 2
  158. #define XGEMM_DEFAULT_UNROLL_M 1
  159. #define CGEMM3M_DEFAULT_UNROLL_N 4 /* NOTE(review): the CGEMM3M/ZGEMM3M unrolls and GEMV_UNROLL are defined only on this non-ARCH_X86 path. */
  160. #define CGEMM3M_DEFAULT_UNROLL_M 8
  161. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  162. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  163. #define GEMV_UNROLL 8
  164. #endif
  165. #if 0 /* Disabled alternative P/Q table: never compiled; the #else branch below is always the one used. */
  166. #define SGEMM_DEFAULT_P 496
  167. #define DGEMM_DEFAULT_P 248
  168. #define QGEMM_DEFAULT_P 124
  169. #define CGEMM_DEFAULT_P 248
  170. #define ZGEMM_DEFAULT_P 124
  171. #define XGEMM_DEFAULT_P 62
  172. #define SGEMM_DEFAULT_Q 248
  173. #define DGEMM_DEFAULT_Q 248
  174. #define QGEMM_DEFAULT_Q 248
  175. #define CGEMM_DEFAULT_Q 248
  176. #define ZGEMM_DEFAULT_Q 248
  177. #define XGEMM_DEFAULT_Q 248
  178. #else /* Active P/Q table. */
  179. #if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(COMPLEX) /* Same condition as for UNROLL_N: selects larger S/D P values (768/384 vs 448/224). */
  180. #define SGEMM_DEFAULT_P 768
  181. #define DGEMM_DEFAULT_P 384
  182. #else
  183. #define SGEMM_DEFAULT_P 448
  184. #define DGEMM_DEFAULT_P 224
  185. #endif
  186. #define QGEMM_DEFAULT_P 112
  187. #define CGEMM_DEFAULT_P 224
  188. #define ZGEMM_DEFAULT_P 112
  189. #define XGEMM_DEFAULT_P 56
  190. #if defined(BULLDOZER) && defined(ARCH_X86_64) && !defined(COMPLEX) /* Same condition again: S/D Q is 168 instead of 224. */
  191. #define SGEMM_DEFAULT_Q 168
  192. #define DGEMM_DEFAULT_Q 168
  193. #else
  194. #define SGEMM_DEFAULT_Q 224
  195. #define DGEMM_DEFAULT_Q 224
  196. #endif
  197. #define QGEMM_DEFAULT_Q 224
  198. #define CGEMM_DEFAULT_Q 224
  199. #define ZGEMM_DEFAULT_Q 224
  200. #define XGEMM_DEFAULT_Q 224
  201. #endif
  202. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers that are not defined in this block, not to numeric literals. */
  203. #define QGEMM_DEFAULT_R qgemm_r
  204. #define DGEMM_DEFAULT_R dgemm_r
  205. #define CGEMM_DEFAULT_R cgemm_r
  206. #define ZGEMM_DEFAULT_R zgemm_r
  207. #define XGEMM_DEFAULT_R xgemm_r
  208. #define SYMV_P 16
  209. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: usable only through #ifdef / defined(). */
  210. #define GEMM_THREAD gemm_thread_mn /* Expands to the identifier gemm_thread_mn, which is not defined in this block. */
  211. #endif
  212. #ifdef ATHLON /* Tuning macros that take effect only when ATHLON is defined; nothing here depends on ARCH_X86. */
  213. #define SNUMOPT 4
  214. #define DNUMOPT 2
  215. #define GEMM_DEFAULT_OFFSET_A 0
  216. #define GEMM_DEFAULT_OFFSET_B 384
  217. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  218. #define SGEMM_DEFAULT_UNROLL_N 4
  219. #define DGEMM_DEFAULT_UNROLL_N 4
  220. #define QGEMM_DEFAULT_UNROLL_N 2
  221. #define CGEMM_DEFAULT_UNROLL_N 2
  222. #define ZGEMM_DEFAULT_UNROLL_N 2
  223. #define XGEMM_DEFAULT_UNROLL_N 1
  224. #define SGEMM_DEFAULT_UNROLL_M 2
  225. #define DGEMM_DEFAULT_UNROLL_M 1
  226. #define QGEMM_DEFAULT_UNROLL_M 2
  227. #define CGEMM_DEFAULT_UNROLL_M 1
  228. #define ZGEMM_DEFAULT_UNROLL_M 1
  229. #define XGEMM_DEFAULT_UNROLL_M 1
  230. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers that are not defined in this block; P and Q below are numeric literals. */
  231. #define DGEMM_DEFAULT_R dgemm_r
  232. #define QGEMM_DEFAULT_R qgemm_r
  233. #define CGEMM_DEFAULT_R cgemm_r
  234. #define ZGEMM_DEFAULT_R zgemm_r
  235. #define XGEMM_DEFAULT_R xgemm_r
  236. #define SGEMM_DEFAULT_P 208
  237. #define DGEMM_DEFAULT_P 104
  238. #define QGEMM_DEFAULT_P 56
  239. #define CGEMM_DEFAULT_P 104
  240. #define ZGEMM_DEFAULT_P 56
  241. #define XGEMM_DEFAULT_P 28
  242. #define SGEMM_DEFAULT_Q 208 /* Q is 208 for every precision. */
  243. #define DGEMM_DEFAULT_Q 208
  244. #define QGEMM_DEFAULT_Q 208
  245. #define CGEMM_DEFAULT_Q 208
  246. #define ZGEMM_DEFAULT_Q 208
  247. #define XGEMM_DEFAULT_Q 208
  248. #define SYMV_P 16
  249. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: usable only through #ifdef / defined(). */
  250. #endif
  251. #ifdef VIAC3 /* Tuning macros that take effect only when VIAC3 is defined; nothing here depends on ARCH_X86. */
  252. #define SNUMOPT 2
  253. #define DNUMOPT 1
  254. #define GEMM_DEFAULT_OFFSET_A 0
  255. #define GEMM_DEFAULT_OFFSET_B 256
  256. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  257. #define SGEMM_DEFAULT_UNROLL_N 4
  258. #define DGEMM_DEFAULT_UNROLL_N 4
  259. #define QGEMM_DEFAULT_UNROLL_N 2
  260. #define CGEMM_DEFAULT_UNROLL_N 2
  261. #define ZGEMM_DEFAULT_UNROLL_N 2
  262. #define XGEMM_DEFAULT_UNROLL_N 1
  263. #define SGEMM_DEFAULT_UNROLL_M 2
  264. #define DGEMM_DEFAULT_UNROLL_M 1
  265. #define QGEMM_DEFAULT_UNROLL_M 2
  266. #define CGEMM_DEFAULT_UNROLL_M 1
  267. #define ZGEMM_DEFAULT_UNROLL_M 1
  268. #define XGEMM_DEFAULT_UNROLL_M 1
  269. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers that are not defined in this block; P and Q below are numeric literals. */
  270. #define DGEMM_DEFAULT_R dgemm_r
  271. #define QGEMM_DEFAULT_R qgemm_r
  272. #define CGEMM_DEFAULT_R cgemm_r
  273. #define ZGEMM_DEFAULT_R zgemm_r
  274. #define XGEMM_DEFAULT_R xgemm_r
  275. #define SGEMM_DEFAULT_P 128 /* P is 128 for every precision. */
  276. #define DGEMM_DEFAULT_P 128
  277. #define QGEMM_DEFAULT_P 128
  278. #define CGEMM_DEFAULT_P 128
  279. #define ZGEMM_DEFAULT_P 128
  280. #define XGEMM_DEFAULT_P 128
  281. #define SGEMM_DEFAULT_Q 512 /* Q varies per precision: 512 / 256 / 256 / 256 / 128 / 128 for S / D / Q / C / Z / X. */
  282. #define DGEMM_DEFAULT_Q 256
  283. #define QGEMM_DEFAULT_Q 256
  284. #define CGEMM_DEFAULT_Q 256
  285. #define ZGEMM_DEFAULT_Q 128
  286. #define XGEMM_DEFAULT_Q 128
  287. #define SYMV_P 16 /* Note: this block does not define HAVE_EXCLUSIVE_CACHE. */
  288. #endif
  289. #ifdef NANO /* Tuning macros that take effect only when NANO is defined. */
  290. #define SNUMOPT 4
  291. #define DNUMOPT 2
  292. #define GEMM_DEFAULT_OFFSET_A 64
  293. #define GEMM_DEFAULT_OFFSET_B 256
  294. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  295. #ifdef ARCH_X86 /* Both UNROLL_N and UNROLL_M are selected by ARCH_X86 in this block. */
  296. #define SGEMM_DEFAULT_UNROLL_N 4
  297. #define DGEMM_DEFAULT_UNROLL_N 4
  298. #define QGEMM_DEFAULT_UNROLL_N 2
  299. #define CGEMM_DEFAULT_UNROLL_N 2
  300. #define ZGEMM_DEFAULT_UNROLL_N 2
  301. #define XGEMM_DEFAULT_UNROLL_N 1
  302. #define SGEMM_DEFAULT_UNROLL_M 4
  303. #define DGEMM_DEFAULT_UNROLL_M 2
  304. #define QGEMM_DEFAULT_UNROLL_M 2
  305. #define CGEMM_DEFAULT_UNROLL_M 2
  306. #define ZGEMM_DEFAULT_UNROLL_M 1
  307. #define XGEMM_DEFAULT_UNROLL_M 1
  308. #else /* ARCH_X86 not defined. */
  309. #define SGEMM_DEFAULT_UNROLL_N 8
  310. #define DGEMM_DEFAULT_UNROLL_N 4
  311. #define QGEMM_DEFAULT_UNROLL_N 2
  312. #define CGEMM_DEFAULT_UNROLL_N 4
  313. #define ZGEMM_DEFAULT_UNROLL_N 2
  314. #define XGEMM_DEFAULT_UNROLL_N 1
  315. #define SGEMM_DEFAULT_UNROLL_M 4
  316. #define DGEMM_DEFAULT_UNROLL_M 4
  317. #define QGEMM_DEFAULT_UNROLL_M 2
  318. #define CGEMM_DEFAULT_UNROLL_M 2
  319. #define ZGEMM_DEFAULT_UNROLL_M 2
  320. #define XGEMM_DEFAULT_UNROLL_M 1
  321. #endif
  322. #define SGEMM_DEFAULT_P 288 /* P is 288 for every precision. */
  323. #define DGEMM_DEFAULT_P 288
  324. #define QGEMM_DEFAULT_P 288
  325. #define CGEMM_DEFAULT_P 288
  326. #define ZGEMM_DEFAULT_P 288
  327. #define XGEMM_DEFAULT_P 288
  328. #define SGEMM_DEFAULT_R sgemm_r /* R expands to identifiers that are not defined in this block, not to numeric literals. */
  329. #define DGEMM_DEFAULT_R dgemm_r
  330. #define QGEMM_DEFAULT_R qgemm_r
  331. #define CGEMM_DEFAULT_R cgemm_r
  332. #define ZGEMM_DEFAULT_R zgemm_r
  333. #define XGEMM_DEFAULT_R xgemm_r
  334. #define SGEMM_DEFAULT_Q 256 /* Q varies per precision: 256 / 128 / 64 / 128 / 64 / 32 for S / D / Q / C / Z / X. */
  335. #define DGEMM_DEFAULT_Q 128
  336. #define QGEMM_DEFAULT_Q 64
  337. #define CGEMM_DEFAULT_Q 128
  338. #define ZGEMM_DEFAULT_Q 64
  339. #define XGEMM_DEFAULT_Q 32
  340. #define SYMV_P 16
  341. #define HAVE_EXCLUSIVE_CACHE /* Defined with no value: usable only through #ifdef / defined(). */
  342. #endif
  343. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3) /* One shared set of tuning macros for any of these three targets. */
  344. #ifdef HAVE_SSE /* SNUMOPT is 2 when HAVE_SSE is defined, 1 otherwise. */
  345. #define SNUMOPT 2
  346. #else
  347. #define SNUMOPT 1
  348. #endif
  349. #define DNUMOPT 1
  350. #define GEMM_DEFAULT_OFFSET_A 0
  351. #define GEMM_DEFAULT_OFFSET_B 0
  352. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  353. #ifdef HAVE_SSE /* HAVE_SSE also doubles the S and C UNROLL_M values (8/4 vs 4/2). */
  354. #define SGEMM_DEFAULT_UNROLL_M 8
  355. #define CGEMM_DEFAULT_UNROLL_M 4
  356. #else
  357. #define SGEMM_DEFAULT_UNROLL_M 4
  358. #define CGEMM_DEFAULT_UNROLL_M 2
  359. #endif
  360. #define DGEMM_DEFAULT_UNROLL_M 2 /* The remaining unroll factors do not depend on HAVE_SSE. */
  361. #define SGEMM_DEFAULT_UNROLL_N 2
  362. #define DGEMM_DEFAULT_UNROLL_N 2
  363. #define QGEMM_DEFAULT_UNROLL_M 2
  364. #define QGEMM_DEFAULT_UNROLL_N 2
  365. #define CGEMM_DEFAULT_UNROLL_N 1
  366. #define ZGEMM_DEFAULT_UNROLL_M 1
  367. #define ZGEMM_DEFAULT_UNROLL_N 1
  368. #define XGEMM_DEFAULT_UNROLL_M 1
  369. #define XGEMM_DEFAULT_UNROLL_N 1
  370. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block; Q is the literal 256 for every precision. */
  371. #define SGEMM_DEFAULT_Q 256
  372. #define SGEMM_DEFAULT_R sgemm_r
  373. #define DGEMM_DEFAULT_P dgemm_p
  374. #define DGEMM_DEFAULT_Q 256
  375. #define DGEMM_DEFAULT_R dgemm_r
  376. #define QGEMM_DEFAULT_P qgemm_p
  377. #define QGEMM_DEFAULT_Q 256
  378. #define QGEMM_DEFAULT_R qgemm_r
  379. #define CGEMM_DEFAULT_P cgemm_p
  380. #define CGEMM_DEFAULT_Q 256
  381. #define CGEMM_DEFAULT_R cgemm_r
  382. #define ZGEMM_DEFAULT_P zgemm_p
  383. #define ZGEMM_DEFAULT_Q 256
  384. #define ZGEMM_DEFAULT_R zgemm_r
  385. #define XGEMM_DEFAULT_P xgemm_p
  386. #define XGEMM_DEFAULT_Q 256
  387. #define XGEMM_DEFAULT_R xgemm_r
  388. #define SYMV_P 4
  389. #endif
  390. #ifdef PENTIUMM /* Tuning macros that take effect only when PENTIUMM is defined. */
  391. #define SNUMOPT 2
  392. #define DNUMOPT 1
  393. #define GEMM_DEFAULT_OFFSET_A 0
  394. #define GEMM_DEFAULT_OFFSET_B 0
  395. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  396. #ifdef CORE_YONAH /* All unroll factors are selected by CORE_YONAH rather than by ARCH_X86 in this block. */
  397. #define SGEMM_DEFAULT_UNROLL_M 4
  398. #define SGEMM_DEFAULT_UNROLL_N 4
  399. #define DGEMM_DEFAULT_UNROLL_M 2
  400. #define DGEMM_DEFAULT_UNROLL_N 4
  401. #define QGEMM_DEFAULT_UNROLL_M 2
  402. #define QGEMM_DEFAULT_UNROLL_N 2
  403. #define CGEMM_DEFAULT_UNROLL_M 2
  404. #define CGEMM_DEFAULT_UNROLL_N 2
  405. #define ZGEMM_DEFAULT_UNROLL_M 1
  406. #define ZGEMM_DEFAULT_UNROLL_N 2
  407. #define XGEMM_DEFAULT_UNROLL_M 1
  408. #define XGEMM_DEFAULT_UNROLL_N 1
  409. #else /* CORE_YONAH not defined. */
  410. #define SGEMM_DEFAULT_UNROLL_M 8
  411. #define SGEMM_DEFAULT_UNROLL_N 2
  412. #define DGEMM_DEFAULT_UNROLL_M 2
  413. #define DGEMM_DEFAULT_UNROLL_N 2
  414. #define QGEMM_DEFAULT_UNROLL_M 2
  415. #define QGEMM_DEFAULT_UNROLL_N 2
  416. #define CGEMM_DEFAULT_UNROLL_M 4
  417. #define CGEMM_DEFAULT_UNROLL_N 1
  418. #define ZGEMM_DEFAULT_UNROLL_M 1
  419. #define ZGEMM_DEFAULT_UNROLL_N 1
  420. #define XGEMM_DEFAULT_UNROLL_M 1
  421. #define XGEMM_DEFAULT_UNROLL_N 1
  422. #endif
  423. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block; Q is the literal 256 for every precision. */
  424. #define SGEMM_DEFAULT_Q 256
  425. #define SGEMM_DEFAULT_R sgemm_r
  426. #define DGEMM_DEFAULT_P dgemm_p
  427. #define DGEMM_DEFAULT_Q 256
  428. #define DGEMM_DEFAULT_R dgemm_r
  429. #define QGEMM_DEFAULT_P qgemm_p
  430. #define QGEMM_DEFAULT_Q 256
  431. #define QGEMM_DEFAULT_R qgemm_r
  432. #define CGEMM_DEFAULT_P cgemm_p
  433. #define CGEMM_DEFAULT_Q 256
  434. #define CGEMM_DEFAULT_R cgemm_r
  435. #define ZGEMM_DEFAULT_P zgemm_p
  436. #define ZGEMM_DEFAULT_Q 256
  437. #define ZGEMM_DEFAULT_R zgemm_r
  438. #define XGEMM_DEFAULT_P xgemm_p
  439. #define XGEMM_DEFAULT_Q 256
  440. #define XGEMM_DEFAULT_R xgemm_r
  441. #define SYMV_P 4
  442. #endif
  443. #ifdef CORE_NORTHWOOD /* Tuning macros that take effect only when CORE_NORTHWOOD is defined; no nested conditionals in this block. */
  444. #define SNUMOPT 4
  445. #define DNUMOPT 2
  446. #define GEMM_DEFAULT_OFFSET_A 0
  447. #define GEMM_DEFAULT_OFFSET_B 32
  448. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  449. #define SYMV_P 8
  450. #define SGEMM_DEFAULT_UNROLL_M 8
  451. #define DGEMM_DEFAULT_UNROLL_M 4
  452. #define QGEMM_DEFAULT_UNROLL_M 2
  453. #define CGEMM_DEFAULT_UNROLL_M 4
  454. #define ZGEMM_DEFAULT_UNROLL_M 2
  455. #define XGEMM_DEFAULT_UNROLL_M 1
  456. #define SGEMM_DEFAULT_UNROLL_N 2
  457. #define DGEMM_DEFAULT_UNROLL_N 2
  458. #define QGEMM_DEFAULT_UNROLL_N 2
  459. #define CGEMM_DEFAULT_UNROLL_N 1
  460. #define ZGEMM_DEFAULT_UNROLL_N 1
  461. #define XGEMM_DEFAULT_UNROLL_N 1
  462. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block, not to numeric literals. */
  463. #define SGEMM_DEFAULT_R sgemm_r
  464. #define DGEMM_DEFAULT_P dgemm_p
  465. #define DGEMM_DEFAULT_R dgemm_r
  466. #define QGEMM_DEFAULT_P qgemm_p
  467. #define QGEMM_DEFAULT_R qgemm_r
  468. #define CGEMM_DEFAULT_P cgemm_p
  469. #define CGEMM_DEFAULT_R cgemm_r
  470. #define ZGEMM_DEFAULT_P zgemm_p
  471. #define ZGEMM_DEFAULT_R zgemm_r
  472. #define XGEMM_DEFAULT_P xgemm_p
  473. #define XGEMM_DEFAULT_R xgemm_r
  474. #define SGEMM_DEFAULT_Q 128 /* Q is 128 for every precision. */
  475. #define DGEMM_DEFAULT_Q 128
  476. #define QGEMM_DEFAULT_Q 128
  477. #define CGEMM_DEFAULT_Q 128
  478. #define ZGEMM_DEFAULT_Q 128
  479. #define XGEMM_DEFAULT_Q 128
  480. #endif
  481. #ifdef CORE_PRESCOTT /* Tuning macros that take effect only when CORE_PRESCOTT is defined. */
  482. #define SNUMOPT 4
  483. #define DNUMOPT 2
  484. #ifndef __64BIT__ /* Offsets are 128/192 when __64BIT__ is NOT defined, 0/256 when it is. */
  485. #define GEMM_DEFAULT_OFFSET_A 128
  486. #define GEMM_DEFAULT_OFFSET_B 192
  487. #else
  488. #define GEMM_DEFAULT_OFFSET_A 0
  489. #define GEMM_DEFAULT_OFFSET_B 256
  490. #endif
  491. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  492. #define SYMV_P 8
  493. #ifdef ARCH_X86 /* Only UNROLL_M depends on ARCH_X86; UNROLL_N below is unconditional. */
  494. #define SGEMM_DEFAULT_UNROLL_M 4
  495. #define DGEMM_DEFAULT_UNROLL_M 2
  496. #define QGEMM_DEFAULT_UNROLL_M 2
  497. #define CGEMM_DEFAULT_UNROLL_M 2
  498. #define ZGEMM_DEFAULT_UNROLL_M 1
  499. #define XGEMM_DEFAULT_UNROLL_M 1
  500. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_M are twice the ARCH_X86 values; Q and X are the same. */
  501. #define SGEMM_DEFAULT_UNROLL_M 8
  502. #define DGEMM_DEFAULT_UNROLL_M 4
  503. #define QGEMM_DEFAULT_UNROLL_M 2
  504. #define CGEMM_DEFAULT_UNROLL_M 4
  505. #define ZGEMM_DEFAULT_UNROLL_M 2
  506. #define XGEMM_DEFAULT_UNROLL_M 1
  507. #endif
  508. #define SGEMM_DEFAULT_UNROLL_N 4
  509. #define DGEMM_DEFAULT_UNROLL_N 4
  510. #define QGEMM_DEFAULT_UNROLL_N 2
  511. #define CGEMM_DEFAULT_UNROLL_N 2
  512. #define ZGEMM_DEFAULT_UNROLL_N 2
  513. #define XGEMM_DEFAULT_UNROLL_N 1
  514. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block, not to numeric literals. */
  515. #define SGEMM_DEFAULT_R sgemm_r
  516. #define DGEMM_DEFAULT_P dgemm_p
  517. #define DGEMM_DEFAULT_R dgemm_r
  518. #define QGEMM_DEFAULT_P qgemm_p
  519. #define QGEMM_DEFAULT_R qgemm_r
  520. #define CGEMM_DEFAULT_P cgemm_p
  521. #define CGEMM_DEFAULT_R cgemm_r
  522. #define ZGEMM_DEFAULT_P zgemm_p
  523. #define ZGEMM_DEFAULT_R zgemm_r
  524. #define XGEMM_DEFAULT_P xgemm_p
  525. #define XGEMM_DEFAULT_R xgemm_r
  526. #define SGEMM_DEFAULT_Q 128 /* Q is 128 for every precision. */
  527. #define DGEMM_DEFAULT_Q 128
  528. #define QGEMM_DEFAULT_Q 128
  529. #define CGEMM_DEFAULT_Q 128
  530. #define ZGEMM_DEFAULT_Q 128
  531. #define XGEMM_DEFAULT_Q 128
  532. #endif
  533. #ifdef CORE2 /* Tuning macros that take effect only when CORE2 is defined. */
  534. #define SNUMOPT 8
  535. #define DNUMOPT 4
  536. #define GEMM_DEFAULT_OFFSET_A 448
  537. #define GEMM_DEFAULT_OFFSET_B 128
  538. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  539. #define SYMV_P 8
  540. #define SWITCH_RATIO 4
  541. #ifdef ARCH_X86 /* UNROLL_M is identical in both branches; ARCH_X86 changes UNROLL_N and additionally defines MASK. */
  542. #define SGEMM_DEFAULT_UNROLL_M 8
  543. #define DGEMM_DEFAULT_UNROLL_M 4
  544. #define QGEMM_DEFAULT_UNROLL_M 2
  545. #define CGEMM_DEFAULT_UNROLL_M 4
  546. #define ZGEMM_DEFAULT_UNROLL_M 2
  547. #define XGEMM_DEFAULT_UNROLL_M 1
  548. #define SGEMM_DEFAULT_UNROLL_N 2
  549. #define DGEMM_DEFAULT_UNROLL_N 2
  550. #define QGEMM_DEFAULT_UNROLL_N 2
  551. #define CGEMM_DEFAULT_UNROLL_N 1
  552. #define ZGEMM_DEFAULT_UNROLL_N 1
  553. #define XGEMM_DEFAULT_UNROLL_N 1
  554. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b)) /* With non-negative integer operands this rounds a up to the next multiple of b; each argument is evaluated more than once. NOTE(review): defined only on the ARCH_X86 path - confirm no non-ARCH_X86 code relies on it. */
  555. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_N are twice the ARCH_X86 values; MASK is not defined here. */
  556. #define SGEMM_DEFAULT_UNROLL_M 8
  557. #define DGEMM_DEFAULT_UNROLL_M 4
  558. #define QGEMM_DEFAULT_UNROLL_M 2
  559. #define CGEMM_DEFAULT_UNROLL_M 4
  560. #define ZGEMM_DEFAULT_UNROLL_M 2
  561. #define XGEMM_DEFAULT_UNROLL_M 1
  562. #define SGEMM_DEFAULT_UNROLL_N 4
  563. #define DGEMM_DEFAULT_UNROLL_N 4
  564. #define QGEMM_DEFAULT_UNROLL_N 2
  565. #define CGEMM_DEFAULT_UNROLL_N 2
  566. #define ZGEMM_DEFAULT_UNROLL_N 2
  567. #define XGEMM_DEFAULT_UNROLL_N 1
  568. #endif
  569. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block, not to numeric literals. */
  570. #define SGEMM_DEFAULT_R sgemm_r
  571. #define DGEMM_DEFAULT_P dgemm_p
  572. #define DGEMM_DEFAULT_R dgemm_r
  573. #define QGEMM_DEFAULT_P qgemm_p
  574. #define QGEMM_DEFAULT_R qgemm_r
  575. #define CGEMM_DEFAULT_P cgemm_p
  576. #define CGEMM_DEFAULT_R cgemm_r
  577. #define ZGEMM_DEFAULT_P zgemm_p
  578. #define ZGEMM_DEFAULT_R zgemm_r
  579. #define XGEMM_DEFAULT_P xgemm_p
  580. #define XGEMM_DEFAULT_R xgemm_r
  581. #define SGEMM_DEFAULT_Q 256 /* Q is 256 for every precision. */
  582. #define DGEMM_DEFAULT_Q 256
  583. #define QGEMM_DEFAULT_Q 256
  584. #define CGEMM_DEFAULT_Q 256
  585. #define ZGEMM_DEFAULT_Q 256
  586. #define XGEMM_DEFAULT_Q 256
  587. #endif
  588. #ifdef PENRYN /* Tuning macros that take effect only when PENRYN is defined. */
  589. #define SNUMOPT 8
  590. #define DNUMOPT 4
  591. #define GEMM_DEFAULT_OFFSET_A 128
  592. #define GEMM_DEFAULT_OFFSET_B 0
  593. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  594. #define SYMV_P 8
  595. #define SWITCH_RATIO 4
  596. #ifdef ARCH_X86 /* UNROLL_N is identical in both branches; only UNROLL_M differs with ARCH_X86. */
  597. #define SGEMM_DEFAULT_UNROLL_M 4
  598. #define DGEMM_DEFAULT_UNROLL_M 2
  599. #define QGEMM_DEFAULT_UNROLL_M 2
  600. #define CGEMM_DEFAULT_UNROLL_M 2
  601. #define ZGEMM_DEFAULT_UNROLL_M 1
  602. #define XGEMM_DEFAULT_UNROLL_M 1
  603. #define SGEMM_DEFAULT_UNROLL_N 4
  604. #define DGEMM_DEFAULT_UNROLL_N 4
  605. #define QGEMM_DEFAULT_UNROLL_N 2
  606. #define CGEMM_DEFAULT_UNROLL_N 2
  607. #define ZGEMM_DEFAULT_UNROLL_N 2
  608. #define XGEMM_DEFAULT_UNROLL_N 1
  609. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_M are twice the ARCH_X86 values. */
  610. #define SGEMM_DEFAULT_UNROLL_M 8
  611. #define DGEMM_DEFAULT_UNROLL_M 4
  612. #define QGEMM_DEFAULT_UNROLL_M 2
  613. #define CGEMM_DEFAULT_UNROLL_M 4
  614. #define ZGEMM_DEFAULT_UNROLL_M 2
  615. #define XGEMM_DEFAULT_UNROLL_M 1
  616. #define SGEMM_DEFAULT_UNROLL_N 4
  617. #define DGEMM_DEFAULT_UNROLL_N 4
  618. #define QGEMM_DEFAULT_UNROLL_N 2
  619. #define CGEMM_DEFAULT_UNROLL_N 2
  620. #define ZGEMM_DEFAULT_UNROLL_N 2
  621. #define XGEMM_DEFAULT_UNROLL_N 1
  622. #endif
  623. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block, not to numeric literals. */
  624. #define SGEMM_DEFAULT_R sgemm_r
  625. #define DGEMM_DEFAULT_P dgemm_p
  626. #define DGEMM_DEFAULT_R dgemm_r
  627. #define QGEMM_DEFAULT_P qgemm_p
  628. #define QGEMM_DEFAULT_R qgemm_r
  629. #define CGEMM_DEFAULT_P cgemm_p
  630. #define CGEMM_DEFAULT_R cgemm_r
  631. #define ZGEMM_DEFAULT_P zgemm_p
  632. #define ZGEMM_DEFAULT_R zgemm_r
  633. #define XGEMM_DEFAULT_P xgemm_p
  634. #define XGEMM_DEFAULT_R xgemm_r
  635. #define SGEMM_DEFAULT_Q 512 /* Q varies per precision: 512 / 256 / 128 for S / D / Q and again 512 / 256 / 128 for C / Z / X. */
  636. #define DGEMM_DEFAULT_Q 256
  637. #define QGEMM_DEFAULT_Q 128
  638. #define CGEMM_DEFAULT_Q 512
  639. #define ZGEMM_DEFAULT_Q 256
  640. #define XGEMM_DEFAULT_Q 128
  641. #define GETRF_FACTOR 0.75 /* Floating-point literal, so it cannot be used in #if expressions. */
  642. #endif
  643. #ifdef DUNNINGTON /* Tuning macros that take effect only when DUNNINGTON is defined. */
  644. #define SNUMOPT 8
  645. #define DNUMOPT 4
  646. #define GEMM_DEFAULT_OFFSET_A 128
  647. #define GEMM_DEFAULT_OFFSET_B 0
  648. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  649. #define SYMV_P 8
  650. #define SWITCH_RATIO 4
  651. #ifdef ARCH_X86 /* UNROLL_N is identical in both branches; only UNROLL_M differs with ARCH_X86. */
  652. #define SGEMM_DEFAULT_UNROLL_M 4
  653. #define DGEMM_DEFAULT_UNROLL_M 2
  654. #define QGEMM_DEFAULT_UNROLL_M 2
  655. #define CGEMM_DEFAULT_UNROLL_M 2
  656. #define ZGEMM_DEFAULT_UNROLL_M 1
  657. #define XGEMM_DEFAULT_UNROLL_M 1
  658. #define SGEMM_DEFAULT_UNROLL_N 4
  659. #define DGEMM_DEFAULT_UNROLL_N 4
  660. #define QGEMM_DEFAULT_UNROLL_N 2
  661. #define CGEMM_DEFAULT_UNROLL_N 2
  662. #define ZGEMM_DEFAULT_UNROLL_N 2
  663. #define XGEMM_DEFAULT_UNROLL_N 1
  664. #else /* ARCH_X86 not defined: S/D/C/Z UNROLL_M are twice the ARCH_X86 values. */
  665. #define SGEMM_DEFAULT_UNROLL_M 8
  666. #define DGEMM_DEFAULT_UNROLL_M 4
  667. #define QGEMM_DEFAULT_UNROLL_M 2
  668. #define CGEMM_DEFAULT_UNROLL_M 4
  669. #define ZGEMM_DEFAULT_UNROLL_M 2
  670. #define XGEMM_DEFAULT_UNROLL_M 1
  671. #define SGEMM_DEFAULT_UNROLL_N 4
  672. #define DGEMM_DEFAULT_UNROLL_N 4
  673. #define QGEMM_DEFAULT_UNROLL_N 2
  674. #define CGEMM_DEFAULT_UNROLL_N 2
  675. #define ZGEMM_DEFAULT_UNROLL_N 2
  676. #define XGEMM_DEFAULT_UNROLL_N 1
  677. #endif
  678. #define SGEMM_DEFAULT_P sgemm_p /* P and R expand to identifiers that are not defined in this block, not to numeric literals. */
  679. #define SGEMM_DEFAULT_R sgemm_r
  680. #define DGEMM_DEFAULT_P dgemm_p
  681. #define DGEMM_DEFAULT_R dgemm_r
  682. #define QGEMM_DEFAULT_P qgemm_p
  683. #define QGEMM_DEFAULT_R qgemm_r
  684. #define CGEMM_DEFAULT_P cgemm_p
  685. #define CGEMM_DEFAULT_R cgemm_r
  686. #define ZGEMM_DEFAULT_P zgemm_p
  687. #define ZGEMM_DEFAULT_R zgemm_r
  688. #define XGEMM_DEFAULT_P xgemm_p
  689. #define XGEMM_DEFAULT_R xgemm_r
  690. #define SGEMM_DEFAULT_Q 768 /* Q varies per precision: 768 / 384 / 192 for S / D / Q and again 768 / 384 / 192 for C / Z / X. */
  691. #define DGEMM_DEFAULT_Q 384
  692. #define QGEMM_DEFAULT_Q 192
  693. #define CGEMM_DEFAULT_Q 768
  694. #define ZGEMM_DEFAULT_Q 384
  695. #define XGEMM_DEFAULT_Q 192
  696. #define GETRF_FACTOR 0.75 /* Floating-point literal, so it cannot be used in #if expressions. */
  697. #define GEMM_THREAD gemm_thread_mn /* Expands to the identifier gemm_thread_mn, which is not defined in this block. */
  698. #endif
  /*
   * Tuning macros selected when NEHALEM is defined.
   *
   * - The two ARCH_X86 branches share the same UNROLL_M values; the branch
   *   taken when ARCH_X86 is not defined uses larger UNROLL_N for S/D/C/Z.
   * - *_DEFAULT_P and *_DEFAULT_Q are literal constants (P is 504 for the
   *   S/D/Q macros and 252 for C/Z/X).
   * - *_DEFAULT_R expand to identifiers (sgemm_r, ...) that are not defined
   *   in this part of the file.
   */
  699. #ifdef NEHALEM
  700. #define SNUMOPT 8
  701. #define DNUMOPT 4
  702. #define GEMM_DEFAULT_OFFSET_A 32
  703. #define GEMM_DEFAULT_OFFSET_B 0
  704. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  705. #define SYMV_P 8
  706. #define SWITCH_RATIO 4
  707. #ifdef ARCH_X86
  708. #define SGEMM_DEFAULT_UNROLL_M 4
  709. #define DGEMM_DEFAULT_UNROLL_M 2
  710. #define QGEMM_DEFAULT_UNROLL_M 2
  711. #define CGEMM_DEFAULT_UNROLL_M 2
  712. #define ZGEMM_DEFAULT_UNROLL_M 1
  713. #define XGEMM_DEFAULT_UNROLL_M 1
  714. #define SGEMM_DEFAULT_UNROLL_N 4
  715. #define DGEMM_DEFAULT_UNROLL_N 4
  716. #define QGEMM_DEFAULT_UNROLL_N 2
  717. #define CGEMM_DEFAULT_UNROLL_N 2
  718. #define ZGEMM_DEFAULT_UNROLL_N 2
  719. #define XGEMM_DEFAULT_UNROLL_N 1
  720. #else /* ARCH_X86 not defined */
  721. #define SGEMM_DEFAULT_UNROLL_M 4
  722. #define DGEMM_DEFAULT_UNROLL_M 2
  723. #define QGEMM_DEFAULT_UNROLL_M 2
  724. #define CGEMM_DEFAULT_UNROLL_M 2
  725. #define ZGEMM_DEFAULT_UNROLL_M 1
  726. #define XGEMM_DEFAULT_UNROLL_M 1
  727. #define SGEMM_DEFAULT_UNROLL_N 8
  728. #define DGEMM_DEFAULT_UNROLL_N 8
  729. #define QGEMM_DEFAULT_UNROLL_N 2
  730. #define CGEMM_DEFAULT_UNROLL_N 4
  731. #define ZGEMM_DEFAULT_UNROLL_N 4
  732. #define XGEMM_DEFAULT_UNROLL_N 1
  733. #endif /* ARCH_X86 */
  734. #define SGEMM_DEFAULT_P 504
  735. #define SGEMM_DEFAULT_R sgemm_r
  736. #define DGEMM_DEFAULT_P 504
  737. #define DGEMM_DEFAULT_R dgemm_r
  738. #define QGEMM_DEFAULT_P 504
  739. #define QGEMM_DEFAULT_R qgemm_r
  740. #define CGEMM_DEFAULT_P 252
  741. #define CGEMM_DEFAULT_R cgemm_r
  742. #define ZGEMM_DEFAULT_P 252
  743. #define ZGEMM_DEFAULT_R zgemm_r
  744. #define XGEMM_DEFAULT_P 252
  745. #define XGEMM_DEFAULT_R xgemm_r
  746. #define SGEMM_DEFAULT_Q 512
  747. #define DGEMM_DEFAULT_Q 256
  748. #define QGEMM_DEFAULT_Q 128
  749. #define CGEMM_DEFAULT_Q 512
  750. #define ZGEMM_DEFAULT_Q 256
  751. #define XGEMM_DEFAULT_Q 128
  752. #define GETRF_FACTOR 0.72
  753. #endif /* NEHALEM */
  /*
   * Tuning macros selected when SANDYBRIDGE is defined.
   *
   * - UNROLL_M / UNROLL_N come in two sets chosen by ARCH_X86.
   * - *_DEFAULT_P and *_DEFAULT_Q are literal constants.
   * - *_DEFAULT_R expand to identifiers (sgemm_r, ...) not defined in this
   *   part of the file, except CGEMM_DEFAULT_R, which is fixed to 1024.
   *   The commented-out #define lines are the alternative setting for each
   *   precision (literal 1024 vs. identifier) and are currently inactive.
   */
  754. #ifdef SANDYBRIDGE
  755. #define SNUMOPT 8
  756. #define DNUMOPT 4
  757. #define GEMM_DEFAULT_OFFSET_A 0
  758. #define GEMM_DEFAULT_OFFSET_B 0
  759. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  760. #define SYMV_P 8
  761. #define SWITCH_RATIO 4
  762. #ifdef ARCH_X86
  763. #define SGEMM_DEFAULT_UNROLL_M 4
  764. #define DGEMM_DEFAULT_UNROLL_M 2
  765. #define QGEMM_DEFAULT_UNROLL_M 2
  766. #define CGEMM_DEFAULT_UNROLL_M 2
  767. #define ZGEMM_DEFAULT_UNROLL_M 1
  768. #define XGEMM_DEFAULT_UNROLL_M 1
  769. #define SGEMM_DEFAULT_UNROLL_N 4
  770. #define DGEMM_DEFAULT_UNROLL_N 4
  771. #define QGEMM_DEFAULT_UNROLL_N 2
  772. #define CGEMM_DEFAULT_UNROLL_N 2
  773. #define ZGEMM_DEFAULT_UNROLL_N 2
  774. #define XGEMM_DEFAULT_UNROLL_N 1
  775. #else /* ARCH_X86 not defined */
  776. #define SGEMM_DEFAULT_UNROLL_M 8
  777. #define DGEMM_DEFAULT_UNROLL_M 8
  778. #define QGEMM_DEFAULT_UNROLL_M 2
  779. #define CGEMM_DEFAULT_UNROLL_M 8
  780. #define ZGEMM_DEFAULT_UNROLL_M 4
  781. #define XGEMM_DEFAULT_UNROLL_M 1
  782. #define SGEMM_DEFAULT_UNROLL_N 8
  783. #define DGEMM_DEFAULT_UNROLL_N 4
  784. #define QGEMM_DEFAULT_UNROLL_N 2
  785. #define CGEMM_DEFAULT_UNROLL_N 4
  786. #define ZGEMM_DEFAULT_UNROLL_N 4
  787. #define XGEMM_DEFAULT_UNROLL_N 1
  788. #endif /* ARCH_X86 */
  789. #define SGEMM_DEFAULT_P 512
  790. #define SGEMM_DEFAULT_R sgemm_r
  791. //#define SGEMM_DEFAULT_R 1024
  792. #define DGEMM_DEFAULT_P 512
  793. #define DGEMM_DEFAULT_R dgemm_r
  794. //#define DGEMM_DEFAULT_R 1024
  795. #define QGEMM_DEFAULT_P 504
  796. #define QGEMM_DEFAULT_R qgemm_r
  797. #define CGEMM_DEFAULT_P 128
  798. //#define CGEMM_DEFAULT_R cgemm_r
  799. #define CGEMM_DEFAULT_R 1024
  800. #define ZGEMM_DEFAULT_P 512
  801. #define ZGEMM_DEFAULT_R zgemm_r
  802. //#define ZGEMM_DEFAULT_R 1024
  803. #define XGEMM_DEFAULT_P 252
  804. #define XGEMM_DEFAULT_R xgemm_r
  805. #define SGEMM_DEFAULT_Q 256
  806. #define DGEMM_DEFAULT_Q 256
  807. #define QGEMM_DEFAULT_Q 128
  808. #define CGEMM_DEFAULT_Q 256
  809. #define ZGEMM_DEFAULT_Q 192
  810. #define XGEMM_DEFAULT_Q 128
  811. #define GETRF_FACTOR 0.72
  812. #endif /* SANDYBRIDGE */
  /*
   * Tuning macros selected when ATOM is defined.
   *
   * - Only UNROLL_M depends on ARCH_X86; UNROLL_N is defined once, after the
   *   conditional, for both cases.
   * - *_DEFAULT_P / *_DEFAULT_R expand to identifiers (sgemm_p, sgemm_r, ...)
   *   not defined in this part of the file; every *_DEFAULT_Q is 256.
   * - NOTE(review): unlike the DUNNINGTON/NEHALEM/SANDYBRIDGE blocks, this
   *   block defines neither SWITCH_RATIO nor GETRF_FACTOR -- presumably a
   *   default is supplied elsewhere; confirm.
   */
  813. #ifdef ATOM
  814. #define SNUMOPT 2
  815. #define DNUMOPT 1
  816. #define GEMM_DEFAULT_OFFSET_A 64
  817. #define GEMM_DEFAULT_OFFSET_B 0
  818. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  819. #define SYMV_P 8
  820. #ifdef ARCH_X86
  821. #define SGEMM_DEFAULT_UNROLL_M 4
  822. #define DGEMM_DEFAULT_UNROLL_M 2
  823. #define QGEMM_DEFAULT_UNROLL_M 2
  824. #define CGEMM_DEFAULT_UNROLL_M 2
  825. #define ZGEMM_DEFAULT_UNROLL_M 1
  826. #define XGEMM_DEFAULT_UNROLL_M 1
  827. #else /* ARCH_X86 not defined */
  828. #define SGEMM_DEFAULT_UNROLL_M 8
  829. #define DGEMM_DEFAULT_UNROLL_M 4
  830. #define QGEMM_DEFAULT_UNROLL_M 2
  831. #define CGEMM_DEFAULT_UNROLL_M 4
  832. #define ZGEMM_DEFAULT_UNROLL_M 2
  833. #define XGEMM_DEFAULT_UNROLL_M 1
  834. #endif /* ARCH_X86 */
  835. #define SGEMM_DEFAULT_UNROLL_N 4
  836. #define DGEMM_DEFAULT_UNROLL_N 2
  837. #define QGEMM_DEFAULT_UNROLL_N 2
  838. #define CGEMM_DEFAULT_UNROLL_N 2
  839. #define ZGEMM_DEFAULT_UNROLL_N 1
  840. #define XGEMM_DEFAULT_UNROLL_N 1
  841. #define SGEMM_DEFAULT_P sgemm_p
  842. #define SGEMM_DEFAULT_R sgemm_r
  843. #define DGEMM_DEFAULT_P dgemm_p
  844. #define DGEMM_DEFAULT_R dgemm_r
  845. #define QGEMM_DEFAULT_P qgemm_p
  846. #define QGEMM_DEFAULT_R qgemm_r
  847. #define CGEMM_DEFAULT_P cgemm_p
  848. #define CGEMM_DEFAULT_R cgemm_r
  849. #define ZGEMM_DEFAULT_P zgemm_p
  850. #define ZGEMM_DEFAULT_R zgemm_r
  851. #define XGEMM_DEFAULT_P xgemm_p
  852. #define XGEMM_DEFAULT_R xgemm_r
  853. #define SGEMM_DEFAULT_Q 256
  854. #define DGEMM_DEFAULT_Q 256
  855. #define QGEMM_DEFAULT_Q 256
  856. #define CGEMM_DEFAULT_Q 256
  857. #define ZGEMM_DEFAULT_Q 256
  858. #define XGEMM_DEFAULT_Q 256
  859. #endif /* ATOM */
  /*
   * Tuning macros selected when ITANIUM2 is defined.
   *
   * - Unroll factors are 8x8 for the S/D/Q macros and 4x4 for C/Z/X.
   * - *_DEFAULT_P / *_DEFAULT_R expand to identifiers (sgemm_p, sgemm_r, ...)
   *   not defined in this part of the file; every *_DEFAULT_Q is 1024.
   * - No ARCH_X86 split and no SWITCH_RATIO in this block.
   */
  860. #ifdef ITANIUM2
  861. #define SNUMOPT 4
  862. #define DNUMOPT 4
  863. #define GEMM_DEFAULT_OFFSET_A 0
  864. #define GEMM_DEFAULT_OFFSET_B 128
  865. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  866. #define SGEMM_DEFAULT_UNROLL_M 8
  867. #define SGEMM_DEFAULT_UNROLL_N 8
  868. #define DGEMM_DEFAULT_UNROLL_M 8
  869. #define DGEMM_DEFAULT_UNROLL_N 8
  870. #define QGEMM_DEFAULT_UNROLL_M 8
  871. #define QGEMM_DEFAULT_UNROLL_N 8
  872. #define CGEMM_DEFAULT_UNROLL_M 4
  873. #define CGEMM_DEFAULT_UNROLL_N 4
  874. #define ZGEMM_DEFAULT_UNROLL_M 4
  875. #define ZGEMM_DEFAULT_UNROLL_N 4
  876. #define XGEMM_DEFAULT_UNROLL_M 4
  877. #define XGEMM_DEFAULT_UNROLL_N 4
  878. #define SGEMM_DEFAULT_P sgemm_p
  879. #define DGEMM_DEFAULT_P dgemm_p
  880. #define QGEMM_DEFAULT_P qgemm_p
  881. #define CGEMM_DEFAULT_P cgemm_p
  882. #define ZGEMM_DEFAULT_P zgemm_p
  883. #define XGEMM_DEFAULT_P xgemm_p
  884. #define SGEMM_DEFAULT_Q 1024
  885. #define DGEMM_DEFAULT_Q 1024
  886. #define QGEMM_DEFAULT_Q 1024
  887. #define CGEMM_DEFAULT_Q 1024
  888. #define ZGEMM_DEFAULT_Q 1024
  889. #define XGEMM_DEFAULT_Q 1024
  890. #define SGEMM_DEFAULT_R sgemm_r
  891. #define DGEMM_DEFAULT_R dgemm_r
  892. #define QGEMM_DEFAULT_R qgemm_r
  893. #define CGEMM_DEFAULT_R cgemm_r
  894. #define ZGEMM_DEFAULT_R zgemm_r
  895. #define XGEMM_DEFAULT_R xgemm_r
  896. #define SYMV_P 16
  897. #define GETRF_FACTOR 0.65
  898. #endif /* ITANIUM2 */
  /*
   * Tuning macros shared by EV4, EV5 and EV6 (any one of them enables the
   * block).
   *
   * - SNUMOPT/DNUMOPT are 1 for EV4 and 2 otherwise.
   * - Offsets, alignment, unroll factors and SYMV_P are common to all three.
   * - P and Q are set per CPU in the three inner #ifdef groups.
   * - NOTE(review): *_DEFAULT_R is defined only in the EV4 group; EV5 and
   *   EV6 leave it undefined here -- presumably a fallback is provided
   *   elsewhere; confirm.
   * - Only the S/D/C/Z macros are defined here; no Q/X macros.
   */
  899. #if defined(EV4) || defined(EV5) || defined(EV6)
  900. #ifdef EV4
  901. #define SNUMOPT 1
  902. #define DNUMOPT 1
  903. #else /* EV5 or EV6 */
  904. #define SNUMOPT 2
  905. #define DNUMOPT 2
  906. #endif
  907. #define GEMM_DEFAULT_OFFSET_A 512
  908. #define GEMM_DEFAULT_OFFSET_B 512
  909. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  910. #define SGEMM_DEFAULT_UNROLL_M 4
  911. #define SGEMM_DEFAULT_UNROLL_N 4
  912. #define DGEMM_DEFAULT_UNROLL_M 4
  913. #define DGEMM_DEFAULT_UNROLL_N 4
  914. #define CGEMM_DEFAULT_UNROLL_M 2
  915. #define CGEMM_DEFAULT_UNROLL_N 2
  916. #define ZGEMM_DEFAULT_UNROLL_M 2
  917. #define ZGEMM_DEFAULT_UNROLL_N 2
  918. #define SYMV_P 8
  919. #ifdef EV4
  920. #define SGEMM_DEFAULT_P 32
  921. #define SGEMM_DEFAULT_Q 112
  922. #define SGEMM_DEFAULT_R 256
  923. #define DGEMM_DEFAULT_P 32
  924. #define DGEMM_DEFAULT_Q 56
  925. #define DGEMM_DEFAULT_R 256
  926. #define CGEMM_DEFAULT_P 32
  927. #define CGEMM_DEFAULT_Q 64
  928. #define CGEMM_DEFAULT_R 240
  929. #define ZGEMM_DEFAULT_P 32
  930. #define ZGEMM_DEFAULT_Q 32
  931. #define ZGEMM_DEFAULT_R 240
  932. #endif /* EV4 */
  933. #ifdef EV5
  934. #define SGEMM_DEFAULT_P 64
  935. #define SGEMM_DEFAULT_Q 256
  936. #define DGEMM_DEFAULT_P 64
  937. #define DGEMM_DEFAULT_Q 128
  938. #define CGEMM_DEFAULT_P 64
  939. #define CGEMM_DEFAULT_Q 128
  940. #define ZGEMM_DEFAULT_P 64
  941. #define ZGEMM_DEFAULT_Q 64
  942. #endif /* EV5 */
  943. #ifdef EV6
  944. #define SGEMM_DEFAULT_P 256
  945. #define SGEMM_DEFAULT_Q 512
  946. #define DGEMM_DEFAULT_P 256
  947. #define DGEMM_DEFAULT_Q 256
  948. #define CGEMM_DEFAULT_P 256
  949. #define CGEMM_DEFAULT_Q 256
  950. #define ZGEMM_DEFAULT_P 128
  951. #define ZGEMM_DEFAULT_Q 256
  952. #endif /* EV6 */
  953. #endif /* EV4 || EV5 || EV6 */
  /*
   * Tuning macros selected when CELL is defined.
   * All values are literal constants for the S/D/C/Z macros only; no
   * *_DEFAULT_R is defined in this block.
   */
  954. #ifdef CELL
  955. #define SNUMOPT 2
  956. #define DNUMOPT 2
  957. #define GEMM_DEFAULT_OFFSET_A 0
  958. #define GEMM_DEFAULT_OFFSET_B 8192
  959. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  960. #define SGEMM_DEFAULT_UNROLL_M 16
  961. #define SGEMM_DEFAULT_UNROLL_N 4
  962. #define DGEMM_DEFAULT_UNROLL_M 4
  963. #define DGEMM_DEFAULT_UNROLL_N 4
  964. #define CGEMM_DEFAULT_UNROLL_M 8
  965. #define CGEMM_DEFAULT_UNROLL_N 2
  966. #define ZGEMM_DEFAULT_UNROLL_M 2
  967. #define ZGEMM_DEFAULT_UNROLL_N 2
  968. #define SGEMM_DEFAULT_P 128
  969. #define DGEMM_DEFAULT_P 128
  970. #define CGEMM_DEFAULT_P 128
  971. #define ZGEMM_DEFAULT_P 128
  972. #define SGEMM_DEFAULT_Q 512
  973. #define DGEMM_DEFAULT_Q 256
  974. #define CGEMM_DEFAULT_Q 256
  975. #define ZGEMM_DEFAULT_Q 128
  976. #define SYMV_P 4
  977. #endif /* CELL */
  /*
   * Tuning macros selected when PPCG4 is defined.
   * Unroll factors match the CELL and PPC970 blocks; P/Q are literals.
   * NOTE(review): this block defines neither SNUMOPT nor DNUMOPT, unlike
   * the other blocks in this part of the file -- confirm whether that is
   * intentional or whether a default exists elsewhere.
   */
  978. #ifdef PPCG4
  979. #define GEMM_DEFAULT_OFFSET_A 0
  980. #define GEMM_DEFAULT_OFFSET_B 1024
  981. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  982. #define SGEMM_DEFAULT_UNROLL_M 16
  983. #define SGEMM_DEFAULT_UNROLL_N 4
  984. #define DGEMM_DEFAULT_UNROLL_M 4
  985. #define DGEMM_DEFAULT_UNROLL_N 4
  986. #define CGEMM_DEFAULT_UNROLL_M 8
  987. #define CGEMM_DEFAULT_UNROLL_N 2
  988. #define ZGEMM_DEFAULT_UNROLL_M 2
  989. #define ZGEMM_DEFAULT_UNROLL_N 2
  990. #define SGEMM_DEFAULT_P 256
  991. #define DGEMM_DEFAULT_P 128
  992. #define CGEMM_DEFAULT_P 128
  993. #define ZGEMM_DEFAULT_P 64
  994. #define SGEMM_DEFAULT_Q 256
  995. #define DGEMM_DEFAULT_Q 256
  996. #define CGEMM_DEFAULT_Q 256
  997. #define ZGEMM_DEFAULT_Q 256
  998. #define SYMV_P 4
  999. #endif /* PPCG4 */
  /*
   * Tuning macros selected when PPC970 is defined.
   *
   * - Offsets, alignment, unroll factors, Q and SYMV_P are unconditional.
   * - *_DEFAULT_P is chosen from the value of L2_SIZE, and only when
   *   OS_LINUX is defined.
   * - NOTE(review): when OS_LINUX is not defined, no *_DEFAULT_P is defined
   *   by this block -- confirm other targets get a value from elsewhere.
   * - NOTE(review): L2_SIZE is compared against 1024976, which is not 1 MiB
   *   (1048576).  Presumably it mirrors the exact L2_SIZE value emitted by
   *   the build configuration for one CPU variant -- verify before "fixing".
   */
  1000. #ifdef PPC970
  1001. #define SNUMOPT 4
  1002. #define DNUMOPT 4
  1003. #define GEMM_DEFAULT_OFFSET_A 2688
  1004. #define GEMM_DEFAULT_OFFSET_B 3072
  1005. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1006. #define SGEMM_DEFAULT_UNROLL_M 16
  1007. #define SGEMM_DEFAULT_UNROLL_N 4
  1008. #define DGEMM_DEFAULT_UNROLL_M 4
  1009. #define DGEMM_DEFAULT_UNROLL_N 4
  1010. #define CGEMM_DEFAULT_UNROLL_M 8
  1011. #define CGEMM_DEFAULT_UNROLL_N 2
  1012. #define ZGEMM_DEFAULT_UNROLL_M 2
  1013. #define ZGEMM_DEFAULT_UNROLL_N 2
  1014. #ifdef OS_LINUX
  1015. #if L2_SIZE == 1024976
  1016. #define SGEMM_DEFAULT_P 320
  1017. #define DGEMM_DEFAULT_P 256
  1018. #define CGEMM_DEFAULT_P 256
  1019. #define ZGEMM_DEFAULT_P 256
  1020. #else /* any other L2_SIZE */
  1021. #define SGEMM_DEFAULT_P 176
  1022. #define DGEMM_DEFAULT_P 176
  1023. #define CGEMM_DEFAULT_P 176
  1024. #define ZGEMM_DEFAULT_P 176
  1025. #endif /* L2_SIZE */
  1026. #endif /* OS_LINUX */
  1027. #define SGEMM_DEFAULT_Q 512
  1028. #define DGEMM_DEFAULT_Q 256
  1029. #define CGEMM_DEFAULT_Q 256
  1030. #define ZGEMM_DEFAULT_Q 128
  1031. #define SYMV_P 4
  1032. #endif /* PPC970 */
  /*
   * Tuning macros selected when PPC440 is defined.
   *
   * - Both GEMM offsets are written as (32 * 0), i.e. they evaluate to 0.
   * - Each *_DEFAULT_R is defined as the matching *_DEFAULT_P macro, so R
   *   follows P (512 here).
   */
  1033. #ifdef PPC440
  1034. #define SNUMOPT 2
  1035. #define DNUMOPT 2
  1036. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1037. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1038. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1039. #define SGEMM_DEFAULT_UNROLL_M 4
  1040. #define SGEMM_DEFAULT_UNROLL_N 4
  1041. #define DGEMM_DEFAULT_UNROLL_M 4
  1042. #define DGEMM_DEFAULT_UNROLL_N 4
  1043. #define CGEMM_DEFAULT_UNROLL_M 2
  1044. #define CGEMM_DEFAULT_UNROLL_N 2
  1045. #define ZGEMM_DEFAULT_UNROLL_M 2
  1046. #define ZGEMM_DEFAULT_UNROLL_N 2
  1047. #define SGEMM_DEFAULT_P 512
  1048. #define DGEMM_DEFAULT_P 512
  1049. #define CGEMM_DEFAULT_P 512
  1050. #define ZGEMM_DEFAULT_P 512
  1051. #define SGEMM_DEFAULT_Q 1024
  1052. #define DGEMM_DEFAULT_Q 512
  1053. #define CGEMM_DEFAULT_Q 512
  1054. #define ZGEMM_DEFAULT_Q 256
  1055. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1056. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1057. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1058. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1059. #define SYMV_P 4
  1060. #endif /* PPC440 */
  /*
   * Tuning macros selected when PPC440FP2 is defined.
   *
   * - Both GEMM offsets are written as (32 * 0), i.e. they evaluate to 0.
   * - The Q values sit behind a hard-coded "#if 1": the first (larger) set
   *   is always used, and the "#else" set is never compiled.  Flip the
   *   constant to switch sets.
   * - No *_DEFAULT_R is defined in this block.
   */
  1061. #ifdef PPC440FP2
  1062. #define SNUMOPT 4
  1063. #define DNUMOPT 4
  1064. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1065. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1066. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1067. #define SGEMM_DEFAULT_UNROLL_M 8
  1068. #define SGEMM_DEFAULT_UNROLL_N 4
  1069. #define DGEMM_DEFAULT_UNROLL_M 8
  1070. #define DGEMM_DEFAULT_UNROLL_N 4
  1071. #define CGEMM_DEFAULT_UNROLL_M 4
  1072. #define CGEMM_DEFAULT_UNROLL_N 2
  1073. #define ZGEMM_DEFAULT_UNROLL_M 4
  1074. #define ZGEMM_DEFAULT_UNROLL_N 2
  1075. #define SGEMM_DEFAULT_P 128
  1076. #define DGEMM_DEFAULT_P 128
  1077. #define CGEMM_DEFAULT_P 128
  1078. #define ZGEMM_DEFAULT_P 128
  1079. #if 1 /* active Q set */
  1080. #define SGEMM_DEFAULT_Q 4096
  1081. #define DGEMM_DEFAULT_Q 3072
  1082. #define CGEMM_DEFAULT_Q 2048
  1083. #define ZGEMM_DEFAULT_Q 1024
  1084. #else /* inactive alternative Q set */
  1085. #define SGEMM_DEFAULT_Q 512
  1086. #define DGEMM_DEFAULT_Q 256
  1087. #define CGEMM_DEFAULT_Q 256
  1088. #define ZGEMM_DEFAULT_Q 128
  1089. #endif
  1090. #define SYMV_P 4
  1091. #endif /* PPC440FP2 */
  /*
   * Tuning macros shared by POWER3, POWER4 and POWER5 (any one of them
   * enables the block).  Offsets, alignment, unroll factors and SYMV_P are
   * common; P/Q/R are set in the per-CPU groups below.
   *
   * NOTE(review), all visible in this block -- confirm each is intentional
   * or covered by a fallback elsewhere:
   * - POWER3 defines P/Q/R for the S, D and Z macros but nothing for CGEMM.
   * - POWER4 defines only *_DEFAULT_P (value depends on ALLOC_HUGETLB);
   *   no *_DEFAULT_Q.
   * - POWER5 defines P (depends on ALLOC_HUGETLB) and Q, but no R.
   * - SNUMOPT/DNUMOPT are defined only in the POWER3 group.
   */
  1092. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1093. #define GEMM_DEFAULT_OFFSET_A 0
  1094. #define GEMM_DEFAULT_OFFSET_B 2048
  1095. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1096. #define SGEMM_DEFAULT_UNROLL_M 4
  1097. #define SGEMM_DEFAULT_UNROLL_N 4
  1098. #define DGEMM_DEFAULT_UNROLL_M 4
  1099. #define DGEMM_DEFAULT_UNROLL_N 4
  1100. #define CGEMM_DEFAULT_UNROLL_M 2
  1101. #define CGEMM_DEFAULT_UNROLL_N 2
  1102. #define ZGEMM_DEFAULT_UNROLL_M 2
  1103. #define ZGEMM_DEFAULT_UNROLL_N 2
  1104. #ifdef POWER3
  1105. #define SNUMOPT 4
  1106. #define DNUMOPT 4
  1107. #define SGEMM_DEFAULT_P 256
  1108. #define SGEMM_DEFAULT_Q 432
  1109. #define SGEMM_DEFAULT_R 1012
  1110. #define DGEMM_DEFAULT_P 256
  1111. #define DGEMM_DEFAULT_Q 216
  1112. #define DGEMM_DEFAULT_R 1012
  1113. #define ZGEMM_DEFAULT_P 256
  1114. #define ZGEMM_DEFAULT_Q 104
  1115. #define ZGEMM_DEFAULT_R 1012
  1116. #endif /* POWER3 */
  1117. #if defined(POWER4)
  1118. #ifdef ALLOC_HUGETLB
  1119. #define SGEMM_DEFAULT_P 184
  1120. #define DGEMM_DEFAULT_P 184
  1121. #define CGEMM_DEFAULT_P 184
  1122. #define ZGEMM_DEFAULT_P 184
  1123. #else /* ALLOC_HUGETLB not defined */
  1124. #define SGEMM_DEFAULT_P 144
  1125. #define DGEMM_DEFAULT_P 144
  1126. #define CGEMM_DEFAULT_P 144
  1127. #define ZGEMM_DEFAULT_P 144
  1128. #endif /* ALLOC_HUGETLB */
  1129. #endif /* POWER4 */
  1130. #if defined(POWER5)
  1131. #ifdef ALLOC_HUGETLB
  1132. #define SGEMM_DEFAULT_P 512
  1133. #define DGEMM_DEFAULT_P 256
  1134. #define CGEMM_DEFAULT_P 256
  1135. #define ZGEMM_DEFAULT_P 128
  1136. #else /* ALLOC_HUGETLB not defined */
  1137. #define SGEMM_DEFAULT_P 320
  1138. #define DGEMM_DEFAULT_P 160
  1139. #define CGEMM_DEFAULT_P 160
  1140. #define ZGEMM_DEFAULT_P 80
  1141. #endif /* ALLOC_HUGETLB */
  1142. #define SGEMM_DEFAULT_Q 256
  1143. #define CGEMM_DEFAULT_Q 256
  1144. #define DGEMM_DEFAULT_Q 256
  1145. #define ZGEMM_DEFAULT_Q 256
  1146. #endif /* POWER5 */
  1147. #define SYMV_P 8
  1148. #endif /* POWER3 || POWER4 || POWER5 */
  /*
   * Tuning macros selected when POWER6 is defined.
   * All values are literal constants for the S/D/C/Z macros; the complex
   * macros use UNROLL_N 4 with UNROLL_M 2.  No *_DEFAULT_R is defined in
   * this block.
   */
  1149. #if defined(POWER6)
  1150. #define SNUMOPT 4
  1151. #define DNUMOPT 4
  1152. #define GEMM_DEFAULT_OFFSET_A 384
  1153. #define GEMM_DEFAULT_OFFSET_B 1024
  1154. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1155. #define SGEMM_DEFAULT_UNROLL_M 4
  1156. #define SGEMM_DEFAULT_UNROLL_N 4
  1157. #define DGEMM_DEFAULT_UNROLL_M 4
  1158. #define DGEMM_DEFAULT_UNROLL_N 4
  1159. #define CGEMM_DEFAULT_UNROLL_M 2
  1160. #define CGEMM_DEFAULT_UNROLL_N 4
  1161. #define ZGEMM_DEFAULT_UNROLL_M 2
  1162. #define ZGEMM_DEFAULT_UNROLL_N 4
  1163. #define SGEMM_DEFAULT_P 992
  1164. #define DGEMM_DEFAULT_P 480
  1165. #define CGEMM_DEFAULT_P 488
  1166. #define ZGEMM_DEFAULT_P 248
  1167. #define SGEMM_DEFAULT_Q 504
  1168. #define DGEMM_DEFAULT_Q 504
  1169. #define CGEMM_DEFAULT_Q 400
  1170. #define ZGEMM_DEFAULT_Q 400
  1171. #define SYMV_P 8
  1172. #endif /* POWER6 */
  /*
   * Tuning macros selected when both SPARC and V7 are defined.
   * All values are literal constants for the S/D/C/Z macros; GEMM_THREAD is
   * set to gemm_thread_mn (not defined in this chunk).  No *_DEFAULT_R is
   * defined in this block.
   */
  1173. #if defined(SPARC) && defined(V7)
  1174. #define SNUMOPT 4
  1175. #define DNUMOPT 4
  1176. #define GEMM_DEFAULT_OFFSET_A 0
  1177. #define GEMM_DEFAULT_OFFSET_B 2048
  1178. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1179. #define SGEMM_DEFAULT_UNROLL_M 2
  1180. #define SGEMM_DEFAULT_UNROLL_N 8
  1181. #define DGEMM_DEFAULT_UNROLL_M 2
  1182. #define DGEMM_DEFAULT_UNROLL_N 8
  1183. #define CGEMM_DEFAULT_UNROLL_M 1
  1184. #define CGEMM_DEFAULT_UNROLL_N 4
  1185. #define ZGEMM_DEFAULT_UNROLL_M 1
  1186. #define ZGEMM_DEFAULT_UNROLL_N 4
  1187. #define SGEMM_DEFAULT_P 256
  1188. #define DGEMM_DEFAULT_P 256
  1189. #define CGEMM_DEFAULT_P 256
  1190. #define ZGEMM_DEFAULT_P 256
  1191. #define SGEMM_DEFAULT_Q 512
  1192. #define DGEMM_DEFAULT_Q 256
  1193. #define CGEMM_DEFAULT_Q 256
  1194. #define ZGEMM_DEFAULT_Q 128
  1195. #define SYMV_P 8
  1196. #define GEMM_THREAD gemm_thread_mn
  1197. #endif /* SPARC && V7 */
  /*
   * Tuning macros selected when SPARC and V9 are both defined, or when
   * __sparc_v9__ is defined on its own.
   * All values are literal constants for the S/D/C/Z macros; unlike the
   * SPARC V7 block, GEMM_THREAD is not set here.  No *_DEFAULT_R is defined
   * in this block.
   */
  1198. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1199. #define SNUMOPT 2
  1200. #define DNUMOPT 2
  1201. #define GEMM_DEFAULT_OFFSET_A 0
  1202. #define GEMM_DEFAULT_OFFSET_B 2048
  1203. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1204. #define SGEMM_DEFAULT_UNROLL_M 4
  1205. #define SGEMM_DEFAULT_UNROLL_N 4
  1206. #define DGEMM_DEFAULT_UNROLL_M 4
  1207. #define DGEMM_DEFAULT_UNROLL_N 4
  1208. #define CGEMM_DEFAULT_UNROLL_M 2
  1209. #define CGEMM_DEFAULT_UNROLL_N 2
  1210. #define ZGEMM_DEFAULT_UNROLL_M 2
  1211. #define ZGEMM_DEFAULT_UNROLL_N 2
  1212. #define SGEMM_DEFAULT_P 512
  1213. #define DGEMM_DEFAULT_P 512
  1214. #define CGEMM_DEFAULT_P 512
  1215. #define ZGEMM_DEFAULT_P 512
  1216. #define SGEMM_DEFAULT_Q 1024
  1217. #define DGEMM_DEFAULT_Q 512
  1218. #define CGEMM_DEFAULT_Q 512
  1219. #define ZGEMM_DEFAULT_Q 256
  1220. #define SYMV_P 8
  1221. #endif /* (SPARC && V9) || __sparc_v9__ */
  /*
   * Tuning macros selected when SICORTEX is defined.
   * All values, including *_DEFAULT_R (2000 for every precision), are
   * literal constants for the S/D/C/Z macros.
   */
  1222. #ifdef SICORTEX
  1223. #define SNUMOPT 2
  1224. #define DNUMOPT 2
  1225. #define GEMM_DEFAULT_OFFSET_A 0
  1226. #define GEMM_DEFAULT_OFFSET_B 0
  1227. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1228. #define SGEMM_DEFAULT_UNROLL_M 2
  1229. #define SGEMM_DEFAULT_UNROLL_N 8
  1230. #define DGEMM_DEFAULT_UNROLL_M 2
  1231. #define DGEMM_DEFAULT_UNROLL_N 8
  1232. #define CGEMM_DEFAULT_UNROLL_M 1
  1233. #define CGEMM_DEFAULT_UNROLL_N 4
  1234. #define ZGEMM_DEFAULT_UNROLL_M 1
  1235. #define ZGEMM_DEFAULT_UNROLL_N 4
  1236. #define SGEMM_DEFAULT_P 108
  1237. #define DGEMM_DEFAULT_P 112
  1238. #define CGEMM_DEFAULT_P 108
  1239. #define ZGEMM_DEFAULT_P 112
  1240. #define SGEMM_DEFAULT_Q 288
  1241. #define DGEMM_DEFAULT_Q 144
  1242. #define CGEMM_DEFAULT_Q 144
  1243. #define ZGEMM_DEFAULT_Q 72
  1244. #define SGEMM_DEFAULT_R 2000
  1245. #define DGEMM_DEFAULT_R 2000
  1246. #define CGEMM_DEFAULT_R 2000
  1247. #define ZGEMM_DEFAULT_R 2000
  1248. #define SYMV_P 16
  1249. #endif /* SICORTEX */
  /*
   * Tuning macros selected when LOONGSON3A is defined.
   *
   * - SNUMOPT/DNUMOPT, the GEMM offsets, alignment and SYMV_P equal the
   *   SICORTEX values; the unroll factors and P/Q/R do not.
   * - NOTE(review): DGEMM_DEFAULT_R expands to the identifier dgemm_r (not
   *   defined in this chunk) while the S/C/Z variants are the literal 640
   *   -- confirm the mix is intentional.
   * - GEMM_OFFSET_A1 / GEMM_OFFSET_B1 are additional offsets defined only by
   *   the two LOONGSON3 blocks in this part of the file.
   */
  1250. #ifdef LOONGSON3A
  1251. // Copy from SICORTEX (NOTE(review): the unroll, P, Q and R values below no longer match the SICORTEX block)
  1252. #define SNUMOPT 2
  1253. #define DNUMOPT 2
  1254. #define GEMM_DEFAULT_OFFSET_A 0
  1255. #define GEMM_DEFAULT_OFFSET_B 0
  1256. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1257. #define SGEMM_DEFAULT_UNROLL_M 8
  1258. #define SGEMM_DEFAULT_UNROLL_N 4
  1259. #define DGEMM_DEFAULT_UNROLL_M 4
  1260. #define DGEMM_DEFAULT_UNROLL_N 4
  1261. #define CGEMM_DEFAULT_UNROLL_M 4
  1262. #define CGEMM_DEFAULT_UNROLL_N 2
  1263. #define ZGEMM_DEFAULT_UNROLL_M 2
  1264. #define ZGEMM_DEFAULT_UNROLL_N 2
  1265. #define SGEMM_DEFAULT_P 64
  1266. #define DGEMM_DEFAULT_P 44
  1267. #define CGEMM_DEFAULT_P 64
  1268. #define ZGEMM_DEFAULT_P 32
  1269. #define SGEMM_DEFAULT_Q 192
  1270. #define DGEMM_DEFAULT_Q 92
  1271. #define CGEMM_DEFAULT_Q 128
  1272. #define ZGEMM_DEFAULT_Q 80
  1273. #define SGEMM_DEFAULT_R 640
  1274. #define DGEMM_DEFAULT_R dgemm_r
  1275. #define CGEMM_DEFAULT_R 640
  1276. #define ZGEMM_DEFAULT_R 640
  1277. #define GEMM_OFFSET_A1 0x10000
  1278. #define GEMM_OFFSET_B1 0x100000
  1279. #define SYMV_P 16
  1280. #endif /* LOONGSON3A */
  /*
   * Tuning macros selected when LOONGSON3B is defined.
   * Every unroll factor is 2; P/Q/R are literal constants (R is 512 for
   * every precision).  GEMM_OFFSET_A1 / GEMM_OFFSET_B1 carry the same values
   * as in the LOONGSON3A block.
   */
  1281. #ifdef LOONGSON3B
  1282. #define SNUMOPT 2
  1283. #define DNUMOPT 2
  1284. #define GEMM_DEFAULT_OFFSET_A 0
  1285. #define GEMM_DEFAULT_OFFSET_B 0
  1286. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1287. #define SGEMM_DEFAULT_UNROLL_M 2
  1288. #define SGEMM_DEFAULT_UNROLL_N 2
  1289. #define DGEMM_DEFAULT_UNROLL_M 2
  1290. #define DGEMM_DEFAULT_UNROLL_N 2
  1291. #define CGEMM_DEFAULT_UNROLL_M 2
  1292. #define CGEMM_DEFAULT_UNROLL_N 2
  1293. #define ZGEMM_DEFAULT_UNROLL_M 2
  1294. #define ZGEMM_DEFAULT_UNROLL_N 2
  1295. #define SGEMM_DEFAULT_P 64
  1296. #define DGEMM_DEFAULT_P 24
  1297. #define CGEMM_DEFAULT_P 24
  1298. #define ZGEMM_DEFAULT_P 20
  1299. #define SGEMM_DEFAULT_Q 192
  1300. #define DGEMM_DEFAULT_Q 128
  1301. #define CGEMM_DEFAULT_Q 128
  1302. #define ZGEMM_DEFAULT_Q 64
  1303. #define SGEMM_DEFAULT_R 512
  1304. #define DGEMM_DEFAULT_R 512
  1305. #define CGEMM_DEFAULT_R 512
  1306. #define ZGEMM_DEFAULT_R 512
  1307. #define GEMM_OFFSET_A1 0x10000
  1308. #define GEMM_OFFSET_B1 0x100000
  1309. #define SYMV_P 16
  1310. #endif /* LOONGSON3B */
  /*
   * Tuning macros selected when GENERIC is defined.
   *
   * - UNROLL_N is defined once, before the ARCH_X86 conditional; only
   *   UNROLL_M depends on ARCH_X86.
   * - *_DEFAULT_P / *_DEFAULT_R expand to identifiers (sgemm_p, sgemm_r, ...)
   *   not defined in this part of the file; every *_DEFAULT_Q is 128.
   * - No SWITCH_RATIO or GETRF_FACTOR is defined in this block.
   */
  1311. #ifdef GENERIC
  1312. #define SNUMOPT 2
  1313. #define DNUMOPT 2
  1314. #define GEMM_DEFAULT_OFFSET_A 0
  1315. #define GEMM_DEFAULT_OFFSET_B 0
  1316. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1317. #define SGEMM_DEFAULT_UNROLL_N 4
  1318. #define DGEMM_DEFAULT_UNROLL_N 4
  1319. #define QGEMM_DEFAULT_UNROLL_N 2
  1320. #define CGEMM_DEFAULT_UNROLL_N 2
  1321. #define ZGEMM_DEFAULT_UNROLL_N 2
  1322. #define XGEMM_DEFAULT_UNROLL_N 1
  1323. #ifdef ARCH_X86
  1324. #define SGEMM_DEFAULT_UNROLL_M 4
  1325. #define DGEMM_DEFAULT_UNROLL_M 2
  1326. #define QGEMM_DEFAULT_UNROLL_M 2
  1327. #define CGEMM_DEFAULT_UNROLL_M 2
  1328. #define ZGEMM_DEFAULT_UNROLL_M 1
  1329. #define XGEMM_DEFAULT_UNROLL_M 1
  1330. #else /* ARCH_X86 not defined */
  1331. #define SGEMM_DEFAULT_UNROLL_M 8
  1332. #define DGEMM_DEFAULT_UNROLL_M 4
  1333. #define QGEMM_DEFAULT_UNROLL_M 2
  1334. #define CGEMM_DEFAULT_UNROLL_M 4
  1335. #define ZGEMM_DEFAULT_UNROLL_M 2
  1336. #define XGEMM_DEFAULT_UNROLL_M 1
  1337. #endif /* ARCH_X86 */
  1338. #define SGEMM_DEFAULT_P sgemm_p
  1339. #define DGEMM_DEFAULT_P dgemm_p
  1340. #define QGEMM_DEFAULT_P qgemm_p
  1341. #define CGEMM_DEFAULT_P cgemm_p
  1342. #define ZGEMM_DEFAULT_P zgemm_p
  1343. #define XGEMM_DEFAULT_P xgemm_p
  1344. #define SGEMM_DEFAULT_R sgemm_r
  1345. #define DGEMM_DEFAULT_R dgemm_r
  1346. #define QGEMM_DEFAULT_R qgemm_r
  1347. #define CGEMM_DEFAULT_R cgemm_r
  1348. #define ZGEMM_DEFAULT_R zgemm_r
  1349. #define XGEMM_DEFAULT_R xgemm_r
  1350. #define SGEMM_DEFAULT_Q 128
  1351. #define DGEMM_DEFAULT_Q 128
  1352. #define QGEMM_DEFAULT_Q 128
  1353. #define CGEMM_DEFAULT_Q 128
  1354. #define ZGEMM_DEFAULT_Q 128
  1355. #define XGEMM_DEFAULT_Q 128
  1356. #define SYMV_P 16
  1357. #endif /* GENERIC */
  /*
   * Fallbacks: give the QGEMM/XGEMM unroll macros a value of 2 when none of
   * the per-CPU blocks above defined them (several blocks above define only
   * the S/D/C/Z variants).  An earlier definition always wins because each
   * one is guarded by #ifndef.
   */
  1358. #ifndef QGEMM_DEFAULT_UNROLL_M
  1359. #define QGEMM_DEFAULT_UNROLL_M 2
  1360. #endif
  1361. #ifndef QGEMM_DEFAULT_UNROLL_N
  1362. #define QGEMM_DEFAULT_UNROLL_N 2
  1363. #endif
  1364. #ifndef XGEMM_DEFAULT_UNROLL_M
  1365. #define XGEMM_DEFAULT_UNROLL_M 2
  1366. #endif
  1367. #ifndef XGEMM_DEFAULT_UNROLL_N
  1368. #define XGEMM_DEFAULT_UNROLL_N 2
  1369. #endif
  /*
   * Shuffle-instruction helper macros.  Each expands to a mnemonic plus an
   * immediate and a trailing comma; the use site supplies the register
   * operands.
   *
   * - Without HAVE_SSE2, SHUFPD_0..3 are mapped to shufps.  The immediates
   *   0x44 / 0x4e / 0xe4 / 0xee select adjacent pairs of 32-bit lanes, so
   *   each one moves the same 64-bit halves as shufpd $0 / $1 / $2 / $3.
   * - Otherwise the #ifndef guards below supply the real shufpd forms.
   *   A SHUFPD_n already defined (here or earlier) is left untouched.
   * - SHUFPS_39 defaults to shufps with immediate 0x39.
   */
  1370. #ifndef HAVE_SSE2
  1371. #define SHUFPD_0 shufps $0x44,
  1372. #define SHUFPD_1 shufps $0x4e,
  1373. #define SHUFPD_2 shufps $0xe4,
  1374. #define SHUFPD_3 shufps $0xee,
  1375. #endif /* !HAVE_SSE2 */
  1376. #ifndef SHUFPD_0
  1377. #define SHUFPD_0 shufpd $0,
  1378. #endif
  1379. #ifndef SHUFPD_1
  1380. #define SHUFPD_1 shufpd $1,
  1381. #endif
  1382. #ifndef SHUFPD_2
  1383. #define SHUFPD_2 shufpd $2,
  1384. #endif
  1385. #ifndef SHUFPD_3
  1386. #define SHUFPD_3 shufpd $3,
  1387. #endif
  1388. #ifndef SHUFPS_39
  1389. #define SHUFPS_39 shufps $0x39,
  1390. #endif
  1391. #endif