You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

param.h 43 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794
/*****************************************************************************
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the ISCAS nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  /*
   * Parameter set selected when OPTERON is defined.
   *
   * Defines SNUMOPT/DNUMOPT, the GEMM buffer offsets and alignment mask,
   * the per-precision (S/D/Q/C/Z/X) UNROLL_M/UNROLL_N and P/Q/R values,
   * SYMV_P and the HAVE_EXCLUSIVE_CACHE flag.
   */
  #ifdef OPTERON

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* UNROLL_M depends on whether ARCH_X86 is defined. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  /*
   * P and R are not constants here: they expand to the identifiers
   * sgemm_p, sgemm_r, ... which must be declared elsewhere.
   */
  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is 248 when ALLOC_HUGETLB is defined and 240 otherwise. */
  #ifdef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_Q 248
  #define DGEMM_DEFAULT_Q 248
  #define QGEMM_DEFAULT_Q 248
  #define CGEMM_DEFAULT_Q 248
  #define ZGEMM_DEFAULT_Q 248
  #define XGEMM_DEFAULT_Q 248
  #else
  #define SGEMM_DEFAULT_Q 240
  #define DGEMM_DEFAULT_Q 240
  #define QGEMM_DEFAULT_Q 240
  #define CGEMM_DEFAULT_Q 240
  #define ZGEMM_DEFAULT_Q 240
  #define XGEMM_DEFAULT_Q 240
  #endif /* ALLOC_HUGETLB */

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #endif /* OPTERON */
  /*
   * Parameter set selected when BARCELONA or SHANGHAI is defined.
   *
   * Defines SNUMOPT/DNUMOPT, the GEMM buffer offsets and alignment mask,
   * the per-precision (S/D/Q/C/Z/X) UNROLL_M/UNROLL_N and P/Q/R values,
   * SYMV_P, the HAVE_EXCLUSIVE_CACHE flag and GEMM_THREAD.
   */
  #if defined(BARCELONA) || defined(SHANGHAI)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832
  #define GEMM_DEFAULT_ALIGN 0x0fffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* UNROLL_M depends on whether ARCH_X86 is defined. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  /*
   * The "#if 0" branch is permanently disabled; only the values in the
   * "#else" branch (P = 448/224/112/224/112/56, Q = 224) take effect.
   */
  #if 0
  #define SGEMM_DEFAULT_P 496
  #define DGEMM_DEFAULT_P 248
  #define QGEMM_DEFAULT_P 124
  #define CGEMM_DEFAULT_P 248
  #define ZGEMM_DEFAULT_P 124
  #define XGEMM_DEFAULT_P 62

  #define SGEMM_DEFAULT_Q 248
  #define DGEMM_DEFAULT_Q 248
  #define QGEMM_DEFAULT_Q 248
  #define CGEMM_DEFAULT_Q 248
  #define ZGEMM_DEFAULT_Q 248
  #define XGEMM_DEFAULT_Q 248
  #else
  #define SGEMM_DEFAULT_P 448
  #define DGEMM_DEFAULT_P 224
  #define QGEMM_DEFAULT_P 112
  #define CGEMM_DEFAULT_P 224
  #define ZGEMM_DEFAULT_P 112
  #define XGEMM_DEFAULT_P 56

  #define SGEMM_DEFAULT_Q 224
  #define DGEMM_DEFAULT_Q 224
  #define QGEMM_DEFAULT_Q 224
  #define CGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_Q 224
  #endif /* 0 */

  /* R expands to identifiers (sgemm_r, ...) that must be declared elsewhere. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  /* Expands to the identifier gemm_thread_mn, which is not defined here. */
  #define GEMM_THREAD gemm_thread_mn

  #endif /* BARCELONA || SHANGHAI */
  /*
   * Parameter set selected when BOBCATE is defined.
   *
   * NOTE(review): the selector is spelled BOBCATE; this may be a
   * misspelling of BOBCAT. Confirm against whatever defines the CPU
   * macro before renaming, since nothing in this file shows the definer.
   *
   * Defines SNUMOPT/DNUMOPT, the GEMM buffer offsets and alignment mask,
   * the per-precision (S/D/Q/C/Z/X) UNROLL_M/UNROLL_N and P/Q/R values,
   * SYMV_P, the HAVE_EXCLUSIVE_CACHE flag and GEMM_THREAD.
   */
  #if defined(BOBCATE)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832
  #define GEMM_DEFAULT_ALIGN 0x0fffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* UNROLL_M depends on whether ARCH_X86 is defined. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_P 448
  #define DGEMM_DEFAULT_P 224
  #define QGEMM_DEFAULT_P 112
  #define CGEMM_DEFAULT_P 224
  #define ZGEMM_DEFAULT_P 112
  #define XGEMM_DEFAULT_P 56

  #define SGEMM_DEFAULT_Q 224
  #define DGEMM_DEFAULT_Q 224
  #define QGEMM_DEFAULT_Q 224
  #define CGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_Q 224

  /* R expands to identifiers (sgemm_r, ...) that must be declared elsewhere. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  /* Expands to the identifier gemm_thread_mn, which is not defined here. */
  #define GEMM_THREAD gemm_thread_mn

  #endif /* BOBCATE */
  /*
   * Parameter set selected when ATHLON is defined.
   *
   * Unlike several neighbouring sections, the unroll factors here do not
   * depend on ARCH_X86. P and Q are numeric constants; R expands to
   * identifiers declared elsewhere.
   */
  #ifdef ATHLON

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 384
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* R expands to identifiers (sgemm_r, ...) that must be declared elsewhere. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_P 208
  #define DGEMM_DEFAULT_P 104
  #define QGEMM_DEFAULT_P 56
  #define CGEMM_DEFAULT_P 104
  #define ZGEMM_DEFAULT_P 56
  #define XGEMM_DEFAULT_P 28

  #define SGEMM_DEFAULT_Q 208
  #define DGEMM_DEFAULT_Q 208
  #define QGEMM_DEFAULT_Q 208
  #define CGEMM_DEFAULT_Q 208
  #define ZGEMM_DEFAULT_Q 208
  #define XGEMM_DEFAULT_Q 208

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #endif /* ATHLON */
  /*
   * Parameter set selected when VIAC3 is defined.
   *
   * The unroll factors do not depend on ARCH_X86. P and Q are numeric
   * constants; R expands to identifiers declared elsewhere. This section
   * does not define HAVE_EXCLUSIVE_CACHE.
   */
  #ifdef VIAC3

  #define SNUMOPT 2
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 256
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* R expands to identifiers (sgemm_r, ...) that must be declared elsewhere. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define QGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128
  #define XGEMM_DEFAULT_P 128

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #define SYMV_P 16

  #endif /* VIAC3 */
  /*
   * Parameter set selected when NANO is defined.
   *
   * Both UNROLL_N and UNROLL_M depend on whether ARCH_X86 is defined.
   * P and Q are numeric constants; R expands to identifiers declared
   * elsewhere.
   */
  #ifdef NANO

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_P 288
  #define DGEMM_DEFAULT_P 288
  #define QGEMM_DEFAULT_P 288
  #define CGEMM_DEFAULT_P 288
  #define ZGEMM_DEFAULT_P 288
  #define XGEMM_DEFAULT_P 288

  /* R expands to identifiers (sgemm_r, ...) that must be declared elsewhere. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 64
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 64
  #define XGEMM_DEFAULT_Q 32

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #endif /* NANO */
  /*
   * Parameter set selected when PENTIUM, PENTIUM2 or PENTIUM3 is defined.
   *
   * SNUMOPT and the SGEMM/CGEMM UNROLL_M values depend on HAVE_SSE.
   * Q is the constant 256 for every precision; P and R expand to
   * identifiers (sgemm_p, sgemm_r, ...) declared elsewhere.
   */
  #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)

  #ifdef HAVE_SSE
  #define SNUMOPT 2
  #else
  #define SNUMOPT 1
  #endif /* HAVE_SSE */
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #ifdef HAVE_SSE
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_M 4
  #else
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #endif /* HAVE_SSE */

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 4

  #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  /*
   * Parameter set selected when PENTIUMM is defined.
   *
   * The unroll factors depend on whether CORE_YONAH is defined.
   * Q is the constant 256 for every precision; P and R expand to
   * identifiers (sgemm_p, sgemm_r, ...) declared elsewhere.
   */
  #ifdef PENTIUMM

  #define SNUMOPT 2
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #ifdef CORE_YONAH
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* CORE_YONAH */

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 4

  #endif /* PENTIUMM */
  /*
   * Parameter set selected when CORE_NORTHWOOD is defined.
   *
   * The unroll factors do not depend on ARCH_X86. Q is the constant 128
   * for every precision; P and R expand to identifiers (sgemm_p,
   * sgemm_r, ...) declared elsewhere.
   */
  #ifdef CORE_NORTHWOOD

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 32
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 8

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #endif /* CORE_NORTHWOOD */
  /*
   * Parameter set selected when CORE_PRESCOTT is defined.
   *
   * The GEMM buffer offsets depend on __64BIT__ and UNROLL_M depends on
   * ARCH_X86. Q is the constant 128 for every precision; P and R expand
   * to identifiers (sgemm_p, sgemm_r, ...) declared elsewhere.
   */
  #ifdef CORE_PRESCOTT

  #define SNUMOPT 4
  #define DNUMOPT 2

  /* Note the negated test: the first branch applies when __64BIT__ is NOT defined. */
  #ifndef __64BIT__
  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 192
  #else
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 256
  #endif /* !__64BIT__ */

  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 8

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #endif /* CORE_PRESCOTT */
  /*
   * Parameter set selected when CORE2 is defined.
   *
   * UNROLL_N depends on ARCH_X86 (UNROLL_M is the same in both branches),
   * and the MASK helper macro is only defined in the ARCH_X86 branch.
   * Q is the constant 256 for every precision; P and R expand to
   * identifiers (sgemm_p, sgemm_r, ...) declared elsewhere.
   */
  #ifdef CORE2

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 448
  #define GEMM_DEFAULT_OFFSET_B 128
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8

  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /*
   * MASK(a, b): for integer operands with a >= 0 and b > 0, yields a
   * rounded up to a multiple of b. Both arguments are evaluated more
   * than once, so avoid arguments with side effects.
   */
  #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256

  #endif /* CORE2 */
  /*
   * Parameter set selected when PENRYN is defined.
   *
   * UNROLL_M depends on ARCH_X86 (UNROLL_N is the same in both branches).
   * Q is a per-precision constant; P and R expand to identifiers
   * (sgemm_p, sgemm_r, ...) declared elsewhere. Also defines
   * SWITCH_RATIO and the floating-point constant GETRF_FACTOR.
   */
  #ifdef PENRYN

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8

  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 128

  #define GETRF_FACTOR 0.75

  #endif /* PENRYN */
  /*
   * Parameter set selected when DUNNINGTON is defined.
   *
   * UNROLL_M depends on ARCH_X86 (UNROLL_N is the same in both branches).
   * Q is a per-precision constant; P and R expand to identifiers
   * (sgemm_p, sgemm_r, ...) declared elsewhere. Also defines
   * SWITCH_RATIO, the floating-point constant GETRF_FACTOR and
   * GEMM_THREAD.
   */
  #ifdef DUNNINGTON

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8

  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 768
  #define DGEMM_DEFAULT_Q 384
  #define QGEMM_DEFAULT_Q 192
  #define CGEMM_DEFAULT_Q 768
  #define ZGEMM_DEFAULT_Q 384
  #define XGEMM_DEFAULT_Q 192

  #define GETRF_FACTOR 0.75

  /* Expands to the identifier gemm_thread_mn, which is not defined here. */
  #define GEMM_THREAD gemm_thread_mn

  #endif /* DUNNINGTON */
  /*
   * Settings selected when NEHALEM is defined.
   * - The ARCH_X86 branch and the #else branch list the same UNROLL_M values
   *   and differ only in the S/D/C/Z UNROLL_N values.
   * - *_DEFAULT_P values are literal constants (504 for S/D/Q, 252 for
   *   C/Z/X); *_DEFAULT_R expands to lower-case identifiers (sgemm_r, ...)
   *   that are not defined in this part of the file.
   */
  723. #ifdef NEHALEM
  724. #define SNUMOPT 8
  725. #define DNUMOPT 4
  726. #define GEMM_DEFAULT_OFFSET_A 32
  727. #define GEMM_DEFAULT_OFFSET_B 0
  728. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  729. #define SYMV_P 8
  730. #define SWITCH_RATIO 4
  731. #ifdef ARCH_X86
  732. #define SGEMM_DEFAULT_UNROLL_M 4
  733. #define DGEMM_DEFAULT_UNROLL_M 2
  734. #define QGEMM_DEFAULT_UNROLL_M 2
  735. #define CGEMM_DEFAULT_UNROLL_M 2
  736. #define ZGEMM_DEFAULT_UNROLL_M 1
  737. #define XGEMM_DEFAULT_UNROLL_M 1
  738. #define SGEMM_DEFAULT_UNROLL_N 4
  739. #define DGEMM_DEFAULT_UNROLL_N 4
  740. #define QGEMM_DEFAULT_UNROLL_N 2
  741. #define CGEMM_DEFAULT_UNROLL_N 2
  742. #define ZGEMM_DEFAULT_UNROLL_N 2
  743. #define XGEMM_DEFAULT_UNROLL_N 1
  744. #else /* !ARCH_X86 */
  745. #define SGEMM_DEFAULT_UNROLL_M 4
  746. #define DGEMM_DEFAULT_UNROLL_M 2
  747. #define QGEMM_DEFAULT_UNROLL_M 2
  748. #define CGEMM_DEFAULT_UNROLL_M 2
  749. #define ZGEMM_DEFAULT_UNROLL_M 1
  750. #define XGEMM_DEFAULT_UNROLL_M 1
  751. #define SGEMM_DEFAULT_UNROLL_N 8
  752. #define DGEMM_DEFAULT_UNROLL_N 8
  753. #define QGEMM_DEFAULT_UNROLL_N 2
  754. #define CGEMM_DEFAULT_UNROLL_N 4
  755. #define ZGEMM_DEFAULT_UNROLL_N 4
  756. #define XGEMM_DEFAULT_UNROLL_N 1
  757. #endif /* ARCH_X86 */
  758. #define SGEMM_DEFAULT_P 504
  759. #define SGEMM_DEFAULT_R sgemm_r
  760. #define DGEMM_DEFAULT_P 504
  761. #define DGEMM_DEFAULT_R dgemm_r
  762. #define QGEMM_DEFAULT_P 504
  763. #define QGEMM_DEFAULT_R qgemm_r
  764. #define CGEMM_DEFAULT_P 252
  765. #define CGEMM_DEFAULT_R cgemm_r
  766. #define ZGEMM_DEFAULT_P 252
  767. #define ZGEMM_DEFAULT_R zgemm_r
  768. #define XGEMM_DEFAULT_P 252
  769. #define XGEMM_DEFAULT_R xgemm_r
  770. #define SGEMM_DEFAULT_Q 512
  771. #define DGEMM_DEFAULT_Q 256
  772. #define QGEMM_DEFAULT_Q 128
  773. #define CGEMM_DEFAULT_Q 512
  774. #define ZGEMM_DEFAULT_Q 256
  775. #define XGEMM_DEFAULT_Q 128
  776. #define GETRF_FACTOR 0.72
  777. #endif /* NEHALEM */
  /*
   * Settings selected when SANDYBRIDGE is defined.
   * Every value in this block is the same as the corresponding value in the
   * NEHALEM block of this file: same offsets, unroll factors per ARCH_X86
   * branch, literal P values, lower-case R identifiers, Q values and
   * GETRF_FACTOR.
   */
  778. #ifdef SANDYBRIDGE
  779. #define SNUMOPT 8
  780. #define DNUMOPT 4
  781. #define GEMM_DEFAULT_OFFSET_A 32
  782. #define GEMM_DEFAULT_OFFSET_B 0
  783. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  784. #define SYMV_P 8
  785. #define SWITCH_RATIO 4
  786. #ifdef ARCH_X86
  787. #define SGEMM_DEFAULT_UNROLL_M 4
  788. #define DGEMM_DEFAULT_UNROLL_M 2
  789. #define QGEMM_DEFAULT_UNROLL_M 2
  790. #define CGEMM_DEFAULT_UNROLL_M 2
  791. #define ZGEMM_DEFAULT_UNROLL_M 1
  792. #define XGEMM_DEFAULT_UNROLL_M 1
  793. #define SGEMM_DEFAULT_UNROLL_N 4
  794. #define DGEMM_DEFAULT_UNROLL_N 4
  795. #define QGEMM_DEFAULT_UNROLL_N 2
  796. #define CGEMM_DEFAULT_UNROLL_N 2
  797. #define ZGEMM_DEFAULT_UNROLL_N 2
  798. #define XGEMM_DEFAULT_UNROLL_N 1
  799. #else /* !ARCH_X86 */
  800. #define SGEMM_DEFAULT_UNROLL_M 4
  801. #define DGEMM_DEFAULT_UNROLL_M 2
  802. #define QGEMM_DEFAULT_UNROLL_M 2
  803. #define CGEMM_DEFAULT_UNROLL_M 2
  804. #define ZGEMM_DEFAULT_UNROLL_M 1
  805. #define XGEMM_DEFAULT_UNROLL_M 1
  806. #define SGEMM_DEFAULT_UNROLL_N 8
  807. #define DGEMM_DEFAULT_UNROLL_N 8
  808. #define QGEMM_DEFAULT_UNROLL_N 2
  809. #define CGEMM_DEFAULT_UNROLL_N 4
  810. #define ZGEMM_DEFAULT_UNROLL_N 4
  811. #define XGEMM_DEFAULT_UNROLL_N 1
  812. #endif /* ARCH_X86 */
  813. #define SGEMM_DEFAULT_P 504
  814. #define SGEMM_DEFAULT_R sgemm_r
  815. #define DGEMM_DEFAULT_P 504
  816. #define DGEMM_DEFAULT_R dgemm_r
  817. #define QGEMM_DEFAULT_P 504
  818. #define QGEMM_DEFAULT_R qgemm_r
  819. #define CGEMM_DEFAULT_P 252
  820. #define CGEMM_DEFAULT_R cgemm_r
  821. #define ZGEMM_DEFAULT_P 252
  822. #define ZGEMM_DEFAULT_R zgemm_r
  823. #define XGEMM_DEFAULT_P 252
  824. #define XGEMM_DEFAULT_R xgemm_r
  825. #define SGEMM_DEFAULT_Q 512
  826. #define DGEMM_DEFAULT_Q 256
  827. #define QGEMM_DEFAULT_Q 128
  828. #define CGEMM_DEFAULT_Q 512
  829. #define ZGEMM_DEFAULT_Q 256
  830. #define XGEMM_DEFAULT_Q 128
  831. #define GETRF_FACTOR 0.72
  832. #endif /* SANDYBRIDGE */
  /*
   * Settings selected when ATOM is defined.
   * - Only the UNROLL_M values depend on ARCH_X86; the UNROLL_N values are
   *   set once, after the conditional.
   * - *_DEFAULT_P and *_DEFAULT_R expand to lower-case identifiers that are
   *   not defined in this part of the file; every *_DEFAULT_Q is 256.
   * - This block does not define SWITCH_RATIO or GETRF_FACTOR.
   */
  833. #ifdef ATOM
  834. #define SNUMOPT 2
  835. #define DNUMOPT 1
  836. #define GEMM_DEFAULT_OFFSET_A 64
  837. #define GEMM_DEFAULT_OFFSET_B 0
  838. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  839. #define SYMV_P 8
  840. #ifdef ARCH_X86
  841. #define SGEMM_DEFAULT_UNROLL_M 4
  842. #define DGEMM_DEFAULT_UNROLL_M 2
  843. #define QGEMM_DEFAULT_UNROLL_M 2
  844. #define CGEMM_DEFAULT_UNROLL_M 2
  845. #define ZGEMM_DEFAULT_UNROLL_M 1
  846. #define XGEMM_DEFAULT_UNROLL_M 1
  847. #else /* !ARCH_X86 */
  848. #define SGEMM_DEFAULT_UNROLL_M 8
  849. #define DGEMM_DEFAULT_UNROLL_M 4
  850. #define QGEMM_DEFAULT_UNROLL_M 2
  851. #define CGEMM_DEFAULT_UNROLL_M 4
  852. #define ZGEMM_DEFAULT_UNROLL_M 2
  853. #define XGEMM_DEFAULT_UNROLL_M 1
  854. #endif /* ARCH_X86 */
  855. #define SGEMM_DEFAULT_UNROLL_N 4
  856. #define DGEMM_DEFAULT_UNROLL_N 2
  857. #define QGEMM_DEFAULT_UNROLL_N 2
  858. #define CGEMM_DEFAULT_UNROLL_N 2
  859. #define ZGEMM_DEFAULT_UNROLL_N 1
  860. #define XGEMM_DEFAULT_UNROLL_N 1
  861. #define SGEMM_DEFAULT_P sgemm_p
  862. #define SGEMM_DEFAULT_R sgemm_r
  863. #define DGEMM_DEFAULT_P dgemm_p
  864. #define DGEMM_DEFAULT_R dgemm_r
  865. #define QGEMM_DEFAULT_P qgemm_p
  866. #define QGEMM_DEFAULT_R qgemm_r
  867. #define CGEMM_DEFAULT_P cgemm_p
  868. #define CGEMM_DEFAULT_R cgemm_r
  869. #define ZGEMM_DEFAULT_P zgemm_p
  870. #define ZGEMM_DEFAULT_R zgemm_r
  871. #define XGEMM_DEFAULT_P xgemm_p
  872. #define XGEMM_DEFAULT_R xgemm_r
  873. #define SGEMM_DEFAULT_Q 256
  874. #define DGEMM_DEFAULT_Q 256
  875. #define QGEMM_DEFAULT_Q 256
  876. #define CGEMM_DEFAULT_Q 256
  877. #define ZGEMM_DEFAULT_Q 256
  878. #define XGEMM_DEFAULT_Q 256
  879. #endif /* ATOM */
  /*
   * Settings selected when ITANIUM2 is defined.
   * - Unroll factors are 8x8 for S/D/Q and 4x4 for C/Z/X, with no
   *   architecture sub-conditional.
   * - *_DEFAULT_P and *_DEFAULT_R expand to lower-case identifiers that are
   *   not defined in this part of the file; every *_DEFAULT_Q is 1024.
   */
  880. #ifdef ITANIUM2
  881. #define SNUMOPT 4
  882. #define DNUMOPT 4
  883. #define GEMM_DEFAULT_OFFSET_A 0
  884. #define GEMM_DEFAULT_OFFSET_B 128
  885. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  886. #define SGEMM_DEFAULT_UNROLL_M 8
  887. #define SGEMM_DEFAULT_UNROLL_N 8
  888. #define DGEMM_DEFAULT_UNROLL_M 8
  889. #define DGEMM_DEFAULT_UNROLL_N 8
  890. #define QGEMM_DEFAULT_UNROLL_M 8
  891. #define QGEMM_DEFAULT_UNROLL_N 8
  892. #define CGEMM_DEFAULT_UNROLL_M 4
  893. #define CGEMM_DEFAULT_UNROLL_N 4
  894. #define ZGEMM_DEFAULT_UNROLL_M 4
  895. #define ZGEMM_DEFAULT_UNROLL_N 4
  896. #define XGEMM_DEFAULT_UNROLL_M 4
  897. #define XGEMM_DEFAULT_UNROLL_N 4
  898. #define SGEMM_DEFAULT_P sgemm_p
  899. #define DGEMM_DEFAULT_P dgemm_p
  900. #define QGEMM_DEFAULT_P qgemm_p
  901. #define CGEMM_DEFAULT_P cgemm_p
  902. #define ZGEMM_DEFAULT_P zgemm_p
  903. #define XGEMM_DEFAULT_P xgemm_p
  904. #define SGEMM_DEFAULT_Q 1024
  905. #define DGEMM_DEFAULT_Q 1024
  906. #define QGEMM_DEFAULT_Q 1024
  907. #define CGEMM_DEFAULT_Q 1024
  908. #define ZGEMM_DEFAULT_Q 1024
  909. #define XGEMM_DEFAULT_Q 1024
  910. #define SGEMM_DEFAULT_R sgemm_r
  911. #define DGEMM_DEFAULT_R dgemm_r
  912. #define QGEMM_DEFAULT_R qgemm_r
  913. #define CGEMM_DEFAULT_R cgemm_r
  914. #define ZGEMM_DEFAULT_R zgemm_r
  915. #define XGEMM_DEFAULT_R xgemm_r
  916. #define SYMV_P 16
  917. #define GETRF_FACTOR 0.65
  918. #endif /* ITANIUM2 */
  /*
   * Settings shared by EV4, EV5 and EV6.
   * - SNUMOPT/DNUMOPT are 1 for EV4 and 2 otherwise.
   * - Offsets, alignment, unroll factors and SYMV_P are common to all three.
   * - P and Q are set per variant. Only the EV4 sub-block also sets
   *   *_DEFAULT_R; the EV5 and EV6 sub-blocks leave it undefined here.
   * - No QGEMM/XGEMM macros are defined in this block.
   */
  919. #if defined(EV4) || defined(EV5) || defined(EV6)
  920. #ifdef EV4
  921. #define SNUMOPT 1
  922. #define DNUMOPT 1
  923. #else /* EV5 or EV6 */
  924. #define SNUMOPT 2
  925. #define DNUMOPT 2
  926. #endif
  927. #define GEMM_DEFAULT_OFFSET_A 512
  928. #define GEMM_DEFAULT_OFFSET_B 512
  929. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  930. #define SGEMM_DEFAULT_UNROLL_M 4
  931. #define SGEMM_DEFAULT_UNROLL_N 4
  932. #define DGEMM_DEFAULT_UNROLL_M 4
  933. #define DGEMM_DEFAULT_UNROLL_N 4
  934. #define CGEMM_DEFAULT_UNROLL_M 2
  935. #define CGEMM_DEFAULT_UNROLL_N 2
  936. #define ZGEMM_DEFAULT_UNROLL_M 2
  937. #define ZGEMM_DEFAULT_UNROLL_N 2
  938. #define SYMV_P 8
  939. #ifdef EV4
  940. #define SGEMM_DEFAULT_P 32
  941. #define SGEMM_DEFAULT_Q 112
  942. #define SGEMM_DEFAULT_R 256
  943. #define DGEMM_DEFAULT_P 32
  944. #define DGEMM_DEFAULT_Q 56
  945. #define DGEMM_DEFAULT_R 256
  946. #define CGEMM_DEFAULT_P 32
  947. #define CGEMM_DEFAULT_Q 64
  948. #define CGEMM_DEFAULT_R 240
  949. #define ZGEMM_DEFAULT_P 32
  950. #define ZGEMM_DEFAULT_Q 32
  951. #define ZGEMM_DEFAULT_R 240
  952. #endif /* EV4 */
  953. #ifdef EV5
  954. #define SGEMM_DEFAULT_P 64
  955. #define SGEMM_DEFAULT_Q 256
  956. #define DGEMM_DEFAULT_P 64
  957. #define DGEMM_DEFAULT_Q 128
  958. #define CGEMM_DEFAULT_P 64
  959. #define CGEMM_DEFAULT_Q 128
  960. #define ZGEMM_DEFAULT_P 64
  961. #define ZGEMM_DEFAULT_Q 64
  962. #endif /* EV5 */
  963. #ifdef EV6
  964. #define SGEMM_DEFAULT_P 256
  965. #define SGEMM_DEFAULT_Q 512
  966. #define DGEMM_DEFAULT_P 256
  967. #define DGEMM_DEFAULT_Q 256
  968. #define CGEMM_DEFAULT_P 256
  969. #define CGEMM_DEFAULT_Q 256
  970. #define ZGEMM_DEFAULT_P 128
  971. #define ZGEMM_DEFAULT_Q 256
  972. #endif /* EV6 */
  973. #endif /* EV4 || EV5 || EV6 */
  /*
   * Settings selected when CELL is defined.
   * All values are literal constants; the block sets S/D/C/Z unroll, P and Q
   * values only (no *_DEFAULT_R and no QGEMM/XGEMM macros are defined here).
   */
  974. #ifdef CELL
  975. #define SNUMOPT 2
  976. #define DNUMOPT 2
  977. #define GEMM_DEFAULT_OFFSET_A 0
  978. #define GEMM_DEFAULT_OFFSET_B 8192
  979. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  980. #define SGEMM_DEFAULT_UNROLL_M 16
  981. #define SGEMM_DEFAULT_UNROLL_N 4
  982. #define DGEMM_DEFAULT_UNROLL_M 4
  983. #define DGEMM_DEFAULT_UNROLL_N 4
  984. #define CGEMM_DEFAULT_UNROLL_M 8
  985. #define CGEMM_DEFAULT_UNROLL_N 2
  986. #define ZGEMM_DEFAULT_UNROLL_M 2
  987. #define ZGEMM_DEFAULT_UNROLL_N 2
  988. #define SGEMM_DEFAULT_P 128
  989. #define DGEMM_DEFAULT_P 128
  990. #define CGEMM_DEFAULT_P 128
  991. #define ZGEMM_DEFAULT_P 128
  992. #define SGEMM_DEFAULT_Q 512
  993. #define DGEMM_DEFAULT_Q 256
  994. #define CGEMM_DEFAULT_Q 256
  995. #define ZGEMM_DEFAULT_Q 128
  996. #define SYMV_P 4
  997. #endif /* CELL */
  /*
   * Settings selected when PPCG4 is defined.
   * The unroll factors are the same as in the CELL and PPC970 blocks of this
   * file. This block does not define SNUMOPT/DNUMOPT or any *_DEFAULT_R.
   */
  998. #ifdef PPCG4
  999. #define GEMM_DEFAULT_OFFSET_A 0
  1000. #define GEMM_DEFAULT_OFFSET_B 1024
  1001. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1002. #define SGEMM_DEFAULT_UNROLL_M 16
  1003. #define SGEMM_DEFAULT_UNROLL_N 4
  1004. #define DGEMM_DEFAULT_UNROLL_M 4
  1005. #define DGEMM_DEFAULT_UNROLL_N 4
  1006. #define CGEMM_DEFAULT_UNROLL_M 8
  1007. #define CGEMM_DEFAULT_UNROLL_N 2
  1008. #define ZGEMM_DEFAULT_UNROLL_M 2
  1009. #define ZGEMM_DEFAULT_UNROLL_N 2
  1010. #define SGEMM_DEFAULT_P 256
  1011. #define DGEMM_DEFAULT_P 128
  1012. #define CGEMM_DEFAULT_P 128
  1013. #define ZGEMM_DEFAULT_P 64
  1014. #define SGEMM_DEFAULT_Q 256
  1015. #define DGEMM_DEFAULT_Q 256
  1016. #define CGEMM_DEFAULT_Q 256
  1017. #define ZGEMM_DEFAULT_Q 256
  1018. #define SYMV_P 4
  1019. #endif /* PPCG4 */
  /*
   * Settings selected when PPC970 is defined.
   * - *_DEFAULT_P is defined only inside #ifdef OS_LINUX. Within that, the
   *   larger set is chosen when L2_SIZE == 1024976 and the 176 set otherwise.
   * NOTE(review): 1024976 is not a power of two (2^20 is 1048576); verify
   * that it matches the L2_SIZE value the build actually supplies before
   * touching it.
   * NOTE(review): with OS_LINUX undefined this block leaves *_DEFAULT_P
   * undefined - confirm that case is handled elsewhere.
   */
  1020. #ifdef PPC970
  1021. #define SNUMOPT 4
  1022. #define DNUMOPT 4
  1023. #define GEMM_DEFAULT_OFFSET_A 2688
  1024. #define GEMM_DEFAULT_OFFSET_B 3072
  1025. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1026. #define SGEMM_DEFAULT_UNROLL_M 16
  1027. #define SGEMM_DEFAULT_UNROLL_N 4
  1028. #define DGEMM_DEFAULT_UNROLL_M 4
  1029. #define DGEMM_DEFAULT_UNROLL_N 4
  1030. #define CGEMM_DEFAULT_UNROLL_M 8
  1031. #define CGEMM_DEFAULT_UNROLL_N 2
  1032. #define ZGEMM_DEFAULT_UNROLL_M 2
  1033. #define ZGEMM_DEFAULT_UNROLL_N 2
  1034. #ifdef OS_LINUX
  1035. #if L2_SIZE == 1024976
  1036. #define SGEMM_DEFAULT_P 320
  1037. #define DGEMM_DEFAULT_P 256
  1038. #define CGEMM_DEFAULT_P 256
  1039. #define ZGEMM_DEFAULT_P 256
  1040. #else /* any other L2_SIZE */
  1041. #define SGEMM_DEFAULT_P 176
  1042. #define DGEMM_DEFAULT_P 176
  1043. #define CGEMM_DEFAULT_P 176
  1044. #define ZGEMM_DEFAULT_P 176
  1045. #endif /* L2_SIZE */
  1046. #endif /* OS_LINUX */
  1047. #define SGEMM_DEFAULT_Q 512
  1048. #define DGEMM_DEFAULT_Q 256
  1049. #define CGEMM_DEFAULT_Q 256
  1050. #define ZGEMM_DEFAULT_Q 128
  1051. #define SYMV_P 4
  1052. #endif /* PPC970 */
  /*
   * Settings selected when PPC440 is defined.
   * - Both GEMM offsets are written as (32 * 0), i.e. they evaluate to 0.
   * - Each *_DEFAULT_R is defined as the matching *_DEFAULT_P macro, so R
   *   expands to the same value as P (512).
   */
  1053. #ifdef PPC440
  1054. #define SNUMOPT 2
  1055. #define DNUMOPT 2
  1056. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1057. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1058. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1059. #define SGEMM_DEFAULT_UNROLL_M 4
  1060. #define SGEMM_DEFAULT_UNROLL_N 4
  1061. #define DGEMM_DEFAULT_UNROLL_M 4
  1062. #define DGEMM_DEFAULT_UNROLL_N 4
  1063. #define CGEMM_DEFAULT_UNROLL_M 2
  1064. #define CGEMM_DEFAULT_UNROLL_N 2
  1065. #define ZGEMM_DEFAULT_UNROLL_M 2
  1066. #define ZGEMM_DEFAULT_UNROLL_N 2
  1067. #define SGEMM_DEFAULT_P 512
  1068. #define DGEMM_DEFAULT_P 512
  1069. #define CGEMM_DEFAULT_P 512
  1070. #define ZGEMM_DEFAULT_P 512
  1071. #define SGEMM_DEFAULT_Q 1024
  1072. #define DGEMM_DEFAULT_Q 512
  1073. #define CGEMM_DEFAULT_Q 512
  1074. #define ZGEMM_DEFAULT_Q 256
  1075. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1076. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1077. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1078. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1079. #define SYMV_P 4
  1080. #endif /* PPC440 */
  /*
   * Settings selected when PPC440FP2 is defined.
   * - Both GEMM offsets are written as (32 * 0), i.e. they evaluate to 0.
   * - The Q values sit behind a hard-wired "#if 1": the first set
   *   (4096/3072/2048/1024) is always used and the #else set
   *   (512/256/256/128) is never compiled.
   * - No *_DEFAULT_R is defined in this block.
   */
  1081. #ifdef PPC440FP2
  1082. #define SNUMOPT 4
  1083. #define DNUMOPT 4
  1084. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1085. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1086. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1087. #define SGEMM_DEFAULT_UNROLL_M 8
  1088. #define SGEMM_DEFAULT_UNROLL_N 4
  1089. #define DGEMM_DEFAULT_UNROLL_M 8
  1090. #define DGEMM_DEFAULT_UNROLL_N 4
  1091. #define CGEMM_DEFAULT_UNROLL_M 4
  1092. #define CGEMM_DEFAULT_UNROLL_N 2
  1093. #define ZGEMM_DEFAULT_UNROLL_M 4
  1094. #define ZGEMM_DEFAULT_UNROLL_N 2
  1095. #define SGEMM_DEFAULT_P 128
  1096. #define DGEMM_DEFAULT_P 128
  1097. #define CGEMM_DEFAULT_P 128
  1098. #define ZGEMM_DEFAULT_P 128
  1099. #if 1 /* always taken */
  1100. #define SGEMM_DEFAULT_Q 4096
  1101. #define DGEMM_DEFAULT_Q 3072
  1102. #define CGEMM_DEFAULT_Q 2048
  1103. #define ZGEMM_DEFAULT_Q 1024
  1104. #else /* disabled alternative */
  1105. #define SGEMM_DEFAULT_Q 512
  1106. #define DGEMM_DEFAULT_Q 256
  1107. #define CGEMM_DEFAULT_Q 256
  1108. #define ZGEMM_DEFAULT_Q 128
  1109. #endif
  1110. #define SYMV_P 4
  1111. #endif /* PPC440FP2 */
  /*
   * Settings shared by POWER3, POWER4 and POWER5.
   * - Offsets, alignment, unroll factors and SYMV_P are common to all three.
   * - POWER3: sets SNUMOPT/DNUMOPT and P/Q/R for S, D and Z only.
   * - POWER4: sets only *_DEFAULT_P, chosen by ALLOC_HUGETLB.
   * - POWER5: sets *_DEFAULT_P (chosen by ALLOC_HUGETLB) and *_DEFAULT_Q.
   * NOTE(review): the POWER3 sub-block defines no CGEMM_DEFAULT_P/Q/R and
   * the POWER4 sub-block defines no *_DEFAULT_Q; SNUMOPT/DNUMOPT are only
   * set for POWER3. Confirm these are supplied elsewhere or unused.
   */
  1112. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1113. #define GEMM_DEFAULT_OFFSET_A 0
  1114. #define GEMM_DEFAULT_OFFSET_B 2048
  1115. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1116. #define SGEMM_DEFAULT_UNROLL_M 4
  1117. #define SGEMM_DEFAULT_UNROLL_N 4
  1118. #define DGEMM_DEFAULT_UNROLL_M 4
  1119. #define DGEMM_DEFAULT_UNROLL_N 4
  1120. #define CGEMM_DEFAULT_UNROLL_M 2
  1121. #define CGEMM_DEFAULT_UNROLL_N 2
  1122. #define ZGEMM_DEFAULT_UNROLL_M 2
  1123. #define ZGEMM_DEFAULT_UNROLL_N 2
  1124. #ifdef POWER3
  1125. #define SNUMOPT 4
  1126. #define DNUMOPT 4
  1127. #define SGEMM_DEFAULT_P 256
  1128. #define SGEMM_DEFAULT_Q 432
  1129. #define SGEMM_DEFAULT_R 1012
  1130. #define DGEMM_DEFAULT_P 256
  1131. #define DGEMM_DEFAULT_Q 216
  1132. #define DGEMM_DEFAULT_R 1012
  1133. #define ZGEMM_DEFAULT_P 256
  1134. #define ZGEMM_DEFAULT_Q 104
  1135. #define ZGEMM_DEFAULT_R 1012
  1136. #endif /* POWER3 */
  1137. #if defined(POWER4)
  1138. #ifdef ALLOC_HUGETLB
  1139. #define SGEMM_DEFAULT_P 184
  1140. #define DGEMM_DEFAULT_P 184
  1141. #define CGEMM_DEFAULT_P 184
  1142. #define ZGEMM_DEFAULT_P 184
  1143. #else /* !ALLOC_HUGETLB */
  1144. #define SGEMM_DEFAULT_P 144
  1145. #define DGEMM_DEFAULT_P 144
  1146. #define CGEMM_DEFAULT_P 144
  1147. #define ZGEMM_DEFAULT_P 144
  1148. #endif /* ALLOC_HUGETLB */
  1149. #endif /* POWER4 */
  1150. #if defined(POWER5)
  1151. #ifdef ALLOC_HUGETLB
  1152. #define SGEMM_DEFAULT_P 512
  1153. #define DGEMM_DEFAULT_P 256
  1154. #define CGEMM_DEFAULT_P 256
  1155. #define ZGEMM_DEFAULT_P 128
  1156. #else /* !ALLOC_HUGETLB */
  1157. #define SGEMM_DEFAULT_P 320
  1158. #define DGEMM_DEFAULT_P 160
  1159. #define CGEMM_DEFAULT_P 160
  1160. #define ZGEMM_DEFAULT_P 80
  1161. #endif /* ALLOC_HUGETLB */
  1162. #define SGEMM_DEFAULT_Q 256
  1163. #define CGEMM_DEFAULT_Q 256
  1164. #define DGEMM_DEFAULT_Q 256
  1165. #define ZGEMM_DEFAULT_Q 256
  1166. #endif /* POWER5 */
  1167. #define SYMV_P 8
  1168. #endif /* POWER3 || POWER4 || POWER5 */
  /*
   * Settings selected when POWER6 is defined.
   * All values are literal constants. The complex (C/Z) kernels use a 2x4
   * unroll while the real (S/D) kernels use 4x4. No *_DEFAULT_R is defined
   * in this block.
   */
  1169. #if defined(POWER6)
  1170. #define SNUMOPT 4
  1171. #define DNUMOPT 4
  1172. #define GEMM_DEFAULT_OFFSET_A 384
  1173. #define GEMM_DEFAULT_OFFSET_B 1024
  1174. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1175. #define SGEMM_DEFAULT_UNROLL_M 4
  1176. #define SGEMM_DEFAULT_UNROLL_N 4
  1177. #define DGEMM_DEFAULT_UNROLL_M 4
  1178. #define DGEMM_DEFAULT_UNROLL_N 4
  1179. #define CGEMM_DEFAULT_UNROLL_M 2
  1180. #define CGEMM_DEFAULT_UNROLL_N 4
  1181. #define ZGEMM_DEFAULT_UNROLL_M 2
  1182. #define ZGEMM_DEFAULT_UNROLL_N 4
  1183. #define SGEMM_DEFAULT_P 992
  1184. #define DGEMM_DEFAULT_P 480
  1185. #define CGEMM_DEFAULT_P 488
  1186. #define ZGEMM_DEFAULT_P 248
  1187. #define SGEMM_DEFAULT_Q 504
  1188. #define DGEMM_DEFAULT_Q 504
  1189. #define CGEMM_DEFAULT_Q 400
  1190. #define ZGEMM_DEFAULT_Q 400
  1191. #define SYMV_P 8
  1192. #endif /* POWER6 */
  /*
   * Settings selected when both SPARC and V7 are defined.
   * Unroll factors are 2x8 for S/D and 1x4 for C/Z. This block also defines
   * GEMM_THREAD; it defines no *_DEFAULT_R.
   */
  1193. #if defined(SPARC) && defined(V7)
  1194. #define SNUMOPT 4
  1195. #define DNUMOPT 4
  1196. #define GEMM_DEFAULT_OFFSET_A 0
  1197. #define GEMM_DEFAULT_OFFSET_B 2048
  1198. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1199. #define SGEMM_DEFAULT_UNROLL_M 2
  1200. #define SGEMM_DEFAULT_UNROLL_N 8
  1201. #define DGEMM_DEFAULT_UNROLL_M 2
  1202. #define DGEMM_DEFAULT_UNROLL_N 8
  1203. #define CGEMM_DEFAULT_UNROLL_M 1
  1204. #define CGEMM_DEFAULT_UNROLL_N 4
  1205. #define ZGEMM_DEFAULT_UNROLL_M 1
  1206. #define ZGEMM_DEFAULT_UNROLL_N 4
  1207. #define SGEMM_DEFAULT_P 256
  1208. #define DGEMM_DEFAULT_P 256
  1209. #define CGEMM_DEFAULT_P 256
  1210. #define ZGEMM_DEFAULT_P 256
  1211. #define SGEMM_DEFAULT_Q 512
  1212. #define DGEMM_DEFAULT_Q 256
  1213. #define CGEMM_DEFAULT_Q 256
  1214. #define ZGEMM_DEFAULT_Q 128
  1215. #define SYMV_P 8
  1216. #define GEMM_THREAD gemm_thread_mn
  1217. #endif /* SPARC && V7 */
  /*
   * Settings selected when both SPARC and V9 are defined.
   * Unroll factors are 4x4 for S/D and 2x2 for C/Z. Unlike the SPARC/V7
   * block, GEMM_THREAD is not defined here; no *_DEFAULT_R is defined
   * either.
   */
  1218. #if defined(SPARC) && defined(V9)
  1219. #define SNUMOPT 2
  1220. #define DNUMOPT 2
  1221. #define GEMM_DEFAULT_OFFSET_A 0
  1222. #define GEMM_DEFAULT_OFFSET_B 2048
  1223. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1224. #define SGEMM_DEFAULT_UNROLL_M 4
  1225. #define SGEMM_DEFAULT_UNROLL_N 4
  1226. #define DGEMM_DEFAULT_UNROLL_M 4
  1227. #define DGEMM_DEFAULT_UNROLL_N 4
  1228. #define CGEMM_DEFAULT_UNROLL_M 2
  1229. #define CGEMM_DEFAULT_UNROLL_N 2
  1230. #define ZGEMM_DEFAULT_UNROLL_M 2
  1231. #define ZGEMM_DEFAULT_UNROLL_N 2
  1232. #define SGEMM_DEFAULT_P 512
  1233. #define DGEMM_DEFAULT_P 512
  1234. #define CGEMM_DEFAULT_P 512
  1235. #define ZGEMM_DEFAULT_P 512
  1236. #define SGEMM_DEFAULT_Q 1024
  1237. #define DGEMM_DEFAULT_Q 512
  1238. #define CGEMM_DEFAULT_Q 512
  1239. #define ZGEMM_DEFAULT_Q 256
  1240. #define SYMV_P 8
  1241. #endif /* SPARC && V9 */
  /*
   * Settings selected when SICORTEX is defined.
   * All values are literal constants, including *_DEFAULT_R (2000 for every
   * precision). Unroll factors are 2x8 for S/D and 1x4 for C/Z.
   */
  1242. #ifdef SICORTEX
  1243. #define SNUMOPT 2
  1244. #define DNUMOPT 2
  1245. #define GEMM_DEFAULT_OFFSET_A 0
  1246. #define GEMM_DEFAULT_OFFSET_B 0
  1247. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1248. #define SGEMM_DEFAULT_UNROLL_M 2
  1249. #define SGEMM_DEFAULT_UNROLL_N 8
  1250. #define DGEMM_DEFAULT_UNROLL_M 2
  1251. #define DGEMM_DEFAULT_UNROLL_N 8
  1252. #define CGEMM_DEFAULT_UNROLL_M 1
  1253. #define CGEMM_DEFAULT_UNROLL_N 4
  1254. #define ZGEMM_DEFAULT_UNROLL_M 1
  1255. #define ZGEMM_DEFAULT_UNROLL_N 4
  1256. #define SGEMM_DEFAULT_P 108
  1257. #define DGEMM_DEFAULT_P 112
  1258. #define CGEMM_DEFAULT_P 108
  1259. #define ZGEMM_DEFAULT_P 112
  1260. #define SGEMM_DEFAULT_Q 288
  1261. #define DGEMM_DEFAULT_Q 144
  1262. #define CGEMM_DEFAULT_Q 144
  1263. #define ZGEMM_DEFAULT_Q 72
  1264. #define SGEMM_DEFAULT_R 2000
  1265. #define DGEMM_DEFAULT_R 2000
  1266. #define CGEMM_DEFAULT_R 2000
  1267. #define ZGEMM_DEFAULT_R 2000
  1268. #define SYMV_P 16
  1269. #endif /* SICORTEX */
  /*
   * Settings selected when LOONGSON3A is defined.
   * - DGEMM_DEFAULT_R expands to the identifier dgemm_r (not defined in this
   *   part of the file) while the S/C/Z R values are the literal 640.
   * - This block additionally defines GEMM_OFFSET_A1 and GEMM_OFFSET_B1.
   * NOTE(review): confirm the mixed literal/identifier R values are
   * intentional.
   */
  1270. #ifdef LOONGSON3A
  1271. // Originally copied from the SICORTEX block; the unroll, P, Q and R values below now differ from it.
  1272. #define SNUMOPT 2
  1273. #define DNUMOPT 2
  1274. #define GEMM_DEFAULT_OFFSET_A 0
  1275. #define GEMM_DEFAULT_OFFSET_B 0
  1276. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1277. #define SGEMM_DEFAULT_UNROLL_M 8
  1278. #define SGEMM_DEFAULT_UNROLL_N 4
  1279. #define DGEMM_DEFAULT_UNROLL_M 4
  1280. #define DGEMM_DEFAULT_UNROLL_N 4
  1281. #define CGEMM_DEFAULT_UNROLL_M 4
  1282. #define CGEMM_DEFAULT_UNROLL_N 2
  1283. #define ZGEMM_DEFAULT_UNROLL_M 2
  1284. #define ZGEMM_DEFAULT_UNROLL_N 2
  1285. #define SGEMM_DEFAULT_P 64
  1286. #define DGEMM_DEFAULT_P 44
  1287. #define CGEMM_DEFAULT_P 64
  1288. #define ZGEMM_DEFAULT_P 32
  1289. #define SGEMM_DEFAULT_Q 192
  1290. #define DGEMM_DEFAULT_Q 92
  1291. #define CGEMM_DEFAULT_Q 128
  1292. #define ZGEMM_DEFAULT_Q 80
  1293. #define SGEMM_DEFAULT_R 640
  1294. #define DGEMM_DEFAULT_R dgemm_r
  1295. #define CGEMM_DEFAULT_R 640
  1296. #define ZGEMM_DEFAULT_R 640
  1297. #define GEMM_OFFSET_A1 0x10000
  1298. #define GEMM_OFFSET_B1 0x100000
  1299. #define SYMV_P 16
  1300. #endif /* LOONGSON3A */
  /*
   * Settings selected when LOONGSON3B is defined.
   * All values are literal constants: 2x2 unroll for every precision and
   * *_DEFAULT_R of 512. GEMM_OFFSET_A1 and GEMM_OFFSET_B1 carry the same
   * values as in the LOONGSON3A block of this file.
   */
  1301. #ifdef LOONGSON3B
  1302. #define SNUMOPT 2
  1303. #define DNUMOPT 2
  1304. #define GEMM_DEFAULT_OFFSET_A 0
  1305. #define GEMM_DEFAULT_OFFSET_B 0
  1306. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1307. #define SGEMM_DEFAULT_UNROLL_M 2
  1308. #define SGEMM_DEFAULT_UNROLL_N 2
  1309. #define DGEMM_DEFAULT_UNROLL_M 2
  1310. #define DGEMM_DEFAULT_UNROLL_N 2
  1311. #define CGEMM_DEFAULT_UNROLL_M 2
  1312. #define CGEMM_DEFAULT_UNROLL_N 2
  1313. #define ZGEMM_DEFAULT_UNROLL_M 2
  1314. #define ZGEMM_DEFAULT_UNROLL_N 2
  1315. #define SGEMM_DEFAULT_P 64
  1316. #define DGEMM_DEFAULT_P 24
  1317. #define CGEMM_DEFAULT_P 24
  1318. #define ZGEMM_DEFAULT_P 20
  1319. #define SGEMM_DEFAULT_Q 192
  1320. #define DGEMM_DEFAULT_Q 128
  1321. #define CGEMM_DEFAULT_Q 128
  1322. #define ZGEMM_DEFAULT_Q 64
  1323. #define SGEMM_DEFAULT_R 512
  1324. #define DGEMM_DEFAULT_R 512
  1325. #define CGEMM_DEFAULT_R 512
  1326. #define ZGEMM_DEFAULT_R 512
  1327. #define GEMM_OFFSET_A1 0x10000
  1328. #define GEMM_OFFSET_B1 0x100000
  1329. #define SYMV_P 16
  1330. #endif /* LOONGSON3B */
  /*
   * Settings selected when GENERIC is defined.
   * - UNROLL_N values are set unconditionally; UNROLL_M values depend on
   *   ARCH_X86.
   * - Unlike the other CPU blocks visible in this part of the file, P, Q and
   *   R are set through the un-suffixed names (SGEMM_P, SGEMM_Q, SGEMM_R,
   *   ...) rather than through *_DEFAULT_P/Q/R. P and R expand to lower-case
   *   identifiers that are not defined here; every Q is 128.
   * NOTE(review): confirm that no later code expects *_DEFAULT_P/Q/R to
   * exist for GENERIC builds.
   */
  1331. #ifdef GENERIC
  1332. #define SNUMOPT 2
  1333. #define DNUMOPT 2
  1334. #define GEMM_DEFAULT_OFFSET_A 0
  1335. #define GEMM_DEFAULT_OFFSET_B 0
  1336. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1337. #define SGEMM_DEFAULT_UNROLL_N 4
  1338. #define DGEMM_DEFAULT_UNROLL_N 4
  1339. #define QGEMM_DEFAULT_UNROLL_N 2
  1340. #define CGEMM_DEFAULT_UNROLL_N 2
  1341. #define ZGEMM_DEFAULT_UNROLL_N 2
  1342. #define XGEMM_DEFAULT_UNROLL_N 1
  1343. #ifdef ARCH_X86
  1344. #define SGEMM_DEFAULT_UNROLL_M 4
  1345. #define DGEMM_DEFAULT_UNROLL_M 2
  1346. #define QGEMM_DEFAULT_UNROLL_M 2
  1347. #define CGEMM_DEFAULT_UNROLL_M 2
  1348. #define ZGEMM_DEFAULT_UNROLL_M 1
  1349. #define XGEMM_DEFAULT_UNROLL_M 1
  1350. #else /* !ARCH_X86 */
  1351. #define SGEMM_DEFAULT_UNROLL_M 8
  1352. #define DGEMM_DEFAULT_UNROLL_M 4
  1353. #define QGEMM_DEFAULT_UNROLL_M 2
  1354. #define CGEMM_DEFAULT_UNROLL_M 4
  1355. #define ZGEMM_DEFAULT_UNROLL_M 2
  1356. #define XGEMM_DEFAULT_UNROLL_M 1
  1357. #endif /* ARCH_X86 */
  1358. #define SGEMM_P sgemm_p
  1359. #define DGEMM_P dgemm_p
  1360. #define QGEMM_P qgemm_p
  1361. #define CGEMM_P cgemm_p
  1362. #define ZGEMM_P zgemm_p
  1363. #define XGEMM_P xgemm_p
  1364. #define SGEMM_R sgemm_r
  1365. #define DGEMM_R dgemm_r
  1366. #define QGEMM_R qgemm_r
  1367. #define CGEMM_R cgemm_r
  1368. #define ZGEMM_R zgemm_r
  1369. #define XGEMM_R xgemm_r
  1370. #define SGEMM_Q 128
  1371. #define DGEMM_Q 128
  1372. #define QGEMM_Q 128
  1373. #define CGEMM_Q 128
  1374. #define ZGEMM_Q 128
  1375. #define XGEMM_Q 128
  1376. #define SYMV_P 16
  1377. #endif /* GENERIC */
  /*
   * Fallback unroll factors for the Q and X kernels.
   * Each macro is defined as 2 only if nothing earlier defined it, so CPU
   * blocks that set QGEMM and XGEMM unroll values keep theirs, and blocks
   * that set none (for example the PowerPC, SPARC and Loongson blocks
   * above) get 2x2.
   */
  1378. #ifndef QGEMM_DEFAULT_UNROLL_M
  1379. #define QGEMM_DEFAULT_UNROLL_M 2
  1380. #endif
  1381. #ifndef QGEMM_DEFAULT_UNROLL_N
  1382. #define QGEMM_DEFAULT_UNROLL_N 2
  1383. #endif
  1384. #ifndef XGEMM_DEFAULT_UNROLL_M
  1385. #define XGEMM_DEFAULT_UNROLL_M 2
  1386. #endif
  1387. #ifndef XGEMM_DEFAULT_UNROLL_N
  1388. #define XGEMM_DEFAULT_UNROLL_N 2
  1389. #endif
  /*
   * Shuffle-instruction helper macros.
   * - Each macro expands to a mnemonic plus its immediate and ends with a
   *   trailing comma, so the use site supplies the remaining operands.
   * - Without HAVE_SSE2, SHUFPD_0..SHUFPD_3 are mapped to shufps with the
   *   immediates 0x44, 0x4e, 0xe4 and 0xee; on 32-bit lanes these select the
   *   same 64-bit halves as shufpd $0..$3.
   * - The "#ifndef SHUFPD_n" groups only take effect when the block above
   *   did not define the macro (i.e. HAVE_SSE2 is defined, unless something
   *   earlier already defined it) and then use real shufpd.
   * - SHUFPS_39 is defined as shufps $0x39 unless already defined.
   */
  1390. #ifndef HAVE_SSE2
  1391. #define SHUFPD_0 shufps $0x44,
  1392. #define SHUFPD_1 shufps $0x4e,
  1393. #define SHUFPD_2 shufps $0xe4,
  1394. #define SHUFPD_3 shufps $0xee,
  1395. #endif /* !HAVE_SSE2 */
  1396. #ifndef SHUFPD_0
  1397. #define SHUFPD_0 shufpd $0,
  1398. #endif
  1399. #ifndef SHUFPD_1
  1400. #define SHUFPD_1 shufpd $1,
  1401. #endif
  1402. #ifndef SHUFPD_2
  1403. #define SHUFPD_2 shufpd $2,
  1404. #endif
  1405. #ifndef SHUFPD_3
  1406. #define SHUFPD_3 shufpd $3,
  1407. #endif
  1408. #ifndef SHUFPS_39
  1409. #define SHUFPS_39 shufps $0x39,
  1410. #endif
  1411. #endif