You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

param.h 43 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817
  1. /*****************************************************************************
  2. Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the ISCAS nor the names of its contributors may
  14. be used to endorse or promote products derived from this software
  15. without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************************/
  27. /*********************************************************************/
  28. /* Copyright 2009, 2010 The University of Texas at Austin. */
  29. /* All rights reserved. */
  30. /* */
  31. /* Redistribution and use in source and binary forms, with or */
  32. /* without modification, are permitted provided that the following */
  33. /* conditions are met: */
  34. /* */
  35. /* 1. Redistributions of source code must retain the above */
  36. /* copyright notice, this list of conditions and the following */
  37. /* disclaimer. */
  38. /* */
  39. /* 2. Redistributions in binary form must reproduce the above */
  40. /* copyright notice, this list of conditions and the following */
  41. /* disclaimer in the documentation and/or other materials */
  42. /* provided with the distribution. */
  43. /* */
  44. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  45. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  46. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  47. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  48. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  49. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  50. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  51. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  52. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  53. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  54. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  55. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  56. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  57. /* POSSIBILITY OF SUCH DAMAGE. */
  58. /* */
  59. /* The views and conclusions contained in the software and */
  60. /* documentation are those of the authors and should not be */
  61. /* interpreted as representing official policies, either expressed */
  62. /* or implied, of The University of Texas at Austin. */
  63. /*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  /*
   * Parameter set compiled in only when OPTERON is defined.
   *  - *_UNROLL_N values are fixed; *_UNROLL_M values depend on ARCH_X86.
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r, ...
   *    which are not defined in this section.
   *  - *_DEFAULT_Q is a literal: 248 with ALLOC_HUGETLB, 240 without.
   * NOTE(review): P/Q/R look like GEMM blocking sizes and UNROLL_M/UNROLL_N like
   * kernel unroll factors - confirm against the code that consumes these macros.
   */
  66. #ifdef OPTERON
  67. #define SNUMOPT 4
  68. #define DNUMOPT 2
  69. #define GEMM_DEFAULT_OFFSET_A 64
  70. #define GEMM_DEFAULT_OFFSET_B 256
  71. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  72. #define SGEMM_DEFAULT_UNROLL_N 4
  73. #define DGEMM_DEFAULT_UNROLL_N 4
  74. #define QGEMM_DEFAULT_UNROLL_N 2
  75. #define CGEMM_DEFAULT_UNROLL_N 2
  76. #define ZGEMM_DEFAULT_UNROLL_N 2
  77. #define XGEMM_DEFAULT_UNROLL_N 1
  78. #ifdef ARCH_X86
  79. #define SGEMM_DEFAULT_UNROLL_M 4
  80. #define DGEMM_DEFAULT_UNROLL_M 2
  81. #define QGEMM_DEFAULT_UNROLL_M 2
  82. #define CGEMM_DEFAULT_UNROLL_M 2
  83. #define ZGEMM_DEFAULT_UNROLL_M 1
  84. #define XGEMM_DEFAULT_UNROLL_M 1
  85. #else /* !ARCH_X86 */
  86. #define SGEMM_DEFAULT_UNROLL_M 8
  87. #define DGEMM_DEFAULT_UNROLL_M 4
  88. #define QGEMM_DEFAULT_UNROLL_M 2
  89. #define CGEMM_DEFAULT_UNROLL_M 4
  90. #define ZGEMM_DEFAULT_UNROLL_M 2
  91. #define XGEMM_DEFAULT_UNROLL_M 1
  92. #endif /* ARCH_X86 */
  93. #define SGEMM_DEFAULT_P sgemm_p
  94. #define DGEMM_DEFAULT_P dgemm_p
  95. #define QGEMM_DEFAULT_P qgemm_p
  96. #define CGEMM_DEFAULT_P cgemm_p
  97. #define ZGEMM_DEFAULT_P zgemm_p
  98. #define XGEMM_DEFAULT_P xgemm_p
  99. #define SGEMM_DEFAULT_R sgemm_r
  100. #define DGEMM_DEFAULT_R dgemm_r
  101. #define QGEMM_DEFAULT_R qgemm_r
  102. #define CGEMM_DEFAULT_R cgemm_r
  103. #define ZGEMM_DEFAULT_R zgemm_r
  104. #define XGEMM_DEFAULT_R xgemm_r
  105. #ifdef ALLOC_HUGETLB
  106. #define SGEMM_DEFAULT_Q 248
  107. #define DGEMM_DEFAULT_Q 248
  108. #define QGEMM_DEFAULT_Q 248
  109. #define CGEMM_DEFAULT_Q 248
  110. #define ZGEMM_DEFAULT_Q 248
  111. #define XGEMM_DEFAULT_Q 248
  112. #else /* !ALLOC_HUGETLB */
  113. #define SGEMM_DEFAULT_Q 240
  114. #define DGEMM_DEFAULT_Q 240
  115. #define QGEMM_DEFAULT_Q 240
  116. #define CGEMM_DEFAULT_Q 240
  117. #define ZGEMM_DEFAULT_Q 240
  118. #define XGEMM_DEFAULT_Q 240
  119. #endif /* ALLOC_HUGETLB */
  120. #define SYMV_P 16
  121. #define HAVE_EXCLUSIVE_CACHE
  122. #endif /* OPTERON */
  /*
   * Parameter set shared by BARCELONA, SHANGHAI and BOBCAT (any one of the
   * three macros selects it).
   *  - *_UNROLL_M values depend on ARCH_X86; *_UNROLL_N values are fixed.
   *  - *_DEFAULT_P and *_DEFAULT_Q are literals. The "#if 0" branch is never
   *    compiled, so the "#else" values (P 448/224/112/224/112/56, Q 224) are
   *    the ones in effect.
   *  - *_DEFAULT_R expands to the identifiers sgemm_r, ... which are not
   *    defined in this section.
   *  - GEMM_THREAD is defined as gemm_thread_mn.
   */
  123. #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
  124. #define SNUMOPT 8
  125. #define DNUMOPT 4
  126. #define GEMM_DEFAULT_OFFSET_A 64
  127. #define GEMM_DEFAULT_OFFSET_B 832
  128. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  129. #define SGEMM_DEFAULT_UNROLL_N 4
  130. #define DGEMM_DEFAULT_UNROLL_N 4
  131. #define QGEMM_DEFAULT_UNROLL_N 2
  132. #define CGEMM_DEFAULT_UNROLL_N 2
  133. #define ZGEMM_DEFAULT_UNROLL_N 2
  134. #define XGEMM_DEFAULT_UNROLL_N 1
  135. #ifdef ARCH_X86
  136. #define SGEMM_DEFAULT_UNROLL_M 4
  137. #define DGEMM_DEFAULT_UNROLL_M 2
  138. #define QGEMM_DEFAULT_UNROLL_M 2
  139. #define CGEMM_DEFAULT_UNROLL_M 2
  140. #define ZGEMM_DEFAULT_UNROLL_M 1
  141. #define XGEMM_DEFAULT_UNROLL_M 1
  142. #else /* !ARCH_X86 */
  143. #define SGEMM_DEFAULT_UNROLL_M 8
  144. #define DGEMM_DEFAULT_UNROLL_M 4
  145. #define QGEMM_DEFAULT_UNROLL_M 2
  146. #define CGEMM_DEFAULT_UNROLL_M 4
  147. #define ZGEMM_DEFAULT_UNROLL_M 2
  148. #define XGEMM_DEFAULT_UNROLL_M 1
  149. #endif /* ARCH_X86 */
  150. #if 0 /* disabled alternative with larger P/Q values; never compiled */
  151. #define SGEMM_DEFAULT_P 496
  152. #define DGEMM_DEFAULT_P 248
  153. #define QGEMM_DEFAULT_P 124
  154. #define CGEMM_DEFAULT_P 248
  155. #define ZGEMM_DEFAULT_P 124
  156. #define XGEMM_DEFAULT_P 62
  157. #define SGEMM_DEFAULT_Q 248
  158. #define DGEMM_DEFAULT_Q 248
  159. #define QGEMM_DEFAULT_Q 248
  160. #define CGEMM_DEFAULT_Q 248
  161. #define ZGEMM_DEFAULT_Q 248
  162. #define XGEMM_DEFAULT_Q 248
  163. #else /* values actually in effect */
  164. #define SGEMM_DEFAULT_P 448
  165. #define DGEMM_DEFAULT_P 224
  166. #define QGEMM_DEFAULT_P 112
  167. #define CGEMM_DEFAULT_P 224
  168. #define ZGEMM_DEFAULT_P 112
  169. #define XGEMM_DEFAULT_P 56
  170. #define SGEMM_DEFAULT_Q 224
  171. #define DGEMM_DEFAULT_Q 224
  172. #define QGEMM_DEFAULT_Q 224
  173. #define CGEMM_DEFAULT_Q 224
  174. #define ZGEMM_DEFAULT_Q 224
  175. #define XGEMM_DEFAULT_Q 224
  176. #endif /* 0 */
  177. #define SGEMM_DEFAULT_R sgemm_r
  178. #define QGEMM_DEFAULT_R qgemm_r
  179. #define DGEMM_DEFAULT_R dgemm_r
  180. #define CGEMM_DEFAULT_R cgemm_r
  181. #define ZGEMM_DEFAULT_R zgemm_r
  182. #define XGEMM_DEFAULT_R xgemm_r
  183. #define SYMV_P 16
  184. #define HAVE_EXCLUSIVE_CACHE
  185. #define GEMM_THREAD gemm_thread_mn
  186. #endif /* BARCELONA || SHANGHAI || BOBCAT */
  /*
   * Parameter set compiled in only when BULLDOZER is defined.
   *  - Q/C/Z/XGEMM *_UNROLL_N values are fixed; SGEMM/DGEMM *_UNROLL_N and all
   *    *_UNROLL_M values depend on ARCH_X86.
   *  - The non-ARCH_X86 branch additionally defines CGEMM3M_*/ZGEMM3M_* unroll
   *    values and GEMV_UNROLL; the ARCH_X86 branch does not define them.
   *  - SGEMM/DGEMM P and Q depend on ARCH_X86_64 (P 768/384, Q 168) versus
   *    otherwise (P 448/224, Q 224); the remaining P and Q values are fixed.
   *  - *_DEFAULT_R expands to the identifiers sgemm_r, ... which are not
   *    defined in this section.
   */
  187. #ifdef BULLDOZER
  188. #define SNUMOPT 8
  189. #define DNUMOPT 4
  190. #define GEMM_DEFAULT_OFFSET_A 64
  191. #define GEMM_DEFAULT_OFFSET_B 832
  192. #define GEMM_DEFAULT_ALIGN 0x0fffUL
  193. #define QGEMM_DEFAULT_UNROLL_N 2
  194. #define CGEMM_DEFAULT_UNROLL_N 2
  195. #define ZGEMM_DEFAULT_UNROLL_N 2
  196. #define XGEMM_DEFAULT_UNROLL_N 1
  197. #ifdef ARCH_X86
  198. #define SGEMM_DEFAULT_UNROLL_N 4
  199. #define DGEMM_DEFAULT_UNROLL_N 4
  200. #define SGEMM_DEFAULT_UNROLL_M 4
  201. #define DGEMM_DEFAULT_UNROLL_M 2
  202. #define QGEMM_DEFAULT_UNROLL_M 2
  203. #define CGEMM_DEFAULT_UNROLL_M 2
  204. #define ZGEMM_DEFAULT_UNROLL_M 1
  205. #define XGEMM_DEFAULT_UNROLL_M 1
  206. #else /* !ARCH_X86 */
  207. #define SGEMM_DEFAULT_UNROLL_N 2
  208. #define DGEMM_DEFAULT_UNROLL_N 2
  209. #define SGEMM_DEFAULT_UNROLL_M 16
  210. #define DGEMM_DEFAULT_UNROLL_M 8
  211. #define QGEMM_DEFAULT_UNROLL_M 2
  212. #define CGEMM_DEFAULT_UNROLL_M 4
  213. #define ZGEMM_DEFAULT_UNROLL_M 2
  214. #define XGEMM_DEFAULT_UNROLL_M 1
  215. #define CGEMM3M_DEFAULT_UNROLL_N 4
  216. #define CGEMM3M_DEFAULT_UNROLL_M 8
  217. #define ZGEMM3M_DEFAULT_UNROLL_N 4
  218. #define ZGEMM3M_DEFAULT_UNROLL_M 4
  219. #define GEMV_UNROLL 8
  220. #endif /* ARCH_X86 */
  221. #if defined(ARCH_X86_64)
  222. #define SGEMM_DEFAULT_P 768
  223. #define DGEMM_DEFAULT_P 384
  224. #else /* !ARCH_X86_64 */
  225. #define SGEMM_DEFAULT_P 448
  226. #define DGEMM_DEFAULT_P 224
  227. #endif /* ARCH_X86_64 */
  228. #define QGEMM_DEFAULT_P 112
  229. #define CGEMM_DEFAULT_P 224
  230. #define ZGEMM_DEFAULT_P 112
  231. #define XGEMM_DEFAULT_P 56
  232. #if defined(ARCH_X86_64)
  233. #define SGEMM_DEFAULT_Q 168
  234. #define DGEMM_DEFAULT_Q 168
  235. #else /* !ARCH_X86_64 */
  236. #define SGEMM_DEFAULT_Q 224
  237. #define DGEMM_DEFAULT_Q 224
  238. #endif /* ARCH_X86_64 */
  239. #define QGEMM_DEFAULT_Q 224
  240. #define CGEMM_DEFAULT_Q 224
  241. #define ZGEMM_DEFAULT_Q 224
  242. #define XGEMM_DEFAULT_Q 224
  243. #define SGEMM_DEFAULT_R sgemm_r
  244. #define QGEMM_DEFAULT_R qgemm_r
  245. #define DGEMM_DEFAULT_R dgemm_r
  246. #define CGEMM_DEFAULT_R cgemm_r
  247. #define ZGEMM_DEFAULT_R zgemm_r
  248. #define XGEMM_DEFAULT_R xgemm_r
  249. #define SYMV_P 16
  250. #define HAVE_EXCLUSIVE_CACHE
  251. #define GEMM_THREAD gemm_thread_mn
  252. #endif /* BULLDOZER */
  /*
   * Parameter set compiled in only when ATHLON is defined.
   * There are no nested conditionals: every value is fixed.
   *  - *_DEFAULT_P and *_DEFAULT_Q are literals (Q is 208 for every type).
   *  - *_DEFAULT_R expands to the identifiers sgemm_r, ... which are not
   *    defined in this section.
   */
  253. #ifdef ATHLON
  254. #define SNUMOPT 4
  255. #define DNUMOPT 2
  256. #define GEMM_DEFAULT_OFFSET_A 0
  257. #define GEMM_DEFAULT_OFFSET_B 384
  258. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  259. #define SGEMM_DEFAULT_UNROLL_N 4
  260. #define DGEMM_DEFAULT_UNROLL_N 4
  261. #define QGEMM_DEFAULT_UNROLL_N 2
  262. #define CGEMM_DEFAULT_UNROLL_N 2
  263. #define ZGEMM_DEFAULT_UNROLL_N 2
  264. #define XGEMM_DEFAULT_UNROLL_N 1
  265. #define SGEMM_DEFAULT_UNROLL_M 2
  266. #define DGEMM_DEFAULT_UNROLL_M 1
  267. #define QGEMM_DEFAULT_UNROLL_M 2
  268. #define CGEMM_DEFAULT_UNROLL_M 1
  269. #define ZGEMM_DEFAULT_UNROLL_M 1
  270. #define XGEMM_DEFAULT_UNROLL_M 1
  271. #define SGEMM_DEFAULT_R sgemm_r
  272. #define DGEMM_DEFAULT_R dgemm_r
  273. #define QGEMM_DEFAULT_R qgemm_r
  274. #define CGEMM_DEFAULT_R cgemm_r
  275. #define ZGEMM_DEFAULT_R zgemm_r
  276. #define XGEMM_DEFAULT_R xgemm_r
  277. #define SGEMM_DEFAULT_P 208
  278. #define DGEMM_DEFAULT_P 104
  279. #define QGEMM_DEFAULT_P 56
  280. #define CGEMM_DEFAULT_P 104
  281. #define ZGEMM_DEFAULT_P 56
  282. #define XGEMM_DEFAULT_P 28
  283. #define SGEMM_DEFAULT_Q 208
  284. #define DGEMM_DEFAULT_Q 208
  285. #define QGEMM_DEFAULT_Q 208
  286. #define CGEMM_DEFAULT_Q 208
  287. #define ZGEMM_DEFAULT_Q 208
  288. #define XGEMM_DEFAULT_Q 208
  289. #define SYMV_P 16
  290. #define HAVE_EXCLUSIVE_CACHE
  291. #endif /* ATHLON */
  /*
   * Parameter set compiled in only when VIAC3 is defined.
   * There are no nested conditionals: every value is fixed.
   *  - *_DEFAULT_P is 128 for every type; *_DEFAULT_Q is a per-type literal.
   *  - *_DEFAULT_R expands to the identifiers sgemm_r, ... which are not
   *    defined in this section.
   *  - HAVE_EXCLUSIVE_CACHE is not defined by this section.
   */
  292. #ifdef VIAC3
  293. #define SNUMOPT 2
  294. #define DNUMOPT 1
  295. #define GEMM_DEFAULT_OFFSET_A 0
  296. #define GEMM_DEFAULT_OFFSET_B 256
  297. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  298. #define SGEMM_DEFAULT_UNROLL_N 4
  299. #define DGEMM_DEFAULT_UNROLL_N 4
  300. #define QGEMM_DEFAULT_UNROLL_N 2
  301. #define CGEMM_DEFAULT_UNROLL_N 2
  302. #define ZGEMM_DEFAULT_UNROLL_N 2
  303. #define XGEMM_DEFAULT_UNROLL_N 1
  304. #define SGEMM_DEFAULT_UNROLL_M 2
  305. #define DGEMM_DEFAULT_UNROLL_M 1
  306. #define QGEMM_DEFAULT_UNROLL_M 2
  307. #define CGEMM_DEFAULT_UNROLL_M 1
  308. #define ZGEMM_DEFAULT_UNROLL_M 1
  309. #define XGEMM_DEFAULT_UNROLL_M 1
  310. #define SGEMM_DEFAULT_R sgemm_r
  311. #define DGEMM_DEFAULT_R dgemm_r
  312. #define QGEMM_DEFAULT_R qgemm_r
  313. #define CGEMM_DEFAULT_R cgemm_r
  314. #define ZGEMM_DEFAULT_R zgemm_r
  315. #define XGEMM_DEFAULT_R xgemm_r
  316. #define SGEMM_DEFAULT_P 128
  317. #define DGEMM_DEFAULT_P 128
  318. #define QGEMM_DEFAULT_P 128
  319. #define CGEMM_DEFAULT_P 128
  320. #define ZGEMM_DEFAULT_P 128
  321. #define XGEMM_DEFAULT_P 128
  322. #define SGEMM_DEFAULT_Q 512
  323. #define DGEMM_DEFAULT_Q 256
  324. #define QGEMM_DEFAULT_Q 256
  325. #define CGEMM_DEFAULT_Q 256
  326. #define ZGEMM_DEFAULT_Q 128
  327. #define XGEMM_DEFAULT_Q 128
  328. #define SYMV_P 16
  329. #endif /* VIAC3 */
  /*
   * Parameter set compiled in only when NANO is defined.
   *  - Both *_UNROLL_N and *_UNROLL_M depend on ARCH_X86.
   *  - *_DEFAULT_P is 288 for every type; *_DEFAULT_Q is a per-type literal
   *    (256/128/64/128/64/32).
   *  - *_DEFAULT_R expands to the identifiers sgemm_r, ... which are not
   *    defined in this section.
   */
  330. #ifdef NANO
  331. #define SNUMOPT 4
  332. #define DNUMOPT 2
  333. #define GEMM_DEFAULT_OFFSET_A 64
  334. #define GEMM_DEFAULT_OFFSET_B 256
  335. #define GEMM_DEFAULT_ALIGN 0x01ffffUL
  336. #ifdef ARCH_X86
  337. #define SGEMM_DEFAULT_UNROLL_N 4
  338. #define DGEMM_DEFAULT_UNROLL_N 4
  339. #define QGEMM_DEFAULT_UNROLL_N 2
  340. #define CGEMM_DEFAULT_UNROLL_N 2
  341. #define ZGEMM_DEFAULT_UNROLL_N 2
  342. #define XGEMM_DEFAULT_UNROLL_N 1
  343. #define SGEMM_DEFAULT_UNROLL_M 4
  344. #define DGEMM_DEFAULT_UNROLL_M 2
  345. #define QGEMM_DEFAULT_UNROLL_M 2
  346. #define CGEMM_DEFAULT_UNROLL_M 2
  347. #define ZGEMM_DEFAULT_UNROLL_M 1
  348. #define XGEMM_DEFAULT_UNROLL_M 1
  349. #else /* !ARCH_X86 */
  350. #define SGEMM_DEFAULT_UNROLL_N 8
  351. #define DGEMM_DEFAULT_UNROLL_N 4
  352. #define QGEMM_DEFAULT_UNROLL_N 2
  353. #define CGEMM_DEFAULT_UNROLL_N 4
  354. #define ZGEMM_DEFAULT_UNROLL_N 2
  355. #define XGEMM_DEFAULT_UNROLL_N 1
  356. #define SGEMM_DEFAULT_UNROLL_M 4
  357. #define DGEMM_DEFAULT_UNROLL_M 4
  358. #define QGEMM_DEFAULT_UNROLL_M 2
  359. #define CGEMM_DEFAULT_UNROLL_M 2
  360. #define ZGEMM_DEFAULT_UNROLL_M 2
  361. #define XGEMM_DEFAULT_UNROLL_M 1
  362. #endif /* ARCH_X86 */
  363. #define SGEMM_DEFAULT_P 288
  364. #define DGEMM_DEFAULT_P 288
  365. #define QGEMM_DEFAULT_P 288
  366. #define CGEMM_DEFAULT_P 288
  367. #define ZGEMM_DEFAULT_P 288
  368. #define XGEMM_DEFAULT_P 288
  369. #define SGEMM_DEFAULT_R sgemm_r
  370. #define DGEMM_DEFAULT_R dgemm_r
  371. #define QGEMM_DEFAULT_R qgemm_r
  372. #define CGEMM_DEFAULT_R cgemm_r
  373. #define ZGEMM_DEFAULT_R zgemm_r
  374. #define XGEMM_DEFAULT_R xgemm_r
  375. #define SGEMM_DEFAULT_Q 256
  376. #define DGEMM_DEFAULT_Q 128
  377. #define QGEMM_DEFAULT_Q 64
  378. #define CGEMM_DEFAULT_Q 128
  379. #define ZGEMM_DEFAULT_Q 64
  380. #define XGEMM_DEFAULT_Q 32
  381. #define SYMV_P 16
  382. #define HAVE_EXCLUSIVE_CACHE
  383. #endif /* NANO */
  /*
   * Parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3 (any one of the
   * three macros selects it).
   *  - SNUMOPT and the SGEMM/CGEMM *_UNROLL_M values depend on HAVE_SSE;
   *    every other unroll value is fixed.
   *  - *_DEFAULT_Q is 256 for every type.
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r,
   *    ... which are not defined in this section.
   */
  384. #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
  385. #ifdef HAVE_SSE
  386. #define SNUMOPT 2
  387. #else /* !HAVE_SSE */
  388. #define SNUMOPT 1
  389. #endif /* HAVE_SSE */
  390. #define DNUMOPT 1
  391. #define GEMM_DEFAULT_OFFSET_A 0
  392. #define GEMM_DEFAULT_OFFSET_B 0
  393. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  394. #ifdef HAVE_SSE
  395. #define SGEMM_DEFAULT_UNROLL_M 8
  396. #define CGEMM_DEFAULT_UNROLL_M 4
  397. #else /* !HAVE_SSE */
  398. #define SGEMM_DEFAULT_UNROLL_M 4
  399. #define CGEMM_DEFAULT_UNROLL_M 2
  400. #endif /* HAVE_SSE */
  401. #define DGEMM_DEFAULT_UNROLL_M 2
  402. #define SGEMM_DEFAULT_UNROLL_N 2
  403. #define DGEMM_DEFAULT_UNROLL_N 2
  404. #define QGEMM_DEFAULT_UNROLL_M 2
  405. #define QGEMM_DEFAULT_UNROLL_N 2
  406. #define CGEMM_DEFAULT_UNROLL_N 1
  407. #define ZGEMM_DEFAULT_UNROLL_M 1
  408. #define ZGEMM_DEFAULT_UNROLL_N 1
  409. #define XGEMM_DEFAULT_UNROLL_M 1
  410. #define XGEMM_DEFAULT_UNROLL_N 1
  411. #define SGEMM_DEFAULT_P sgemm_p
  412. #define SGEMM_DEFAULT_Q 256
  413. #define SGEMM_DEFAULT_R sgemm_r
  414. #define DGEMM_DEFAULT_P dgemm_p
  415. #define DGEMM_DEFAULT_Q 256
  416. #define DGEMM_DEFAULT_R dgemm_r
  417. #define QGEMM_DEFAULT_P qgemm_p
  418. #define QGEMM_DEFAULT_Q 256
  419. #define QGEMM_DEFAULT_R qgemm_r
  420. #define CGEMM_DEFAULT_P cgemm_p
  421. #define CGEMM_DEFAULT_Q 256
  422. #define CGEMM_DEFAULT_R cgemm_r
  423. #define ZGEMM_DEFAULT_P zgemm_p
  424. #define ZGEMM_DEFAULT_Q 256
  425. #define ZGEMM_DEFAULT_R zgemm_r
  426. #define XGEMM_DEFAULT_P xgemm_p
  427. #define XGEMM_DEFAULT_Q 256
  428. #define XGEMM_DEFAULT_R xgemm_r
  429. #define SYMV_P 4
  430. #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  /*
   * Parameter set compiled in only when PENTIUMM is defined.
   *  - All *_UNROLL_M / *_UNROLL_N values depend on CORE_YONAH.
   *  - *_DEFAULT_Q is 256 for every type.
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r,
   *    ... which are not defined in this section.
   */
  431. #ifdef PENTIUMM
  432. #define SNUMOPT 2
  433. #define DNUMOPT 1
  434. #define GEMM_DEFAULT_OFFSET_A 0
  435. #define GEMM_DEFAULT_OFFSET_B 0
  436. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  437. #ifdef CORE_YONAH
  438. #define SGEMM_DEFAULT_UNROLL_M 4
  439. #define SGEMM_DEFAULT_UNROLL_N 4
  440. #define DGEMM_DEFAULT_UNROLL_M 2
  441. #define DGEMM_DEFAULT_UNROLL_N 4
  442. #define QGEMM_DEFAULT_UNROLL_M 2
  443. #define QGEMM_DEFAULT_UNROLL_N 2
  444. #define CGEMM_DEFAULT_UNROLL_M 2
  445. #define CGEMM_DEFAULT_UNROLL_N 2
  446. #define ZGEMM_DEFAULT_UNROLL_M 1
  447. #define ZGEMM_DEFAULT_UNROLL_N 2
  448. #define XGEMM_DEFAULT_UNROLL_M 1
  449. #define XGEMM_DEFAULT_UNROLL_N 1
  450. #else /* !CORE_YONAH */
  451. #define SGEMM_DEFAULT_UNROLL_M 8
  452. #define SGEMM_DEFAULT_UNROLL_N 2
  453. #define DGEMM_DEFAULT_UNROLL_M 2
  454. #define DGEMM_DEFAULT_UNROLL_N 2
  455. #define QGEMM_DEFAULT_UNROLL_M 2
  456. #define QGEMM_DEFAULT_UNROLL_N 2
  457. #define CGEMM_DEFAULT_UNROLL_M 4
  458. #define CGEMM_DEFAULT_UNROLL_N 1
  459. #define ZGEMM_DEFAULT_UNROLL_M 1
  460. #define ZGEMM_DEFAULT_UNROLL_N 1
  461. #define XGEMM_DEFAULT_UNROLL_M 1
  462. #define XGEMM_DEFAULT_UNROLL_N 1
  463. #endif /* CORE_YONAH */
  464. #define SGEMM_DEFAULT_P sgemm_p
  465. #define SGEMM_DEFAULT_Q 256
  466. #define SGEMM_DEFAULT_R sgemm_r
  467. #define DGEMM_DEFAULT_P dgemm_p
  468. #define DGEMM_DEFAULT_Q 256
  469. #define DGEMM_DEFAULT_R dgemm_r
  470. #define QGEMM_DEFAULT_P qgemm_p
  471. #define QGEMM_DEFAULT_Q 256
  472. #define QGEMM_DEFAULT_R qgemm_r
  473. #define CGEMM_DEFAULT_P cgemm_p
  474. #define CGEMM_DEFAULT_Q 256
  475. #define CGEMM_DEFAULT_R cgemm_r
  476. #define ZGEMM_DEFAULT_P zgemm_p
  477. #define ZGEMM_DEFAULT_Q 256
  478. #define ZGEMM_DEFAULT_R zgemm_r
  479. #define XGEMM_DEFAULT_P xgemm_p
  480. #define XGEMM_DEFAULT_Q 256
  481. #define XGEMM_DEFAULT_R xgemm_r
  482. #define SYMV_P 4
  483. #endif /* PENTIUMM */
  /*
   * Parameter set compiled in only when CORE_NORTHWOOD is defined.
   * There are no nested conditionals: every value is fixed.
   *  - *_DEFAULT_Q is 128 for every type.
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r,
   *    ... which are not defined in this section.
   */
  484. #ifdef CORE_NORTHWOOD
  485. #define SNUMOPT 4
  486. #define DNUMOPT 2
  487. #define GEMM_DEFAULT_OFFSET_A 0
  488. #define GEMM_DEFAULT_OFFSET_B 32
  489. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  490. #define SYMV_P 8
  491. #define SGEMM_DEFAULT_UNROLL_M 8
  492. #define DGEMM_DEFAULT_UNROLL_M 4
  493. #define QGEMM_DEFAULT_UNROLL_M 2
  494. #define CGEMM_DEFAULT_UNROLL_M 4
  495. #define ZGEMM_DEFAULT_UNROLL_M 2
  496. #define XGEMM_DEFAULT_UNROLL_M 1
  497. #define SGEMM_DEFAULT_UNROLL_N 2
  498. #define DGEMM_DEFAULT_UNROLL_N 2
  499. #define QGEMM_DEFAULT_UNROLL_N 2
  500. #define CGEMM_DEFAULT_UNROLL_N 1
  501. #define ZGEMM_DEFAULT_UNROLL_N 1
  502. #define XGEMM_DEFAULT_UNROLL_N 1
  503. #define SGEMM_DEFAULT_P sgemm_p
  504. #define SGEMM_DEFAULT_R sgemm_r
  505. #define DGEMM_DEFAULT_P dgemm_p
  506. #define DGEMM_DEFAULT_R dgemm_r
  507. #define QGEMM_DEFAULT_P qgemm_p
  508. #define QGEMM_DEFAULT_R qgemm_r
  509. #define CGEMM_DEFAULT_P cgemm_p
  510. #define CGEMM_DEFAULT_R cgemm_r
  511. #define ZGEMM_DEFAULT_P zgemm_p
  512. #define ZGEMM_DEFAULT_R zgemm_r
  513. #define XGEMM_DEFAULT_P xgemm_p
  514. #define XGEMM_DEFAULT_R xgemm_r
  515. #define SGEMM_DEFAULT_Q 128
  516. #define DGEMM_DEFAULT_Q 128
  517. #define QGEMM_DEFAULT_Q 128
  518. #define CGEMM_DEFAULT_Q 128
  519. #define ZGEMM_DEFAULT_Q 128
  520. #define XGEMM_DEFAULT_Q 128
  521. #endif /* CORE_NORTHWOOD */
  /*
   * Parameter set compiled in only when CORE_PRESCOTT is defined.
   *  - GEMM_DEFAULT_OFFSET_A/B depend on __64BIT__ (128/192 when it is not
   *    defined, 0/256 when it is).
   *  - *_UNROLL_M values depend on ARCH_X86; *_UNROLL_N values are fixed.
   *  - *_DEFAULT_Q is 128 for every type.
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r,
   *    ... which are not defined in this section.
   */
  522. #ifdef CORE_PRESCOTT
  523. #define SNUMOPT 4
  524. #define DNUMOPT 2
  525. #ifndef __64BIT__
  526. #define GEMM_DEFAULT_OFFSET_A 128
  527. #define GEMM_DEFAULT_OFFSET_B 192
  528. #else /* __64BIT__ */
  529. #define GEMM_DEFAULT_OFFSET_A 0
  530. #define GEMM_DEFAULT_OFFSET_B 256
  531. #endif /* !__64BIT__ */
  532. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  533. #define SYMV_P 8
  534. #ifdef ARCH_X86
  535. #define SGEMM_DEFAULT_UNROLL_M 4
  536. #define DGEMM_DEFAULT_UNROLL_M 2
  537. #define QGEMM_DEFAULT_UNROLL_M 2
  538. #define CGEMM_DEFAULT_UNROLL_M 2
  539. #define ZGEMM_DEFAULT_UNROLL_M 1
  540. #define XGEMM_DEFAULT_UNROLL_M 1
  541. #else /* !ARCH_X86 */
  542. #define SGEMM_DEFAULT_UNROLL_M 8
  543. #define DGEMM_DEFAULT_UNROLL_M 4
  544. #define QGEMM_DEFAULT_UNROLL_M 2
  545. #define CGEMM_DEFAULT_UNROLL_M 4
  546. #define ZGEMM_DEFAULT_UNROLL_M 2
  547. #define XGEMM_DEFAULT_UNROLL_M 1
  548. #endif /* ARCH_X86 */
  549. #define SGEMM_DEFAULT_UNROLL_N 4
  550. #define DGEMM_DEFAULT_UNROLL_N 4
  551. #define QGEMM_DEFAULT_UNROLL_N 2
  552. #define CGEMM_DEFAULT_UNROLL_N 2
  553. #define ZGEMM_DEFAULT_UNROLL_N 2
  554. #define XGEMM_DEFAULT_UNROLL_N 1
  555. #define SGEMM_DEFAULT_P sgemm_p
  556. #define SGEMM_DEFAULT_R sgemm_r
  557. #define DGEMM_DEFAULT_P dgemm_p
  558. #define DGEMM_DEFAULT_R dgemm_r
  559. #define QGEMM_DEFAULT_P qgemm_p
  560. #define QGEMM_DEFAULT_R qgemm_r
  561. #define CGEMM_DEFAULT_P cgemm_p
  562. #define CGEMM_DEFAULT_R cgemm_r
  563. #define ZGEMM_DEFAULT_P zgemm_p
  564. #define ZGEMM_DEFAULT_R zgemm_r
  565. #define XGEMM_DEFAULT_P xgemm_p
  566. #define XGEMM_DEFAULT_R xgemm_r
  567. #define SGEMM_DEFAULT_Q 128
  568. #define DGEMM_DEFAULT_Q 128
  569. #define QGEMM_DEFAULT_Q 128
  570. #define CGEMM_DEFAULT_Q 128
  571. #define ZGEMM_DEFAULT_Q 128
  572. #define XGEMM_DEFAULT_Q 128
  573. #endif /* CORE_PRESCOTT */
  /*
   * Parameter set compiled in only when CORE2 is defined.
   *  - *_UNROLL_M values are the same in both ARCH_X86 branches; the
   *    *_UNROLL_N values differ between them.
   *  - The function-like macro MASK(a, b) is defined only in the ARCH_X86 branch.
   *  - *_DEFAULT_Q is 256 for every type.
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r,
   *    ... which are not defined in this section.
   */
  574. #ifdef CORE2
  575. #define SNUMOPT 8
  576. #define DNUMOPT 4
  577. #define GEMM_DEFAULT_OFFSET_A 448
  578. #define GEMM_DEFAULT_OFFSET_B 128
  579. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  580. #define SYMV_P 8
  581. #define SWITCH_RATIO 4
  582. #ifdef ARCH_X86
  583. #define SGEMM_DEFAULT_UNROLL_M 8
  584. #define DGEMM_DEFAULT_UNROLL_M 4
  585. #define QGEMM_DEFAULT_UNROLL_M 2
  586. #define CGEMM_DEFAULT_UNROLL_M 4
  587. #define ZGEMM_DEFAULT_UNROLL_M 2
  588. #define XGEMM_DEFAULT_UNROLL_M 1
  589. #define SGEMM_DEFAULT_UNROLL_N 2
  590. #define DGEMM_DEFAULT_UNROLL_N 2
  591. #define QGEMM_DEFAULT_UNROLL_N 2
  592. #define CGEMM_DEFAULT_UNROLL_N 1
  593. #define ZGEMM_DEFAULT_UNROLL_N 1
  594. #define XGEMM_DEFAULT_UNROLL_N 1
  595. #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b)) /* a rounded up to a multiple of b (assuming integer division, a >= 0, b > 0) */
  596. #else /* !ARCH_X86 */
  597. #define SGEMM_DEFAULT_UNROLL_M 8
  598. #define DGEMM_DEFAULT_UNROLL_M 4
  599. #define QGEMM_DEFAULT_UNROLL_M 2
  600. #define CGEMM_DEFAULT_UNROLL_M 4
  601. #define ZGEMM_DEFAULT_UNROLL_M 2
  602. #define XGEMM_DEFAULT_UNROLL_M 1
  603. #define SGEMM_DEFAULT_UNROLL_N 4
  604. #define DGEMM_DEFAULT_UNROLL_N 4
  605. #define QGEMM_DEFAULT_UNROLL_N 2
  606. #define CGEMM_DEFAULT_UNROLL_N 2
  607. #define ZGEMM_DEFAULT_UNROLL_N 2
  608. #define XGEMM_DEFAULT_UNROLL_N 1
  609. #endif /* ARCH_X86 */
  610. #define SGEMM_DEFAULT_P sgemm_p
  611. #define SGEMM_DEFAULT_R sgemm_r
  612. #define DGEMM_DEFAULT_P dgemm_p
  613. #define DGEMM_DEFAULT_R dgemm_r
  614. #define QGEMM_DEFAULT_P qgemm_p
  615. #define QGEMM_DEFAULT_R qgemm_r
  616. #define CGEMM_DEFAULT_P cgemm_p
  617. #define CGEMM_DEFAULT_R cgemm_r
  618. #define ZGEMM_DEFAULT_P zgemm_p
  619. #define ZGEMM_DEFAULT_R zgemm_r
  620. #define XGEMM_DEFAULT_P xgemm_p
  621. #define XGEMM_DEFAULT_R xgemm_r
  622. #define SGEMM_DEFAULT_Q 256
  623. #define DGEMM_DEFAULT_Q 256
  624. #define QGEMM_DEFAULT_Q 256
  625. #define CGEMM_DEFAULT_Q 256
  626. #define ZGEMM_DEFAULT_Q 256
  627. #define XGEMM_DEFAULT_Q 256
  628. #endif /* CORE2 */
  /*
   * Parameter set compiled in only when PENRYN is defined.
   *  - *_UNROLL_M values depend on ARCH_X86; the *_UNROLL_N values are the
   *    same in both branches.
   *  - *_DEFAULT_Q is a per-type literal (512/256/128/512/256/128).
   *  - *_DEFAULT_P and *_DEFAULT_R expand to the identifiers sgemm_p, sgemm_r,
   *    ... which are not defined in this section.
   *  - Also defines SWITCH_RATIO (4) and GETRF_FACTOR (0.75).
   */
  629. #ifdef PENRYN
  630. #define SNUMOPT 8
  631. #define DNUMOPT 4
  632. #define GEMM_DEFAULT_OFFSET_A 128
  633. #define GEMM_DEFAULT_OFFSET_B 0
  634. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  635. #define SYMV_P 8
  636. #define SWITCH_RATIO 4
  637. #ifdef ARCH_X86
  638. #define SGEMM_DEFAULT_UNROLL_M 4
  639. #define DGEMM_DEFAULT_UNROLL_M 2
  640. #define QGEMM_DEFAULT_UNROLL_M 2
  641. #define CGEMM_DEFAULT_UNROLL_M 2
  642. #define ZGEMM_DEFAULT_UNROLL_M 1
  643. #define XGEMM_DEFAULT_UNROLL_M 1
  644. #define SGEMM_DEFAULT_UNROLL_N 4
  645. #define DGEMM_DEFAULT_UNROLL_N 4
  646. #define QGEMM_DEFAULT_UNROLL_N 2
  647. #define CGEMM_DEFAULT_UNROLL_N 2
  648. #define ZGEMM_DEFAULT_UNROLL_N 2
  649. #define XGEMM_DEFAULT_UNROLL_N 1
  650. #else /* !ARCH_X86 */
  651. #define SGEMM_DEFAULT_UNROLL_M 8
  652. #define DGEMM_DEFAULT_UNROLL_M 4
  653. #define QGEMM_DEFAULT_UNROLL_M 2
  654. #define CGEMM_DEFAULT_UNROLL_M 4
  655. #define ZGEMM_DEFAULT_UNROLL_M 2
  656. #define XGEMM_DEFAULT_UNROLL_M 1
  657. #define SGEMM_DEFAULT_UNROLL_N 4
  658. #define DGEMM_DEFAULT_UNROLL_N 4
  659. #define QGEMM_DEFAULT_UNROLL_N 2
  660. #define CGEMM_DEFAULT_UNROLL_N 2
  661. #define ZGEMM_DEFAULT_UNROLL_N 2
  662. #define XGEMM_DEFAULT_UNROLL_N 1
  663. #endif /* ARCH_X86 */
  664. #define SGEMM_DEFAULT_P sgemm_p
  665. #define SGEMM_DEFAULT_R sgemm_r
  666. #define DGEMM_DEFAULT_P dgemm_p
  667. #define DGEMM_DEFAULT_R dgemm_r
  668. #define QGEMM_DEFAULT_P qgemm_p
  669. #define QGEMM_DEFAULT_R qgemm_r
  670. #define CGEMM_DEFAULT_P cgemm_p
  671. #define CGEMM_DEFAULT_R cgemm_r
  672. #define ZGEMM_DEFAULT_P zgemm_p
  673. #define ZGEMM_DEFAULT_R zgemm_r
  674. #define XGEMM_DEFAULT_P xgemm_p
  675. #define XGEMM_DEFAULT_R xgemm_r
  676. #define SGEMM_DEFAULT_Q 512
  677. #define DGEMM_DEFAULT_Q 256
  678. #define QGEMM_DEFAULT_Q 128
  679. #define CGEMM_DEFAULT_Q 512
  680. #define ZGEMM_DEFAULT_Q 256
  681. #define XGEMM_DEFAULT_Q 128
  682. #define GETRF_FACTOR 0.75
  683. #endif /* PENRYN */
  /*
   * Parameter set compiled in when DUNNINGTON is defined.
   * Lower-case names on the right-hand side (sgemm_p, gemm_thread_mn, ...)
   * are identifiers that must be provided elsewhere.
   */
  #ifdef DUNNINGTON
  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors: ARCH_X86 builds use smaller M values than the rest. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  /* P and R are forwarded to the lower-case identifiers, not fixed here. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is a compile-time constant per prefix. */
  #define SGEMM_DEFAULT_Q 768
  #define DGEMM_DEFAULT_Q 384
  #define QGEMM_DEFAULT_Q 192
  #define CGEMM_DEFAULT_Q 768
  #define ZGEMM_DEFAULT_Q 384
  #define XGEMM_DEFAULT_Q 192

  #define GETRF_FACTOR 0.75
  #define GEMM_THREAD gemm_thread_mn
  #endif
  /*
   * Parameter set compiled in when NEHALEM is defined.
   * P is a fixed constant here; R is forwarded to lower-case identifiers
   * (sgemm_r, ...) that must be provided elsewhere.
   */
  #ifdef NEHALEM
  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 32
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Non-ARCH_X86 builds keep the same M values but use wider N values. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  #define SGEMM_DEFAULT_P 504
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P 504
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P 252
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P 252
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 128

  #define GETRF_FACTOR 0.72
  #endif
  /*
   * Parameter set compiled in when SANDYBRIDGE is defined.
   * P is a fixed constant; R is forwarded to a lower-case identifier
   * (provided elsewhere) for every prefix except CGEMM, which is fixed at 1024.
   * The commented-out lines are alternatives kept from the original.
   */
  #ifdef SANDYBRIDGE
  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  /* Unroll factors depend on whether ARCH_X86 is defined. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif

  #define SGEMM_DEFAULT_P 512
  #define SGEMM_DEFAULT_R sgemm_r
  //#define SGEMM_DEFAULT_R 1024

  #define DGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_R dgemm_r
  //#define DGEMM_DEFAULT_R 1024

  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P 128
  //#define CGEMM_DEFAULT_R cgemm_r
  #define CGEMM_DEFAULT_R 1024

  #define ZGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_R zgemm_r
  //#define ZGEMM_DEFAULT_R 1024

  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 192
  #define XGEMM_DEFAULT_Q 128

  #define GETRF_FACTOR 0.72
  #endif
  /*
   * Parameter set compiled in when ATOM is defined.
   * Only the M unroll factors depend on ARCH_X86; N is shared.
   * P and R are forwarded to lower-case identifiers provided elsewhere.
   */
  #ifdef ATOM
  #define SNUMOPT 2
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 8

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same constant for every prefix. */
  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256
  #endif
  /*
   * Parameter set compiled in when ITANIUM2 is defined.
   * P and R are forwarded to lower-case identifiers provided elsewhere;
   * Q is fixed at 1024 for every prefix.
   */
  #ifdef ITANIUM2
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 128
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define QGEMM_DEFAULT_UNROLL_M 8
  #define QGEMM_DEFAULT_UNROLL_N 8
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_M 4
  #define XGEMM_DEFAULT_UNROLL_N 4

  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  #define SGEMM_DEFAULT_Q 1024
  #define DGEMM_DEFAULT_Q 1024
  #define QGEMM_DEFAULT_Q 1024
  #define CGEMM_DEFAULT_Q 1024
  #define ZGEMM_DEFAULT_Q 1024
  #define XGEMM_DEFAULT_Q 1024

  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 16

  #define GETRF_FACTOR 0.65
  #endif
  /*
   * Parameter set shared by EV4, EV5 and EV6, with per-variant P/Q values.
   * Only the EV4 branch defines the *_DEFAULT_R macros; EV5 and EV6 leave
   * them undefined in this block.
   */
  #if defined(EV4) || defined(EV5) || defined(EV6)

  /* EV4 uses 1/1; EV5 and EV6 use 2/2. */
  #ifdef EV4
  #define SNUMOPT 1
  #define DNUMOPT 1
  #else
  #define SNUMOPT 2
  #define DNUMOPT 2
  #endif

  #define GEMM_DEFAULT_OFFSET_A 512
  #define GEMM_DEFAULT_OFFSET_B 512
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SYMV_P 8

  #ifdef EV4
  #define SGEMM_DEFAULT_P 32
  #define SGEMM_DEFAULT_Q 112
  #define SGEMM_DEFAULT_R 256

  #define DGEMM_DEFAULT_P 32
  #define DGEMM_DEFAULT_Q 56
  #define DGEMM_DEFAULT_R 256

  #define CGEMM_DEFAULT_P 32
  #define CGEMM_DEFAULT_Q 64
  #define CGEMM_DEFAULT_R 240

  #define ZGEMM_DEFAULT_P 32
  #define ZGEMM_DEFAULT_Q 32
  #define ZGEMM_DEFAULT_R 240
  #endif

  #ifdef EV5
  #define SGEMM_DEFAULT_P 64
  #define SGEMM_DEFAULT_Q 256

  #define DGEMM_DEFAULT_P 64
  #define DGEMM_DEFAULT_Q 128

  #define CGEMM_DEFAULT_P 64
  #define CGEMM_DEFAULT_Q 128

  #define ZGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_Q 64
  #endif

  #ifdef EV6
  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 512

  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 256

  #define CGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_Q 256

  #define ZGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_Q 256
  #endif

  #endif
  /*
   * Parameter set compiled in when CELL is defined.
   * Only S/D/C/Z prefixes are configured, and no *_DEFAULT_R is defined here.
   */
  #ifdef CELL
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 8192
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 4
  #endif
  /*
   * Parameter set compiled in when PPCG4 is defined.
   * This block defines neither SNUMOPT/DNUMOPT nor any *_DEFAULT_R macro.
   */
  #ifdef PPCG4
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 64

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256

  #define SYMV_P 4
  #endif
  /*
   * Parameter set compiled in when PPC970 is defined.
   * The *_DEFAULT_P macros are defined only when OS_LINUX is also defined;
   * on other systems this block leaves them undefined.
   */
  #ifdef PPC970
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 2688
  #define GEMM_DEFAULT_OFFSET_B 3072
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 16
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #ifdef OS_LINUX
  /*
   * NOTE(review): 1024976 is not 1024 * 1024 (1048576). Left unchanged because
   * it has to match whatever value L2_SIZE is given elsewhere - confirm there
   * before touching this constant.
   */
  #if L2_SIZE == 1024976
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 256
  #else
  #define SGEMM_DEFAULT_P 176
  #define DGEMM_DEFAULT_P 176
  #define CGEMM_DEFAULT_P 176
  #define ZGEMM_DEFAULT_P 176
  #endif
  #endif

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 4
  #endif
  /*
   * Parameter set compiled in when PPC440 is defined.
   * Each *_DEFAULT_R aliases the matching *_DEFAULT_P, so R equals P here.
   */
  #ifdef PPC440
  #define SNUMOPT 2
  #define DNUMOPT 2

  /* Both offsets are written as (32 * 0), i.e. zero. */
  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_P 512

  #define SGEMM_DEFAULT_Q 1024
  #define DGEMM_DEFAULT_Q 512
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256

  #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P

  #define SYMV_P 4
  #endif
  /*
   * Parameter set compiled in when PPC440FP2 is defined.
   * No *_DEFAULT_R macro is defined in this block.
   */
  #ifdef PPC440FP2
  #define SNUMOPT 4
  #define DNUMOPT 4

  /* Both offsets are written as (32 * 0), i.e. zero. */
  #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128

  /* "#if 1" hard-selects the larger Q set; the #else set is kept but inactive. */
  #if 1
  #define SGEMM_DEFAULT_Q 4096
  #define DGEMM_DEFAULT_Q 3072
  #define CGEMM_DEFAULT_Q 2048
  #define ZGEMM_DEFAULT_Q 1024
  #else
  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128
  #endif

  #define SYMV_P 4
  #endif
  /*
   * Parameter set shared by POWER3, POWER4 and POWER5.
   * Offsets, alignment, unroll factors and SYMV_P are common; P/Q/R come from
   * the per-variant sections below.
   */
  #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  /*
   * POWER3 is the only variant here that sets SNUMOPT/DNUMOPT and R.
   * NOTE(review): no CGEMM_DEFAULT_P/Q/R is defined for POWER3 - confirm
   * whether that is intentional.
   */
  #ifdef POWER3
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define SGEMM_DEFAULT_P 256
  #define SGEMM_DEFAULT_Q 432
  #define SGEMM_DEFAULT_R 1012

  #define DGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_Q 216
  #define DGEMM_DEFAULT_R 1012

  #define ZGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_Q 104
  #define ZGEMM_DEFAULT_R 1012
  #endif

  /* POWER4: only P is set, chosen by ALLOC_HUGETLB; no Q is defined here. */
  #if defined(POWER4)
  #ifdef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_P 184
  #define DGEMM_DEFAULT_P 184
  #define CGEMM_DEFAULT_P 184
  #define ZGEMM_DEFAULT_P 184
  #else
  #define SGEMM_DEFAULT_P 144
  #define DGEMM_DEFAULT_P 144
  #define CGEMM_DEFAULT_P 144
  #define ZGEMM_DEFAULT_P 144
  #endif
  #endif

  /* POWER5: P chosen by ALLOC_HUGETLB, Q fixed at 256. */
  #if defined(POWER5)
  #ifdef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 128
  #else
  #define SGEMM_DEFAULT_P 320
  #define DGEMM_DEFAULT_P 160
  #define CGEMM_DEFAULT_P 160
  #define ZGEMM_DEFAULT_P 80
  #endif

  #define SGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #endif

  #define SYMV_P 8
  #endif
  /*
   * Parameter set compiled in when POWER6 is defined.
   * No *_DEFAULT_R macro is defined in this block.
   */
  #if defined(POWER6)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 384
  #define GEMM_DEFAULT_OFFSET_B 1024
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 4

  #define SGEMM_DEFAULT_P 992
  #define DGEMM_DEFAULT_P 480
  #define CGEMM_DEFAULT_P 488
  #define ZGEMM_DEFAULT_P 248

  #define SGEMM_DEFAULT_Q 504
  #define DGEMM_DEFAULT_Q 504
  #define CGEMM_DEFAULT_Q 400
  #define ZGEMM_DEFAULT_Q 400

  #define SYMV_P 8
  #endif
  /*
   * Parameter set compiled in when both SPARC and V7 are defined.
   * GEMM_THREAD expands to gemm_thread_mn, an identifier provided elsewhere.
   * No *_DEFAULT_R macro is defined in this block.
   */
  #if defined(SPARC) && defined(V7)
  #define SNUMOPT 4
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4

  #define SGEMM_DEFAULT_P 256
  #define DGEMM_DEFAULT_P 256
  #define CGEMM_DEFAULT_P 256
  #define ZGEMM_DEFAULT_P 256

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128

  #define SYMV_P 8
  #define GEMM_THREAD gemm_thread_mn
  #endif
  /*
   * Parameter set compiled in when SPARC and V9 are both defined, or when
   * __sparc_v9__ is defined.
   * No *_DEFAULT_R macro is defined in this block.
   */
  #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 2048
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 512
  #define DGEMM_DEFAULT_P 512
  #define CGEMM_DEFAULT_P 512
  #define ZGEMM_DEFAULT_P 512

  #define SGEMM_DEFAULT_Q 1024
  #define DGEMM_DEFAULT_Q 512
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256

  #define SYMV_P 8
  #endif
  /*
   * Parameter set compiled in when SICORTEX is defined.
   * P, Q and R are all fixed constants here.
   */
  #ifdef SICORTEX
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 4

  #define SGEMM_DEFAULT_P 108
  #define DGEMM_DEFAULT_P 112
  #define CGEMM_DEFAULT_P 108
  #define ZGEMM_DEFAULT_P 112

  #define SGEMM_DEFAULT_Q 288
  #define DGEMM_DEFAULT_Q 144
  #define CGEMM_DEFAULT_Q 144
  #define ZGEMM_DEFAULT_Q 72

  #define SGEMM_DEFAULT_R 2000
  #define DGEMM_DEFAULT_R 2000
  #define CGEMM_DEFAULT_R 2000
  #define ZGEMM_DEFAULT_R 2000

  #define SYMV_P 16
  #endif
  /*
   * Parameter set compiled in when LOONGSON3A is defined.
   * Original note: "Copy from SICORTEX". The offsets, alignment and SYMV_P
   * match the SICORTEX block; the unroll and P/Q/R values differ from it.
   */
  #ifdef LOONGSON3A
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 64
  #define DGEMM_DEFAULT_P 44
  #define CGEMM_DEFAULT_P 64
  #define ZGEMM_DEFAULT_P 32

  #define SGEMM_DEFAULT_Q 192
  #define DGEMM_DEFAULT_Q 92
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 80

  /* DGEMM is the only prefix whose R is forwarded to an external identifier. */
  #define SGEMM_DEFAULT_R 640
  #define DGEMM_DEFAULT_R dgemm_r
  #define CGEMM_DEFAULT_R 640
  #define ZGEMM_DEFAULT_R 640

  #define GEMM_OFFSET_A1 0x10000
  #define GEMM_OFFSET_B1 0x100000

  #define SYMV_P 16
  #endif
  /*
   * Parameter set compiled in when LOONGSON3B is defined.
   * All unroll factors are 2; P, Q and R are fixed constants.
   */
  #ifdef LOONGSON3B
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_N 2

  #define SGEMM_DEFAULT_P 64
  #define DGEMM_DEFAULT_P 24
  #define CGEMM_DEFAULT_P 24
  #define ZGEMM_DEFAULT_P 20

  #define SGEMM_DEFAULT_Q 192
  #define DGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 64

  #define SGEMM_DEFAULT_R 512
  #define DGEMM_DEFAULT_R 512
  #define CGEMM_DEFAULT_R 512
  #define ZGEMM_DEFAULT_R 512

  #define GEMM_OFFSET_A1 0x10000
  #define GEMM_OFFSET_B1 0x100000

  #define SYMV_P 16
  #endif
  /*
   * Parameter set compiled in when GENERIC is defined.
   * N unroll factors are shared; M unroll factors depend on ARCH_X86.
   * P and R are forwarded to lower-case identifiers provided elsewhere.
   */
  #ifdef GENERIC
  #define SNUMOPT 2
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif

  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same constant for every prefix. */
  #define SGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #define SYMV_P 16
  #endif
  /*
   * Fallbacks: if none of the preceding sections defined the QGEMM/XGEMM
   * unroll factors, default each of them to 2.
   */
  #ifndef QGEMM_DEFAULT_UNROLL_M
  #define QGEMM_DEFAULT_UNROLL_M 2
  #endif

  #ifndef QGEMM_DEFAULT_UNROLL_N
  #define QGEMM_DEFAULT_UNROLL_N 2
  #endif

  #ifndef XGEMM_DEFAULT_UNROLL_M
  #define XGEMM_DEFAULT_UNROLL_M 2
  #endif

  #ifndef XGEMM_DEFAULT_UNROLL_N
  #define XGEMM_DEFAULT_UNROLL_N 2
  #endif
  /*
   * Shuffle-instruction macros.
   * Without HAVE_SSE2, SHUFPD_0..3 expand to shufps with the immediates
   * 0x44 / 0x4e / 0xe4 / 0xee. Otherwise the #ifndef fallbacks below define
   * them as shufpd with immediates 0..3.
   * Every expansion ends in a comma, so presumably the use site supplies the
   * remaining operands directly after the macro - confirm against callers.
   */
  #ifndef HAVE_SSE2
  #define SHUFPD_0 shufps $0x44,
  #define SHUFPD_1 shufps $0x4e,
  #define SHUFPD_2 shufps $0xe4,
  #define SHUFPD_3 shufps $0xee,
  #endif

  #ifndef SHUFPD_0
  #define SHUFPD_0 shufpd $0,
  #endif

  #ifndef SHUFPD_1
  #define SHUFPD_1 shufpd $1,
  #endif

  #ifndef SHUFPD_2
  #define SHUFPD_2 shufpd $2,
  #endif

  #ifndef SHUFPD_3
  #define SHUFPD_3 shufpd $3,
  #endif

  /* SHUFPS_39 is defined here only if nothing earlier provided it. */
  #ifndef SHUFPS_39
  #define SHUFPS_39 shufps $0x39,
  #endif
  1432. #endif