You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

param.h 41 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736
  /*****************************************************************************
  Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
  All rights reserved.
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:
  1. Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
  3. Neither the name of the ISCAS nor the names of its contributors may
  be used to endorse or promote products derived from this software
  without specific prior written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  **********************************************************************************/
  /*********************************************************************/
  /* Copyright 2009, 2010 The University of Texas at Austin. */
  /* All rights reserved. */
  /* */
  /* Redistribution and use in source and binary forms, with or */
  /* without modification, are permitted provided that the following */
  /* conditions are met: */
  /* */
  /* 1. Redistributions of source code must retain the above */
  /* copyright notice, this list of conditions and the following */
  /* disclaimer. */
  /* */
  /* 2. Redistributions in binary form must reproduce the above */
  /* copyright notice, this list of conditions and the following */
  /* disclaimer in the documentation and/or other materials */
  /* provided with the distribution. */
  /* */
  /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  /* POSSIBILITY OF SUCH DAMAGE. */
  /* */
  /* The views and conclusions contained in the software and */
  /* documentation are those of the authors and should not be */
  /* interpreted as representing official policies, either expressed */
  /* or implied, of The University of Texas at Austin. */
  /*********************************************************************/
  64. #ifndef PARAM_H
  65. #define PARAM_H
  /*
   * OPTERON parameter set -- active only when OPTERON is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, SYMV_P and the HAVE_EXCLUSIVE_CACHE flag.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to plain
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably objects declared elsewhere; confirm before using
   * them where a compile-time constant is required.
   */
  #ifdef OPTERON

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL

  /* UNROLL_N is the same for both ARCH_X86 and non-ARCH_X86 builds. */
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* UNROLL_M depends on whether ARCH_X86 is defined. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  /* P and R are forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define DGEMM_DEFAULT_P dgemm_p
  #define QGEMM_DEFAULT_P qgemm_p
  #define CGEMM_DEFAULT_P cgemm_p
  #define ZGEMM_DEFAULT_P zgemm_p
  #define XGEMM_DEFAULT_P xgemm_p

  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is 248 when ALLOC_HUGETLB is defined and 240 otherwise. */
  #ifdef ALLOC_HUGETLB
  #define SGEMM_DEFAULT_Q 248
  #define DGEMM_DEFAULT_Q 248
  #define QGEMM_DEFAULT_Q 248
  #define CGEMM_DEFAULT_Q 248
  #define ZGEMM_DEFAULT_Q 248
  #define XGEMM_DEFAULT_Q 248
  #else /* !ALLOC_HUGETLB */
  #define SGEMM_DEFAULT_Q 240
  #define DGEMM_DEFAULT_Q 240
  #define QGEMM_DEFAULT_Q 240
  #define CGEMM_DEFAULT_Q 240
  #define ZGEMM_DEFAULT_Q 240
  #define XGEMM_DEFAULT_Q 240
  #endif /* ALLOC_HUGETLB */

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #endif /* OPTERON */
  /*
   * Parameter set shared by BARCELONA, SHANGHAI and BOBCAT -- active when any
   * of the three macros is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, SYMV_P, HAVE_EXCLUSIVE_CACHE and GEMM_THREAD.
   *
   * NOTE(review): the *_DEFAULT_R macros and GEMM_THREAD expand to
   * identifiers (sgemm_r, ..., gemm_thread_mn) that are not defined in this
   * part of the file -- presumably declared elsewhere; confirm.
   */
  #if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 832
  #define GEMM_DEFAULT_ALIGN 0x0fffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* UNROLL_M depends on whether ARCH_X86 is defined. */
  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  /*
   * The "#if 0" branch is never compiled; it holds an alternative P/Q set
   * (Q = 248) that is currently disabled. The "#else" branch (Q = 224) is
   * the one in effect.
   */
  #if 0
  #define SGEMM_DEFAULT_P 496
  #define DGEMM_DEFAULT_P 248
  #define QGEMM_DEFAULT_P 124
  #define CGEMM_DEFAULT_P 248
  #define ZGEMM_DEFAULT_P 124
  #define XGEMM_DEFAULT_P 62
  #define SGEMM_DEFAULT_Q 248
  #define DGEMM_DEFAULT_Q 248
  #define QGEMM_DEFAULT_Q 248
  #define CGEMM_DEFAULT_Q 248
  #define ZGEMM_DEFAULT_Q 248
  #define XGEMM_DEFAULT_Q 248
  #else /* active values */
  #define SGEMM_DEFAULT_P 448
  #define DGEMM_DEFAULT_P 224
  #define QGEMM_DEFAULT_P 112
  #define CGEMM_DEFAULT_P 224
  #define ZGEMM_DEFAULT_P 112
  #define XGEMM_DEFAULT_P 56
  #define SGEMM_DEFAULT_Q 224
  #define DGEMM_DEFAULT_Q 224
  #define QGEMM_DEFAULT_Q 224
  #define CGEMM_DEFAULT_Q 224
  #define ZGEMM_DEFAULT_Q 224
  #define XGEMM_DEFAULT_Q 224
  #endif /* 0 */

  /* R is forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #define GEMM_THREAD gemm_thread_mn

  #endif /* BARCELONA || SHANGHAI || BOBCAT */
  /*
   * ATHLON parameter set -- active only when ATHLON is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, SYMV_P and the HAVE_EXCLUSIVE_CACHE flag. Unlike some sibling
   * sections, nothing here depends on ARCH_X86.
   *
   * NOTE(review): the *_DEFAULT_R macros expand to identifiers (sgemm_r,
   * ...) that are not defined in this part of the file -- presumably
   * declared elsewhere; confirm.
   */
  #ifdef ATHLON

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 384
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* R is forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_P 208
  #define DGEMM_DEFAULT_P 104
  #define QGEMM_DEFAULT_P 56
  #define CGEMM_DEFAULT_P 104
  #define ZGEMM_DEFAULT_P 56
  #define XGEMM_DEFAULT_P 28

  #define SGEMM_DEFAULT_Q 208
  #define DGEMM_DEFAULT_Q 208
  #define QGEMM_DEFAULT_Q 208
  #define CGEMM_DEFAULT_Q 208
  #define ZGEMM_DEFAULT_Q 208
  #define XGEMM_DEFAULT_Q 208

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #endif /* ATHLON */
  /*
   * VIAC3 parameter set -- active only when VIAC3 is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, and SYMV_P. This section does not define HAVE_EXCLUSIVE_CACHE.
   *
   * NOTE(review): the *_DEFAULT_R macros expand to identifiers (sgemm_r,
   * ...) that are not defined in this part of the file -- presumably
   * declared elsewhere; confirm.
   */
  #ifdef VIAC3

  #define SNUMOPT 2
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 256
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_M 1
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  /* R is forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  /* P is the same literal for every routine family in this section. */
  #define SGEMM_DEFAULT_P 128
  #define DGEMM_DEFAULT_P 128
  #define QGEMM_DEFAULT_P 128
  #define CGEMM_DEFAULT_P 128
  #define ZGEMM_DEFAULT_P 128
  #define XGEMM_DEFAULT_P 128

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #define SYMV_P 16

  #endif /* VIAC3 */
  /*
   * NANO parameter set -- active only when NANO is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, SYMV_P and the HAVE_EXCLUSIVE_CACHE flag. Both UNROLL_N and
   * UNROLL_M depend on whether ARCH_X86 is defined.
   *
   * NOTE(review): the *_DEFAULT_R macros expand to identifiers (sgemm_r,
   * ...) that are not defined in this part of the file -- presumably
   * declared elsewhere; confirm.
   */
  #ifdef NANO

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 64
  #define GEMM_DEFAULT_OFFSET_B 256
  #define GEMM_DEFAULT_ALIGN 0x01ffffUL

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  /* P is the same literal for every routine family in this section. */
  #define SGEMM_DEFAULT_P 288
  #define DGEMM_DEFAULT_P 288
  #define QGEMM_DEFAULT_P 288
  #define CGEMM_DEFAULT_P 288
  #define ZGEMM_DEFAULT_P 288
  #define XGEMM_DEFAULT_P 288

  /* R is forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 64
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 64
  #define XGEMM_DEFAULT_Q 32

  #define SYMV_P 16

  /* Flag macro with an empty expansion; only its presence can be tested. */
  #define HAVE_EXCLUSIVE_CACHE

  #endif /* NANO */
  /*
   * Parameter set shared by PENTIUM, PENTIUM2 and PENTIUM3 -- active when any
   * of the three macros is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, and SYMV_P. SNUMOPT and the SGEMM/CGEMM UNROLL_M values depend
   * on whether HAVE_SSE is defined.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably declared elsewhere; confirm.
   */
  #if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)

  #ifdef HAVE_SSE
  #define SNUMOPT 2
  #else /* !HAVE_SSE */
  #define SNUMOPT 1
  #endif /* HAVE_SSE */
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  /* Only the SGEMM and CGEMM UNROLL_M values change with HAVE_SSE. */
  #ifdef HAVE_SSE
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define CGEMM_DEFAULT_UNROLL_M 4
  #else /* !HAVE_SSE */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_M 2
  #endif /* HAVE_SSE */

  #define DGEMM_DEFAULT_UNROLL_M 2
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* Per family: P and R forward to identifiers, Q is the literal 256. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 4

  #endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
  /*
   * PENTIUMM parameter set -- active only when PENTIUMM is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default P/Q/R
   * values, and SYMV_P. The unroll factors depend on whether CORE_YONAH is
   * defined.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably declared elsewhere; confirm.
   */
  #ifdef PENTIUMM

  #define SNUMOPT 2
  #define DNUMOPT 1

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #ifdef CORE_YONAH
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else /* !CORE_YONAH */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* CORE_YONAH */

  /* Per family: P and R forward to identifiers, Q is the literal 256. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_Q 256
  #define SGEMM_DEFAULT_R sgemm_r

  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_R dgemm_r

  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_R qgemm_r

  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_R cgemm_r

  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_R zgemm_r

  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_R xgemm_r

  #define SYMV_P 4

  #endif /* PENTIUMM */
  /*
   * CORE_NORTHWOOD parameter set -- active only when CORE_NORTHWOOD is
   * defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * SYMV_P, and the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default
   * P/Q/R values. Nothing here depends on ARCH_X86.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably declared elsewhere; confirm.
   */
  #ifdef CORE_NORTHWOOD

  #define SNUMOPT 4
  #define DNUMOPT 2

  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 32
  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 8

  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* P and R are forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same literal for every routine family in this section. */
  #define SGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #endif /* CORE_NORTHWOOD */
  /*
   * CORE_PRESCOTT parameter set -- active only when CORE_PRESCOTT is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * SYMV_P, and the per-family (S/D/Q/C/Z/X) GEMM unroll factors and default
   * P/Q/R values. The offsets depend on __64BIT__ and the UNROLL_M values on
   * ARCH_X86.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably declared elsewhere; confirm.
   */
  #ifdef CORE_PRESCOTT

  #define SNUMOPT 4
  #define DNUMOPT 2

  /* Note the negated test: the first branch is taken when __64BIT__ is NOT defined. */
  #ifndef __64BIT__
  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 192
  #else /* __64BIT__ */
  #define GEMM_DEFAULT_OFFSET_A 0
  #define GEMM_DEFAULT_OFFSET_B 256
  #endif /* !__64BIT__ */

  #define GEMM_DEFAULT_ALIGN 0x0ffffUL

  #define SYMV_P 8

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1
  #endif /* ARCH_X86 */

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1

  /* P and R are forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same literal for every routine family in this section. */
  #define SGEMM_DEFAULT_Q 128
  #define DGEMM_DEFAULT_Q 128
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 128
  #define ZGEMM_DEFAULT_Q 128
  #define XGEMM_DEFAULT_Q 128

  #endif /* CORE_PRESCOTT */
  /*
   * CORE2 parameter set -- active only when CORE2 is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * SYMV_P, SWITCH_RATIO, and the per-family (S/D/Q/C/Z/X) GEMM unroll
   * factors and default P/Q/R values. UNROLL_N depends on ARCH_X86; UNROLL_M
   * has the same values in both branches.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably declared elsewhere; confirm.
   */
  #ifdef CORE2

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 448
  #define GEMM_DEFAULT_OFFSET_B 128
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 2
  #define DGEMM_DEFAULT_UNROLL_N 2
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 1
  #define ZGEMM_DEFAULT_UNROLL_N 1
  #define XGEMM_DEFAULT_UNROLL_N 1

  /*
   * MASK(a, b): (a + b - 1) / b * b, i.e. `a` rounded up to a multiple of
   * `b` when both are positive integers. Arguments are evaluated more than
   * once, so avoid side effects in them.
   * NOTE(review): this macro is defined only in the CORE2 + ARCH_X86 branch;
   * confirm that is intentional before relying on it elsewhere.
   */
  #define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  /* P and R are forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  /* Q is the same literal for every routine family in this section. */
  #define SGEMM_DEFAULT_Q 256
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 256
  #define CGEMM_DEFAULT_Q 256
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 256

  #endif /* CORE2 */
  /*
   * PENRYN parameter set -- active only when PENRYN is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * SYMV_P, SWITCH_RATIO, the per-family (S/D/Q/C/Z/X) GEMM unroll factors
   * and default P/Q/R values, and GETRF_FACTOR. UNROLL_M depends on
   * ARCH_X86; UNROLL_N has the same values in both branches.
   *
   * NOTE(review): the *_DEFAULT_P and *_DEFAULT_R macros expand to
   * identifiers (sgemm_p, sgemm_r, ...) that are not defined in this part of
   * the file -- presumably declared elsewhere; confirm.
   */
  #ifdef PENRYN

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  /* P and R are forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 128

  /* Floating-point constant, so it cannot be used in #if expressions. */
  #define GETRF_FACTOR 0.75

  #endif /* PENRYN */
  /*
   * DUNNINGTON parameter set -- active only when DUNNINGTON is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * SYMV_P, SWITCH_RATIO, the per-family (S/D/Q/C/Z/X) GEMM unroll factors
   * and default P/Q/R values, GETRF_FACTOR and GEMM_THREAD. UNROLL_M depends
   * on ARCH_X86; UNROLL_N has the same values in both branches.
   *
   * NOTE(review): the *_DEFAULT_P / *_DEFAULT_R macros and GEMM_THREAD expand
   * to identifiers (sgemm_p, sgemm_r, ..., gemm_thread_mn) that are not
   * defined in this part of the file -- presumably declared elsewhere;
   * confirm.
   */
  #ifdef DUNNINGTON

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 128
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 8
  #define DGEMM_DEFAULT_UNROLL_M 4
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 4
  #define ZGEMM_DEFAULT_UNROLL_M 2
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  /* P and R are forwarded to identifiers rather than literal values. */
  #define SGEMM_DEFAULT_P sgemm_p
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P dgemm_p
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P qgemm_p
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P cgemm_p
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P zgemm_p
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P xgemm_p
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 768
  #define DGEMM_DEFAULT_Q 384
  #define QGEMM_DEFAULT_Q 192
  #define CGEMM_DEFAULT_Q 768
  #define ZGEMM_DEFAULT_Q 384
  #define XGEMM_DEFAULT_Q 192

  /* Floating-point constant, so it cannot be used in #if expressions. */
  #define GETRF_FACTOR 0.75

  #define GEMM_THREAD gemm_thread_mn

  #endif /* DUNNINGTON */
  /*
   * NEHALEM parameter set -- active only when NEHALEM is defined.
   *
   * Defines SNUMOPT/DNUMOPT, GEMM_DEFAULT_OFFSET_A/B, GEMM_DEFAULT_ALIGN,
   * SYMV_P, SWITCH_RATIO, the per-family (S/D/Q/C/Z/X) GEMM unroll factors
   * and default P/Q/R values, and GETRF_FACTOR. UNROLL_N depends on
   * ARCH_X86; UNROLL_M has the same values in both branches.
   *
   * NOTE(review): the *_DEFAULT_R macros expand to identifiers (sgemm_r,
   * ...) that are not defined in this part of the file -- presumably
   * declared elsewhere; confirm.
   */
  #ifdef NEHALEM

  #define SNUMOPT 8
  #define DNUMOPT 4

  #define GEMM_DEFAULT_OFFSET_A 32
  #define GEMM_DEFAULT_OFFSET_B 0
  #define GEMM_DEFAULT_ALIGN 0x03fffUL

  #define SYMV_P 8
  #define SWITCH_RATIO 4

  #ifdef ARCH_X86
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 4
  #define DGEMM_DEFAULT_UNROLL_N 4
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 2
  #define ZGEMM_DEFAULT_UNROLL_N 2
  #define XGEMM_DEFAULT_UNROLL_N 1
  #else /* !ARCH_X86 */
  #define SGEMM_DEFAULT_UNROLL_M 4
  #define DGEMM_DEFAULT_UNROLL_M 2
  #define QGEMM_DEFAULT_UNROLL_M 2
  #define CGEMM_DEFAULT_UNROLL_M 2
  #define ZGEMM_DEFAULT_UNROLL_M 1
  #define XGEMM_DEFAULT_UNROLL_M 1

  #define SGEMM_DEFAULT_UNROLL_N 8
  #define DGEMM_DEFAULT_UNROLL_N 8
  #define QGEMM_DEFAULT_UNROLL_N 2
  #define CGEMM_DEFAULT_UNROLL_N 4
  #define ZGEMM_DEFAULT_UNROLL_N 4
  #define XGEMM_DEFAULT_UNROLL_N 1
  #endif /* ARCH_X86 */

  /* Here P is a literal; only R is forwarded to an identifier. */
  #define SGEMM_DEFAULT_P 504
  #define SGEMM_DEFAULT_R sgemm_r
  #define DGEMM_DEFAULT_P 504
  #define DGEMM_DEFAULT_R dgemm_r
  #define QGEMM_DEFAULT_P 504
  #define QGEMM_DEFAULT_R qgemm_r
  #define CGEMM_DEFAULT_P 252
  #define CGEMM_DEFAULT_R cgemm_r
  #define ZGEMM_DEFAULT_P 252
  #define ZGEMM_DEFAULT_R zgemm_r
  #define XGEMM_DEFAULT_P 252
  #define XGEMM_DEFAULT_R xgemm_r

  #define SGEMM_DEFAULT_Q 512
  #define DGEMM_DEFAULT_Q 256
  #define QGEMM_DEFAULT_Q 128
  #define CGEMM_DEFAULT_Q 512
  #define ZGEMM_DEFAULT_Q 256
  #define XGEMM_DEFAULT_Q 128

  /* Floating-point constant, so it cannot be used in #if expressions. */
  #define GETRF_FACTOR 0.72

  #endif /* NEHALEM */
  /*
   * SANDYBRIDGE: GEMM tuning constants, compiled only when SANDYBRIDGE is
   * defined.  Lines of the form "//#define ..." are disabled alternatives
   * left in place next to the active definition of the same macro.
   */
  729. #ifdef SANDYBRIDGE
  730. #define SNUMOPT 8
  731. #define DNUMOPT 4
  732. #define GEMM_DEFAULT_OFFSET_A 0
  733. #define GEMM_DEFAULT_OFFSET_B 0
  734. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  735. #define SYMV_P 8
  736. #define SWITCH_RATIO 4
  /* Unroll factors: the ARCH_X86 branch uses smaller S/D/C/Z values than the #else branch. */
  737. #ifdef ARCH_X86
  738. #define SGEMM_DEFAULT_UNROLL_M 4
  739. #define DGEMM_DEFAULT_UNROLL_M 2
  740. #define QGEMM_DEFAULT_UNROLL_M 2
  741. #define CGEMM_DEFAULT_UNROLL_M 2
  742. #define ZGEMM_DEFAULT_UNROLL_M 1
  743. #define XGEMM_DEFAULT_UNROLL_M 1
  744. #define SGEMM_DEFAULT_UNROLL_N 4
  745. #define DGEMM_DEFAULT_UNROLL_N 4
  746. #define QGEMM_DEFAULT_UNROLL_N 2
  747. #define CGEMM_DEFAULT_UNROLL_N 2
  748. #define ZGEMM_DEFAULT_UNROLL_N 2
  749. #define XGEMM_DEFAULT_UNROLL_N 1
  750. #else
  751. #define SGEMM_DEFAULT_UNROLL_M 8
  752. #define DGEMM_DEFAULT_UNROLL_M 8
  753. #define QGEMM_DEFAULT_UNROLL_M 2
  754. #define CGEMM_DEFAULT_UNROLL_M 8
  755. #define ZGEMM_DEFAULT_UNROLL_M 4
  756. #define XGEMM_DEFAULT_UNROLL_M 1
  757. #define SGEMM_DEFAULT_UNROLL_N 8
  758. #define DGEMM_DEFAULT_UNROLL_N 4
  759. #define QGEMM_DEFAULT_UNROLL_N 2
  760. #define CGEMM_DEFAULT_UNROLL_N 4
  761. #define ZGEMM_DEFAULT_UNROLL_N 4
  762. #define XGEMM_DEFAULT_UNROLL_N 1
  763. #endif
  /* R expands to an identifier (sgemm_r, ...) for every type except CGEMM,
     where the literal 1024 is active and the cgemm_r form is the disabled one. */
  764. #define SGEMM_DEFAULT_P 512
  765. #define SGEMM_DEFAULT_R sgemm_r
  766. //#define SGEMM_DEFAULT_R 1024
  767. #define DGEMM_DEFAULT_P 512
  768. #define DGEMM_DEFAULT_R dgemm_r
  769. //#define DGEMM_DEFAULT_R 1024
  770. #define QGEMM_DEFAULT_P 504
  771. #define QGEMM_DEFAULT_R qgemm_r
  772. #define CGEMM_DEFAULT_P 128
  773. //#define CGEMM_DEFAULT_R cgemm_r
  774. #define CGEMM_DEFAULT_R 1024
  775. #define ZGEMM_DEFAULT_P 512
  776. #define ZGEMM_DEFAULT_R zgemm_r
  777. //#define ZGEMM_DEFAULT_R 1024
  778. #define XGEMM_DEFAULT_P 252
  779. #define XGEMM_DEFAULT_R xgemm_r
  780. #define SGEMM_DEFAULT_Q 256
  781. #define DGEMM_DEFAULT_Q 256
  782. #define QGEMM_DEFAULT_Q 128
  783. #define CGEMM_DEFAULT_Q 256
  784. #define ZGEMM_DEFAULT_Q 192
  785. #define XGEMM_DEFAULT_Q 128
  786. #define GETRF_FACTOR 0.72
  787. #endif
  /*
   * ATOM: GEMM tuning constants, compiled only when ATOM is defined.
   * Both P and R expand to identifiers (sgemm_p, sgemm_r, ...) that are not
   * defined in this block; Q is the literal 256 for every type.
   */
  788. #ifdef ATOM
  789. #define SNUMOPT 2
  790. #define DNUMOPT 1
  791. #define GEMM_DEFAULT_OFFSET_A 64
  792. #define GEMM_DEFAULT_OFFSET_B 0
  793. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  794. #define SYMV_P 8
  /* Only UNROLL_M depends on ARCH_X86; UNROLL_N below is shared by both cases. */
  795. #ifdef ARCH_X86
  796. #define SGEMM_DEFAULT_UNROLL_M 4
  797. #define DGEMM_DEFAULT_UNROLL_M 2
  798. #define QGEMM_DEFAULT_UNROLL_M 2
  799. #define CGEMM_DEFAULT_UNROLL_M 2
  800. #define ZGEMM_DEFAULT_UNROLL_M 1
  801. #define XGEMM_DEFAULT_UNROLL_M 1
  802. #else
  803. #define SGEMM_DEFAULT_UNROLL_M 8
  804. #define DGEMM_DEFAULT_UNROLL_M 4
  805. #define QGEMM_DEFAULT_UNROLL_M 2
  806. #define CGEMM_DEFAULT_UNROLL_M 4
  807. #define ZGEMM_DEFAULT_UNROLL_M 2
  808. #define XGEMM_DEFAULT_UNROLL_M 1
  809. #endif
  810. #define SGEMM_DEFAULT_UNROLL_N 4
  811. #define DGEMM_DEFAULT_UNROLL_N 2
  812. #define QGEMM_DEFAULT_UNROLL_N 2
  813. #define CGEMM_DEFAULT_UNROLL_N 2
  814. #define ZGEMM_DEFAULT_UNROLL_N 1
  815. #define XGEMM_DEFAULT_UNROLL_N 1
  816. #define SGEMM_DEFAULT_P sgemm_p
  817. #define SGEMM_DEFAULT_R sgemm_r
  818. #define DGEMM_DEFAULT_P dgemm_p
  819. #define DGEMM_DEFAULT_R dgemm_r
  820. #define QGEMM_DEFAULT_P qgemm_p
  821. #define QGEMM_DEFAULT_R qgemm_r
  822. #define CGEMM_DEFAULT_P cgemm_p
  823. #define CGEMM_DEFAULT_R cgemm_r
  824. #define ZGEMM_DEFAULT_P zgemm_p
  825. #define ZGEMM_DEFAULT_R zgemm_r
  826. #define XGEMM_DEFAULT_P xgemm_p
  827. #define XGEMM_DEFAULT_R xgemm_r
  828. #define SGEMM_DEFAULT_Q 256
  829. #define DGEMM_DEFAULT_Q 256
  830. #define QGEMM_DEFAULT_Q 256
  831. #define CGEMM_DEFAULT_Q 256
  832. #define ZGEMM_DEFAULT_Q 256
  833. #define XGEMM_DEFAULT_Q 256
  834. #endif
  /*
   * ITANIUM2: GEMM tuning constants, compiled only when ITANIUM2 is defined.
   * All six type prefixes (S, D, Q, C, Z, X) get explicit unroll, P, Q and R
   * entries.  P and R expand to identifiers not defined in this block; Q is
   * the literal 1024 throughout.
   */
  835. #ifdef ITANIUM2
  836. #define SNUMOPT 4
  837. #define DNUMOPT 4
  838. #define GEMM_DEFAULT_OFFSET_A 0
  839. #define GEMM_DEFAULT_OFFSET_B 128
  840. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  /* Unroll is 8x8 for the real types and 4x4 for the complex types. */
  841. #define SGEMM_DEFAULT_UNROLL_M 8
  842. #define SGEMM_DEFAULT_UNROLL_N 8
  843. #define DGEMM_DEFAULT_UNROLL_M 8
  844. #define DGEMM_DEFAULT_UNROLL_N 8
  845. #define QGEMM_DEFAULT_UNROLL_M 8
  846. #define QGEMM_DEFAULT_UNROLL_N 8
  847. #define CGEMM_DEFAULT_UNROLL_M 4
  848. #define CGEMM_DEFAULT_UNROLL_N 4
  849. #define ZGEMM_DEFAULT_UNROLL_M 4
  850. #define ZGEMM_DEFAULT_UNROLL_N 4
  851. #define XGEMM_DEFAULT_UNROLL_M 4
  852. #define XGEMM_DEFAULT_UNROLL_N 4
  853. #define SGEMM_DEFAULT_P sgemm_p
  854. #define DGEMM_DEFAULT_P dgemm_p
  855. #define QGEMM_DEFAULT_P qgemm_p
  856. #define CGEMM_DEFAULT_P cgemm_p
  857. #define ZGEMM_DEFAULT_P zgemm_p
  858. #define XGEMM_DEFAULT_P xgemm_p
  859. #define SGEMM_DEFAULT_Q 1024
  860. #define DGEMM_DEFAULT_Q 1024
  861. #define QGEMM_DEFAULT_Q 1024
  862. #define CGEMM_DEFAULT_Q 1024
  863. #define ZGEMM_DEFAULT_Q 1024
  864. #define XGEMM_DEFAULT_Q 1024
  865. #define SGEMM_DEFAULT_R sgemm_r
  866. #define DGEMM_DEFAULT_R dgemm_r
  867. #define QGEMM_DEFAULT_R qgemm_r
  868. #define CGEMM_DEFAULT_R cgemm_r
  869. #define ZGEMM_DEFAULT_R zgemm_r
  870. #define XGEMM_DEFAULT_R xgemm_r
  871. #define SYMV_P 16
  872. #define GETRF_FACTOR 0.65
  873. #endif
  /*
   * EV4 / EV5 / EV6: GEMM tuning constants shared by the three targets, with
   * per-target P/Q (and, for EV4 only, R) values selected further down.
   * Only S, D, C and Z entries are defined in this block.
   */
  874. #if defined(EV4) || defined(EV5) || defined(EV6)
  /* EV4 uses 1 for both NUMOPT values; EV5 and EV6 use 2. */
  875. #ifdef EV4
  876. #define SNUMOPT 1
  877. #define DNUMOPT 1
  878. #else
  879. #define SNUMOPT 2
  880. #define DNUMOPT 2
  881. #endif
  882. #define GEMM_DEFAULT_OFFSET_A 512
  883. #define GEMM_DEFAULT_OFFSET_B 512
  884. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  885. #define SGEMM_DEFAULT_UNROLL_M 4
  886. #define SGEMM_DEFAULT_UNROLL_N 4
  887. #define DGEMM_DEFAULT_UNROLL_M 4
  888. #define DGEMM_DEFAULT_UNROLL_N 4
  889. #define CGEMM_DEFAULT_UNROLL_M 2
  890. #define CGEMM_DEFAULT_UNROLL_N 2
  891. #define ZGEMM_DEFAULT_UNROLL_M 2
  892. #define ZGEMM_DEFAULT_UNROLL_N 2
  893. #define SYMV_P 8
  /* EV4 is the only target here that fixes xGEMM_DEFAULT_R to a literal. */
  894. #ifdef EV4
  895. #define SGEMM_DEFAULT_P 32
  896. #define SGEMM_DEFAULT_Q 112
  897. #define SGEMM_DEFAULT_R 256
  898. #define DGEMM_DEFAULT_P 32
  899. #define DGEMM_DEFAULT_Q 56
  900. #define DGEMM_DEFAULT_R 256
  901. #define CGEMM_DEFAULT_P 32
  902. #define CGEMM_DEFAULT_Q 64
  903. #define CGEMM_DEFAULT_R 240
  904. #define ZGEMM_DEFAULT_P 32
  905. #define ZGEMM_DEFAULT_Q 32
  906. #define ZGEMM_DEFAULT_R 240
  907. #endif
  /* EV5 and EV6 define P and Q only; xGEMM_DEFAULT_R is left undefined by this block. */
  908. #ifdef EV5
  909. #define SGEMM_DEFAULT_P 64
  910. #define SGEMM_DEFAULT_Q 256
  911. #define DGEMM_DEFAULT_P 64
  912. #define DGEMM_DEFAULT_Q 128
  913. #define CGEMM_DEFAULT_P 64
  914. #define CGEMM_DEFAULT_Q 128
  915. #define ZGEMM_DEFAULT_P 64
  916. #define ZGEMM_DEFAULT_Q 64
  917. #endif
  918. #ifdef EV6
  919. #define SGEMM_DEFAULT_P 256
  920. #define SGEMM_DEFAULT_Q 512
  921. #define DGEMM_DEFAULT_P 256
  922. #define DGEMM_DEFAULT_Q 256
  923. #define CGEMM_DEFAULT_P 256
  924. #define CGEMM_DEFAULT_Q 256
  925. #define ZGEMM_DEFAULT_P 128
  926. #define ZGEMM_DEFAULT_Q 256
  927. #endif
  928. #endif
  /*
   * CELL: GEMM tuning constants, compiled only when CELL is defined.
   * Defines S, D, C and Z entries only, with literal P and Q values and no
   * xGEMM_DEFAULT_R.
   */
  929. #ifdef CELL
  930. #define SNUMOPT 2
  931. #define DNUMOPT 2
  932. #define GEMM_DEFAULT_OFFSET_A 0
  933. #define GEMM_DEFAULT_OFFSET_B 8192
  934. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  935. #define SGEMM_DEFAULT_UNROLL_M 16
  936. #define SGEMM_DEFAULT_UNROLL_N 4
  937. #define DGEMM_DEFAULT_UNROLL_M 4
  938. #define DGEMM_DEFAULT_UNROLL_N 4
  939. #define CGEMM_DEFAULT_UNROLL_M 8
  940. #define CGEMM_DEFAULT_UNROLL_N 2
  941. #define ZGEMM_DEFAULT_UNROLL_M 2
  942. #define ZGEMM_DEFAULT_UNROLL_N 2
  943. #define SGEMM_DEFAULT_P 128
  944. #define DGEMM_DEFAULT_P 128
  945. #define CGEMM_DEFAULT_P 128
  946. #define ZGEMM_DEFAULT_P 128
  947. #define SGEMM_DEFAULT_Q 512
  948. #define DGEMM_DEFAULT_Q 256
  949. #define CGEMM_DEFAULT_Q 256
  950. #define ZGEMM_DEFAULT_Q 128
  951. #define SYMV_P 4
  952. #endif
  /*
   * PPCG4: GEMM tuning constants, compiled only when PPCG4 is defined.
   * Unlike most sibling blocks, this one defines neither SNUMOPT/DNUMOPT nor
   * any xGEMM_DEFAULT_R.
   */
  953. #ifdef PPCG4
  954. #define GEMM_DEFAULT_OFFSET_A 0
  955. #define GEMM_DEFAULT_OFFSET_B 1024
  956. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  957. #define SGEMM_DEFAULT_UNROLL_M 16
  958. #define SGEMM_DEFAULT_UNROLL_N 4
  959. #define DGEMM_DEFAULT_UNROLL_M 4
  960. #define DGEMM_DEFAULT_UNROLL_N 4
  961. #define CGEMM_DEFAULT_UNROLL_M 8
  962. #define CGEMM_DEFAULT_UNROLL_N 2
  963. #define ZGEMM_DEFAULT_UNROLL_M 2
  964. #define ZGEMM_DEFAULT_UNROLL_N 2
  965. #define SGEMM_DEFAULT_P 256
  966. #define DGEMM_DEFAULT_P 128
  967. #define CGEMM_DEFAULT_P 128
  968. #define ZGEMM_DEFAULT_P 64
  969. #define SGEMM_DEFAULT_Q 256
  970. #define DGEMM_DEFAULT_Q 256
  971. #define CGEMM_DEFAULT_Q 256
  972. #define ZGEMM_DEFAULT_Q 256
  973. #define SYMV_P 4
  974. #endif
  /*
   * PPC970: GEMM tuning constants, compiled only when PPC970 is defined.
   * The P values are chosen from L2_SIZE, and only when OS_LINUX is defined.
   */
  975. #ifdef PPC970
  976. #define SNUMOPT 4
  977. #define DNUMOPT 4
  978. #define GEMM_DEFAULT_OFFSET_A 2688
  979. #define GEMM_DEFAULT_OFFSET_B 3072
  980. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  981. #define SGEMM_DEFAULT_UNROLL_M 16
  982. #define SGEMM_DEFAULT_UNROLL_N 4
  983. #define DGEMM_DEFAULT_UNROLL_M 4
  984. #define DGEMM_DEFAULT_UNROLL_N 4
  985. #define CGEMM_DEFAULT_UNROLL_M 8
  986. #define CGEMM_DEFAULT_UNROLL_N 2
  987. #define ZGEMM_DEFAULT_UNROLL_M 2
  988. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* NOTE(review): when OS_LINUX is not defined, no xGEMM_DEFAULT_P is set by
     this block -- confirm that other targets get a value from elsewhere. */
  989. #ifdef OS_LINUX
  /* NOTE(review): 1024976 is not 1 MiB (1048576); the comparison only matches
     if the build passes exactly this L2_SIZE value -- confirm against the
     place where L2_SIZE is set. */
  990. #if L2_SIZE == 1024976
  991. #define SGEMM_DEFAULT_P 320
  992. #define DGEMM_DEFAULT_P 256
  993. #define CGEMM_DEFAULT_P 256
  994. #define ZGEMM_DEFAULT_P 256
  995. #else
  996. #define SGEMM_DEFAULT_P 176
  997. #define DGEMM_DEFAULT_P 176
  998. #define CGEMM_DEFAULT_P 176
  999. #define ZGEMM_DEFAULT_P 176
  1000. #endif
  1001. #endif
  1002. #define SGEMM_DEFAULT_Q 512
  1003. #define DGEMM_DEFAULT_Q 256
  1004. #define CGEMM_DEFAULT_Q 256
  1005. #define ZGEMM_DEFAULT_Q 128
  1006. #define SYMV_P 4
  1007. #endif
  /*
   * PPC440: GEMM tuning constants, compiled only when PPC440 is defined.
   * Each xGEMM_DEFAULT_R is defined as the matching xGEMM_DEFAULT_P, so R
   * evaluates to 512 for all four types.
   */
  1008. #ifdef PPC440
  1009. #define SNUMOPT 2
  1010. #define DNUMOPT 2
  /* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
  1011. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1012. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1013. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1014. #define SGEMM_DEFAULT_UNROLL_M 4
  1015. #define SGEMM_DEFAULT_UNROLL_N 4
  1016. #define DGEMM_DEFAULT_UNROLL_M 4
  1017. #define DGEMM_DEFAULT_UNROLL_N 4
  1018. #define CGEMM_DEFAULT_UNROLL_M 2
  1019. #define CGEMM_DEFAULT_UNROLL_N 2
  1020. #define ZGEMM_DEFAULT_UNROLL_M 2
  1021. #define ZGEMM_DEFAULT_UNROLL_N 2
  1022. #define SGEMM_DEFAULT_P 512
  1023. #define DGEMM_DEFAULT_P 512
  1024. #define CGEMM_DEFAULT_P 512
  1025. #define ZGEMM_DEFAULT_P 512
  1026. #define SGEMM_DEFAULT_Q 1024
  1027. #define DGEMM_DEFAULT_Q 512
  1028. #define CGEMM_DEFAULT_Q 512
  1029. #define ZGEMM_DEFAULT_Q 256
  1030. #define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
  1031. #define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
  1032. #define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
  1033. #define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
  1034. #define SYMV_P 4
  1035. #endif
  /*
   * PPC440FP2: GEMM tuning constants, compiled only when PPC440FP2 is defined.
   * Defines S, D, C and Z entries only, and no xGEMM_DEFAULT_R.
   */
  1036. #ifdef PPC440FP2
  1037. #define SNUMOPT 4
  1038. #define DNUMOPT 4
  /* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
  1039. #define GEMM_DEFAULT_OFFSET_A (32 * 0)
  1040. #define GEMM_DEFAULT_OFFSET_B (32 * 0)
  1041. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1042. #define SGEMM_DEFAULT_UNROLL_M 8
  1043. #define SGEMM_DEFAULT_UNROLL_N 4
  1044. #define DGEMM_DEFAULT_UNROLL_M 8
  1045. #define DGEMM_DEFAULT_UNROLL_N 4
  1046. #define CGEMM_DEFAULT_UNROLL_M 4
  1047. #define CGEMM_DEFAULT_UNROLL_N 2
  1048. #define ZGEMM_DEFAULT_UNROLL_M 4
  1049. #define ZGEMM_DEFAULT_UNROLL_N 2
  1050. #define SGEMM_DEFAULT_P 128
  1051. #define DGEMM_DEFAULT_P 128
  1052. #define CGEMM_DEFAULT_P 128
  1053. #define ZGEMM_DEFAULT_P 128
  /* "#if 1" hard-selects the larger Q set; the #else values are never
     compiled and serve only as a manual switch. */
  1054. #if 1
  1055. #define SGEMM_DEFAULT_Q 4096
  1056. #define DGEMM_DEFAULT_Q 3072
  1057. #define CGEMM_DEFAULT_Q 2048
  1058. #define ZGEMM_DEFAULT_Q 1024
  1059. #else
  1060. #define SGEMM_DEFAULT_Q 512
  1061. #define DGEMM_DEFAULT_Q 256
  1062. #define CGEMM_DEFAULT_Q 256
  1063. #define ZGEMM_DEFAULT_Q 128
  1064. #endif
  1065. #define SYMV_P 4
  1066. #endif
  /*
   * POWER3 / POWER4 / POWER5: offsets, alignment, unroll factors and SYMV_P
   * are shared; P/Q/R are set per target in the nested conditionals below.
   * The three targets do not define the same set of macros (see the notes on
   * each sub-block).
   */
  1067. #if defined(POWER3) || defined(POWER4) || defined(POWER5)
  1068. #define GEMM_DEFAULT_OFFSET_A 0
  1069. #define GEMM_DEFAULT_OFFSET_B 2048
  1070. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1071. #define SGEMM_DEFAULT_UNROLL_M 4
  1072. #define SGEMM_DEFAULT_UNROLL_N 4
  1073. #define DGEMM_DEFAULT_UNROLL_M 4
  1074. #define DGEMM_DEFAULT_UNROLL_N 4
  1075. #define CGEMM_DEFAULT_UNROLL_M 2
  1076. #define CGEMM_DEFAULT_UNROLL_N 2
  1077. #define ZGEMM_DEFAULT_UNROLL_M 2
  1078. #define ZGEMM_DEFAULT_UNROLL_N 2
  /* POWER3: the only target here that sets SNUMOPT/DNUMOPT and R.
     NOTE(review): no CGEMM_DEFAULT_P/Q/R is defined for POWER3 -- confirm
     whether that omission is intentional. */
  1079. #ifdef POWER3
  1080. #define SNUMOPT 4
  1081. #define DNUMOPT 4
  1082. #define SGEMM_DEFAULT_P 256
  1083. #define SGEMM_DEFAULT_Q 432
  1084. #define SGEMM_DEFAULT_R 1012
  1085. #define DGEMM_DEFAULT_P 256
  1086. #define DGEMM_DEFAULT_Q 216
  1087. #define DGEMM_DEFAULT_R 1012
  1088. #define ZGEMM_DEFAULT_P 256
  1089. #define ZGEMM_DEFAULT_Q 104
  1090. #define ZGEMM_DEFAULT_R 1012
  1091. #endif
  /* POWER4: only P is defined (184 with ALLOC_HUGETLB, 144 without).
     NOTE(review): no xGEMM_DEFAULT_Q is set for POWER4 in this block. */
  1092. #if defined(POWER4)
  1093. #ifdef ALLOC_HUGETLB
  1094. #define SGEMM_DEFAULT_P 184
  1095. #define DGEMM_DEFAULT_P 184
  1096. #define CGEMM_DEFAULT_P 184
  1097. #define ZGEMM_DEFAULT_P 184
  1098. #else
  1099. #define SGEMM_DEFAULT_P 144
  1100. #define DGEMM_DEFAULT_P 144
  1101. #define CGEMM_DEFAULT_P 144
  1102. #define ZGEMM_DEFAULT_P 144
  1103. #endif
  1104. #endif
  /* POWER5: P depends on ALLOC_HUGETLB; Q is 256 for all four types. */
  1105. #if defined(POWER5)
  1106. #ifdef ALLOC_HUGETLB
  1107. #define SGEMM_DEFAULT_P 512
  1108. #define DGEMM_DEFAULT_P 256
  1109. #define CGEMM_DEFAULT_P 256
  1110. #define ZGEMM_DEFAULT_P 128
  1111. #else
  1112. #define SGEMM_DEFAULT_P 320
  1113. #define DGEMM_DEFAULT_P 160
  1114. #define CGEMM_DEFAULT_P 160
  1115. #define ZGEMM_DEFAULT_P 80
  1116. #endif
  1117. #define SGEMM_DEFAULT_Q 256
  1118. #define CGEMM_DEFAULT_Q 256
  1119. #define DGEMM_DEFAULT_Q 256
  1120. #define ZGEMM_DEFAULT_Q 256
  1121. #endif
  1122. #define SYMV_P 8
  1123. #endif
  /*
   * POWER6: GEMM tuning constants, compiled only when POWER6 is defined.
   * Defines S, D, C and Z entries with literal P and Q values and no
   * xGEMM_DEFAULT_R.
   */
  1124. #if defined(POWER6)
  1125. #define SNUMOPT 4
  1126. #define DNUMOPT 4
  1127. #define GEMM_DEFAULT_OFFSET_A 384
  1128. #define GEMM_DEFAULT_OFFSET_B 1024
  1129. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1130. #define SGEMM_DEFAULT_UNROLL_M 4
  1131. #define SGEMM_DEFAULT_UNROLL_N 4
  1132. #define DGEMM_DEFAULT_UNROLL_M 4
  1133. #define DGEMM_DEFAULT_UNROLL_N 4
  1134. #define CGEMM_DEFAULT_UNROLL_M 2
  1135. #define CGEMM_DEFAULT_UNROLL_N 4
  1136. #define ZGEMM_DEFAULT_UNROLL_M 2
  1137. #define ZGEMM_DEFAULT_UNROLL_N 4
  1138. #define SGEMM_DEFAULT_P 992
  1139. #define DGEMM_DEFAULT_P 480
  1140. #define CGEMM_DEFAULT_P 488
  1141. #define ZGEMM_DEFAULT_P 248
  1142. #define SGEMM_DEFAULT_Q 504
  1143. #define DGEMM_DEFAULT_Q 504
  1144. #define CGEMM_DEFAULT_Q 400
  1145. #define ZGEMM_DEFAULT_Q 400
  1146. #define SYMV_P 8
  1147. #endif
  /*
   * SPARC V7: GEMM tuning constants, compiled only when both SPARC and V7 are
   * defined.  Besides the usual unroll/P/Q values this block also sets
   * GEMM_THREAD to gemm_thread_mn.
   */
  1148. #if defined(SPARC) && defined(V7)
  1149. #define SNUMOPT 4
  1150. #define DNUMOPT 4
  1151. #define GEMM_DEFAULT_OFFSET_A 0
  1152. #define GEMM_DEFAULT_OFFSET_B 2048
  1153. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1154. #define SGEMM_DEFAULT_UNROLL_M 2
  1155. #define SGEMM_DEFAULT_UNROLL_N 8
  1156. #define DGEMM_DEFAULT_UNROLL_M 2
  1157. #define DGEMM_DEFAULT_UNROLL_N 8
  1158. #define CGEMM_DEFAULT_UNROLL_M 1
  1159. #define CGEMM_DEFAULT_UNROLL_N 4
  1160. #define ZGEMM_DEFAULT_UNROLL_M 1
  1161. #define ZGEMM_DEFAULT_UNROLL_N 4
  1162. #define SGEMM_DEFAULT_P 256
  1163. #define DGEMM_DEFAULT_P 256
  1164. #define CGEMM_DEFAULT_P 256
  1165. #define ZGEMM_DEFAULT_P 256
  1166. #define SGEMM_DEFAULT_Q 512
  1167. #define DGEMM_DEFAULT_Q 256
  1168. #define CGEMM_DEFAULT_Q 256
  1169. #define ZGEMM_DEFAULT_Q 128
  1170. #define SYMV_P 8
  1171. #define GEMM_THREAD gemm_thread_mn
  1172. #endif
  /*
   * SPARC V9: GEMM tuning constants, compiled when SPARC and V9 are both
   * defined, or when the __sparc_v9__ macro is defined on its own.
   */
  1173. #if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
  1174. #define SNUMOPT 2
  1175. #define DNUMOPT 2
  1176. #define GEMM_DEFAULT_OFFSET_A 0
  1177. #define GEMM_DEFAULT_OFFSET_B 2048
  1178. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1179. #define SGEMM_DEFAULT_UNROLL_M 4
  1180. #define SGEMM_DEFAULT_UNROLL_N 4
  1181. #define DGEMM_DEFAULT_UNROLL_M 4
  1182. #define DGEMM_DEFAULT_UNROLL_N 4
  1183. #define CGEMM_DEFAULT_UNROLL_M 2
  1184. #define CGEMM_DEFAULT_UNROLL_N 2
  1185. #define ZGEMM_DEFAULT_UNROLL_M 2
  1186. #define ZGEMM_DEFAULT_UNROLL_N 2
  1187. #define SGEMM_DEFAULT_P 512
  1188. #define DGEMM_DEFAULT_P 512
  1189. #define CGEMM_DEFAULT_P 512
  1190. #define ZGEMM_DEFAULT_P 512
  1191. #define SGEMM_DEFAULT_Q 1024
  1192. #define DGEMM_DEFAULT_Q 512
  1193. #define CGEMM_DEFAULT_Q 512
  1194. #define ZGEMM_DEFAULT_Q 256
  1195. #define SYMV_P 8
  1196. #endif
  /*
   * SICORTEX: GEMM tuning constants, compiled only when SICORTEX is defined.
   * P, Q and R are all literal values; R is 2000 for every type.
   */
  1197. #ifdef SICORTEX
  1198. #define SNUMOPT 2
  1199. #define DNUMOPT 2
  1200. #define GEMM_DEFAULT_OFFSET_A 0
  1201. #define GEMM_DEFAULT_OFFSET_B 0
  1202. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1203. #define SGEMM_DEFAULT_UNROLL_M 2
  1204. #define SGEMM_DEFAULT_UNROLL_N 8
  1205. #define DGEMM_DEFAULT_UNROLL_M 2
  1206. #define DGEMM_DEFAULT_UNROLL_N 8
  1207. #define CGEMM_DEFAULT_UNROLL_M 1
  1208. #define CGEMM_DEFAULT_UNROLL_N 4
  1209. #define ZGEMM_DEFAULT_UNROLL_M 1
  1210. #define ZGEMM_DEFAULT_UNROLL_N 4
  1211. #define SGEMM_DEFAULT_P 108
  1212. #define DGEMM_DEFAULT_P 112
  1213. #define CGEMM_DEFAULT_P 108
  1214. #define ZGEMM_DEFAULT_P 112
  1215. #define SGEMM_DEFAULT_Q 288
  1216. #define DGEMM_DEFAULT_Q 144
  1217. #define CGEMM_DEFAULT_Q 144
  1218. #define ZGEMM_DEFAULT_Q 72
  1219. #define SGEMM_DEFAULT_R 2000
  1220. #define DGEMM_DEFAULT_R 2000
  1221. #define CGEMM_DEFAULT_R 2000
  1222. #define ZGEMM_DEFAULT_R 2000
  1223. #define SYMV_P 16
  1224. #endif
  /*
   * LOONGSON3A: GEMM tuning constants, compiled only when LOONGSON3A is
   * defined.  In addition to the usual set, this block defines
   * GEMM_OFFSET_A1 and GEMM_OFFSET_B1.
   */
  1225. #ifdef LOONGSON3A
  1226. // Copied from the SICORTEX block; the unroll and P/Q/R values below differ from it.
  1227. #define SNUMOPT 2
  1228. #define DNUMOPT 2
  1229. #define GEMM_DEFAULT_OFFSET_A 0
  1230. #define GEMM_DEFAULT_OFFSET_B 0
  1231. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1232. #define SGEMM_DEFAULT_UNROLL_M 8
  1233. #define SGEMM_DEFAULT_UNROLL_N 4
  1234. #define DGEMM_DEFAULT_UNROLL_M 4
  1235. #define DGEMM_DEFAULT_UNROLL_N 4
  1236. #define CGEMM_DEFAULT_UNROLL_M 4
  1237. #define CGEMM_DEFAULT_UNROLL_N 2
  1238. #define ZGEMM_DEFAULT_UNROLL_M 2
  1239. #define ZGEMM_DEFAULT_UNROLL_N 2
  1240. #define SGEMM_DEFAULT_P 64
  1241. #define DGEMM_DEFAULT_P 44
  1242. #define CGEMM_DEFAULT_P 64
  1243. #define ZGEMM_DEFAULT_P 32
  1244. #define SGEMM_DEFAULT_Q 192
  1245. #define DGEMM_DEFAULT_Q 92
  1246. #define CGEMM_DEFAULT_Q 128
  1247. #define ZGEMM_DEFAULT_Q 80
  /* R is the literal 640 for S, C and Z, but DGEMM uses the identifier
     dgemm_r (not defined in this block).
     NOTE(review): confirm the DGEMM difference is intentional. */
  1248. #define SGEMM_DEFAULT_R 640
  1249. #define DGEMM_DEFAULT_R dgemm_r
  1250. #define CGEMM_DEFAULT_R 640
  1251. #define ZGEMM_DEFAULT_R 640
  1252. #define GEMM_OFFSET_A1 0x10000
  1253. #define GEMM_OFFSET_B1 0x100000
  1254. #define SYMV_P 16
  1255. #endif
  /*
   * LOONGSON3B: GEMM tuning constants, compiled only when LOONGSON3B is
   * defined.  Every unroll factor is 2 and every R is the literal 512; the
   * block also defines GEMM_OFFSET_A1 and GEMM_OFFSET_B1.
   */
  1256. #ifdef LOONGSON3B
  1257. #define SNUMOPT 2
  1258. #define DNUMOPT 2
  1259. #define GEMM_DEFAULT_OFFSET_A 0
  1260. #define GEMM_DEFAULT_OFFSET_B 0
  1261. #define GEMM_DEFAULT_ALIGN 0x03fffUL
  1262. #define SGEMM_DEFAULT_UNROLL_M 2
  1263. #define SGEMM_DEFAULT_UNROLL_N 2
  1264. #define DGEMM_DEFAULT_UNROLL_M 2
  1265. #define DGEMM_DEFAULT_UNROLL_N 2
  1266. #define CGEMM_DEFAULT_UNROLL_M 2
  1267. #define CGEMM_DEFAULT_UNROLL_N 2
  1268. #define ZGEMM_DEFAULT_UNROLL_M 2
  1269. #define ZGEMM_DEFAULT_UNROLL_N 2
  1270. #define SGEMM_DEFAULT_P 64
  1271. #define DGEMM_DEFAULT_P 24
  1272. #define CGEMM_DEFAULT_P 24
  1273. #define ZGEMM_DEFAULT_P 20
  1274. #define SGEMM_DEFAULT_Q 192
  1275. #define DGEMM_DEFAULT_Q 128
  1276. #define CGEMM_DEFAULT_Q 128
  1277. #define ZGEMM_DEFAULT_Q 64
  1278. #define SGEMM_DEFAULT_R 512
  1279. #define DGEMM_DEFAULT_R 512
  1280. #define CGEMM_DEFAULT_R 512
  1281. #define ZGEMM_DEFAULT_R 512
  1282. #define GEMM_OFFSET_A1 0x10000
  1283. #define GEMM_OFFSET_B1 0x100000
  1284. #define SYMV_P 16
  1285. #endif
  /*
   * GENERIC: GEMM tuning constants, compiled only when GENERIC is defined.
   * Unlike the architecture-specific blocks, the blocking macros here are
   * named xGEMM_P / xGEMM_Q / xGEMM_R (without the _DEFAULT_ infix).
   * NOTE(review): confirm that the consumers of this header expect the
   * un-infixed names in the GENERIC case.
   */
  1286. #ifdef GENERIC
  1287. #define SNUMOPT 2
  1288. #define DNUMOPT 2
  1289. #define GEMM_DEFAULT_OFFSET_A 0
  1290. #define GEMM_DEFAULT_OFFSET_B 0
  1291. #define GEMM_DEFAULT_ALIGN 0x0ffffUL
  1292. #define SGEMM_DEFAULT_UNROLL_N 4
  1293. #define DGEMM_DEFAULT_UNROLL_N 4
  1294. #define QGEMM_DEFAULT_UNROLL_N 2
  1295. #define CGEMM_DEFAULT_UNROLL_N 2
  1296. #define ZGEMM_DEFAULT_UNROLL_N 2
  1297. #define XGEMM_DEFAULT_UNROLL_N 1
  /* Only UNROLL_M depends on ARCH_X86; UNROLL_N above is shared by both cases. */
  1298. #ifdef ARCH_X86
  1299. #define SGEMM_DEFAULT_UNROLL_M 4
  1300. #define DGEMM_DEFAULT_UNROLL_M 2
  1301. #define QGEMM_DEFAULT_UNROLL_M 2
  1302. #define CGEMM_DEFAULT_UNROLL_M 2
  1303. #define ZGEMM_DEFAULT_UNROLL_M 1
  1304. #define XGEMM_DEFAULT_UNROLL_M 1
  1305. #else
  1306. #define SGEMM_DEFAULT_UNROLL_M 8
  1307. #define DGEMM_DEFAULT_UNROLL_M 4
  1308. #define QGEMM_DEFAULT_UNROLL_M 2
  1309. #define CGEMM_DEFAULT_UNROLL_M 4
  1310. #define ZGEMM_DEFAULT_UNROLL_M 2
  1311. #define XGEMM_DEFAULT_UNROLL_M 1
  1312. #endif
  /* P and R expand to identifiers not defined in this block; Q is the literal 128. */
  1313. #define SGEMM_P sgemm_p
  1314. #define DGEMM_P dgemm_p
  1315. #define QGEMM_P qgemm_p
  1316. #define CGEMM_P cgemm_p
  1317. #define ZGEMM_P zgemm_p
  1318. #define XGEMM_P xgemm_p
  1319. #define SGEMM_R sgemm_r
  1320. #define DGEMM_R dgemm_r
  1321. #define QGEMM_R qgemm_r
  1322. #define CGEMM_R cgemm_r
  1323. #define ZGEMM_R zgemm_r
  1324. #define XGEMM_R xgemm_r
  1325. #define SGEMM_Q 128
  1326. #define DGEMM_Q 128
  1327. #define QGEMM_Q 128
  1328. #define CGEMM_Q 128
  1329. #define ZGEMM_Q 128
  1330. #define XGEMM_Q 128
  1331. #define SYMV_P 16
  1332. #endif
  /*
   * Fallback unroll factors for the Q and X types.  Each macro is defined as
   * 2 only if no earlier block already defined it, so architecture blocks
   * that omit QGEMM/XGEMM entries still end up with a value.
   */
  1333. #ifndef QGEMM_DEFAULT_UNROLL_M
  1334. #define QGEMM_DEFAULT_UNROLL_M 2
  1335. #endif
  1336. #ifndef QGEMM_DEFAULT_UNROLL_N
  1337. #define QGEMM_DEFAULT_UNROLL_N 2
  1338. #endif
  1339. #ifndef XGEMM_DEFAULT_UNROLL_M
  1340. #define XGEMM_DEFAULT_UNROLL_M 2
  1341. #endif
  1342. #ifndef XGEMM_DEFAULT_UNROLL_N
  1343. #define XGEMM_DEFAULT_UNROLL_N 2
  1344. #endif
  /*
   * Shuffle-instruction helper macros.  Each expands to a mnemonic plus its
   * immediate and a trailing comma, so the use site supplies the remaining
   * operands.
   *
   * Without HAVE_SSE2, SHUFPD_n is mapped to shufps with an immediate that
   * moves the same 64-bit halves as "shufpd $n" (0x44, 0x4e, 0xe4, 0xee for
   * n = 0..3).
   */
  1345. #ifndef HAVE_SSE2
  1346. #define SHUFPD_0 shufps $0x44,
  1347. #define SHUFPD_1 shufps $0x4e,
  1348. #define SHUFPD_2 shufps $0xe4,
  1349. #define SHUFPD_3 shufps $0xee,
  1350. #endif
  /* Otherwise (nothing defined above) fall back to the native shufpd forms. */
  1351. #ifndef SHUFPD_0
  1352. #define SHUFPD_0 shufpd $0,
  1353. #endif
  1354. #ifndef SHUFPD_1
  1355. #define SHUFPD_1 shufpd $1,
  1356. #endif
  1357. #ifndef SHUFPD_2
  1358. #define SHUFPD_2 shufpd $2,
  1359. #endif
  1360. #ifndef SHUFPD_3
  1361. #define SHUFPD_3 shufpd $3,
  1362. #endif
  /* SHUFPS_39 is defined here only if nothing earlier defined it. */
  1363. #ifndef SHUFPS_39
  1364. #define SHUFPS_39 shufps $0x39,
  1365. #endif
  1366. #endif