You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 58 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG /* drop any DEBUG definition in effect at this point (e.g. from the headers included above) */
  46. static void init_parameter(void); /* forward declaration of a file-local routine (no arguments, no return value); its definition is not in the visible part of the file */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  113. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  114. 0, 0, 0,
  115. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SGEMM_DEFAULT_UNROLL_MN
  117. SGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. #endif
  122. #ifdef HAVE_EXCLUSIVE_CACHE
  123. 1,
  124. #else
  125. 0,
  126. #endif
  127. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  128. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  131. isamax_kTS,
  132. #endif
  133. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  134. isamin_kTS, ismax_kTS, ismin_kTS,
  135. snrm2_kTS, sasum_kTS,
  136. #endif
  137. #if BUILD_SINGLE == 1
  138. ssum_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. scopy_kTS, sdot_kTS,
  142. // dsdot_kTS,
  143. srot_kTS, srotm_kTS, saxpy_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  146. sscal_kTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. sswap_kTS,
  150. sgemv_nTS, sgemv_tTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. sger_kTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. ssymv_LTS, ssymv_UTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  159. #ifdef ARCH_X86_64
  160. sgemm_directTS,
  161. sgemm_direct_performantTS,
  162. #endif
  163. #ifdef ARCH_ARM64
  164. sgemm_directTS,
  165. #endif
  166. sgemm_kernelTS, sgemm_betaTS,
  167. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  168. sgemm_incopyTS, sgemm_itcopyTS,
  169. #else
  170. sgemm_oncopyTS, sgemm_otcopyTS,
  171. #endif
  172. sgemm_oncopyTS, sgemm_otcopyTS,
  173. #endif
  174. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  175. #ifdef SMALL_MATRIX_OPT
  176. sgemm_small_matrix_permitTS,
  177. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  178. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  179. #endif
  180. #endif
  181. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  182. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  183. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  184. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  185. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  186. #else
  187. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  188. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  189. #endif
  190. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  191. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  192. #endif
  193. #if (BUILD_SINGLE==1)
  194. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  195. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  196. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  197. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  198. #else
  199. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  200. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  201. #endif
  202. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  203. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  204. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  205. ssymm_iutcopyTS, ssymm_iltcopyTS,
  206. #else
  207. ssymm_outcopyTS, ssymm_oltcopyTS,
  208. #endif
  209. ssymm_outcopyTS, ssymm_oltcopyTS,
  210. #ifndef NO_LAPACK
  211. sneg_tcopyTS, slaswp_ncopyTS,
  212. #else
  213. NULL,NULL,
  214. #endif
  215. #endif
  216. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  217. 0, 0, 0,
  218. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  219. #ifdef DGEMM_DEFAULT_UNROLL_MN
  220. DGEMM_DEFAULT_UNROLL_MN,
  221. #else
  222. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  223. #endif
  224. #endif
  225. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  226. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  227. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  228. dnrm2_kTS, dasum_kTS,
  229. #endif
  230. #if (BUILD_DOUBLE==1)
  231. dsum_kTS,
  232. #endif
  233. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  234. dcopy_kTS, ddot_kTS,
  235. #endif
  236. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  237. dsdot_kTS,
  238. #endif
  239. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  240. drot_kTS,
  241. drotm_kTS,
  242. daxpy_kTS,
  243. dscal_kTS,
  244. dswap_kTS,
  245. dgemv_nTS, dgemv_tTS,
  246. #endif
  247. #if (BUILD_DOUBLE==1)
  248. dger_kTS,
  249. dsymv_LTS, dsymv_UTS,
  250. #endif
  251. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  252. dgemm_kernelTS, dgemm_betaTS,
  253. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  254. dgemm_incopyTS, dgemm_itcopyTS,
  255. #else
  256. dgemm_oncopyTS, dgemm_otcopyTS,
  257. #endif
  258. dgemm_oncopyTS, dgemm_otcopyTS,
  259. #endif
  260. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  261. #ifdef SMALL_MATRIX_OPT
  262. dgemm_small_matrix_permitTS,
  263. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  264. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  265. #endif
  266. #endif
  267. #if (BUILD_DOUBLE==1)
  268. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  269. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  270. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  271. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  272. #else
  273. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  274. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  275. #endif
  276. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  277. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  278. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  279. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  280. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  281. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  282. #else
  283. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  284. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  285. #endif
  286. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  287. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  288. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  289. dsymm_iutcopyTS, dsymm_iltcopyTS,
  290. #else
  291. dsymm_outcopyTS, dsymm_oltcopyTS,
  292. #endif
  293. dsymm_outcopyTS, dsymm_oltcopyTS,
  294. #ifndef NO_LAPACK
  295. dneg_tcopyTS, dlaswp_ncopyTS,
  296. #else
  297. NULL, NULL,
  298. #endif
  299. #endif
  300. #ifdef EXPRECISION
  301. 0, 0, 0,
  302. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  303. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  304. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  305. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  306. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  307. qgemv_nTS, qgemv_tTS, qger_kTS,
  308. qsymv_LTS, qsymv_UTS,
  309. qgemm_kernelTS, qgemm_betaTS,
  310. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  311. qgemm_incopyTS, qgemm_itcopyTS,
  312. #else
  313. qgemm_oncopyTS, qgemm_otcopyTS,
  314. #endif
  315. qgemm_oncopyTS, qgemm_otcopyTS,
  316. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  317. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  318. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  319. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  320. #else
  321. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  322. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  323. #endif
  324. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  325. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  326. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  327. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  328. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  329. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  330. #else
  331. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  332. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  333. #endif
  334. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  335. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  336. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  337. qsymm_iutcopyTS, qsymm_iltcopyTS,
  338. #else
  339. qsymm_outcopyTS, qsymm_oltcopyTS,
  340. #endif
  341. qsymm_outcopyTS, qsymm_oltcopyTS,
  342. #ifndef NO_LAPACK
  343. qneg_tcopyTS, qlaswp_ncopyTS,
  344. #else
  345. NULL, NULL,
  346. #endif
  347. #endif
  348. #if (BUILD_COMPLEX)
  349. 0, 0, 0,
  350. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  351. #ifdef CGEMM_DEFAULT_UNROLL_MN
  352. CGEMM_DEFAULT_UNROLL_MN,
  353. #else
  354. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  355. #endif
  356. #if (BUILD_COMPLEX)
  357. camax_kTS, camin_kTS,
  358. #endif
  359. #if (BUILD_COMPLEX)
  360. icamax_kTS,
  361. #endif
  362. #if (BUILD_COMPLEX)
  363. icamin_kTS,
  364. cnrm2_kTS, casum_kTS, csum_kTS,
  365. #endif
  366. #if (BUILD_COMPLEX)
  367. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  368. #endif
  369. #if (BUILD_COMPLEX)
  370. csrot_kTS,
  371. #endif
  372. #if (BUILD_COMPLEX)
  373. caxpy_kTS,
  374. caxpyc_kTS,
  375. cscal_kTS,
  376. cswap_kTS,
  377. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  378. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  379. #endif
  380. #if (BUILD_COMPLEX)
  381. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  382. csymv_LTS, csymv_UTS,
  383. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  384. #endif
  385. #if (BUILD_COMPLEX)
  386. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  387. cgemm_betaTS,
  388. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  389. cgemm_incopyTS, cgemm_itcopyTS,
  390. #else
  391. cgemm_oncopyTS, cgemm_otcopyTS,
  392. #endif
  393. cgemm_oncopyTS, cgemm_otcopyTS,
  394. #ifdef SMALL_MATRIX_OPT
  395. cgemm_small_matrix_permitTS,
  396. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  397. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  398. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  399. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  400. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  401. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  402. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  403. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  404. #endif
  405. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  406. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  407. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  408. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  409. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  410. #else
  411. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  412. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  413. #endif
  414. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  415. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  416. #endif
  417. #endif
  418. #if (BUILD_COMPLEX)
  419. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  420. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  421. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  422. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  423. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  424. #else
  425. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  426. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  427. #endif
  428. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  429. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  430. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  431. csymm_iutcopyTS, csymm_iltcopyTS,
  432. #else
  433. csymm_outcopyTS, csymm_oltcopyTS,
  434. #endif
  435. csymm_outcopyTS, csymm_oltcopyTS,
  436. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  437. chemm_iutcopyTS, chemm_iltcopyTS,
  438. #else
  439. chemm_outcopyTS, chemm_oltcopyTS,
  440. #endif
  441. chemm_outcopyTS, chemm_oltcopyTS,
  442. 0, 0, 0,
  443. #if (USE_GEMM3M)
  444. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  445. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  446. #else
  447. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  448. #endif
  449. cgemm3m_kernelTS,
  450. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  451. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  452. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  453. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  454. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  455. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  456. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  457. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  458. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  459. csymm3m_oucopybTS, csymm3m_olcopybTS,
  460. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  461. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  462. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  463. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  464. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  465. chemm3m_oucopybTS, chemm3m_olcopybTS,
  466. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  467. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  468. #else
  469. 0, 0, 0,
  470. NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. NULL, NULL,
  485. NULL, NULL,
  486. NULL, NULL,
  487. NULL, NULL,
  488. NULL, NULL,
  489. #endif
  490. #endif
  491. #if (BUILD_COMPLEX)
  492. #ifndef NO_LAPACK
  493. cneg_tcopyTS,
  494. claswp_ncopyTS,
  495. #else
  496. NULL, NULL,
  497. #endif
  498. #endif
  499. #if BUILD_COMPLEX16 == 1
  500. 0, 0, 0,
  501. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  502. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  503. ZGEMM_DEFAULT_UNROLL_MN,
  504. #else
  505. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  506. #endif
  507. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  508. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  509. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  510. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  511. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  512. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  513. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  514. zsymv_LTS, zsymv_UTS,
  515. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  516. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  517. zgemm_betaTS,
  518. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  519. zgemm_incopyTS, zgemm_itcopyTS,
  520. #else
  521. zgemm_oncopyTS, zgemm_otcopyTS,
  522. #endif
  523. zgemm_oncopyTS, zgemm_otcopyTS,
  524. #ifdef SMALL_MATRIX_OPT
  525. zgemm_small_matrix_permitTS,
  526. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  527. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  528. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  529. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  530. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  531. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  532. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  533. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  534. #endif
  535. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  536. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  537. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  538. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  539. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  540. #else
  541. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  542. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  543. #endif
  544. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  545. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  546. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  547. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  548. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  549. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  550. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  551. #else
  552. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  553. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  554. #endif
  555. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  556. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  557. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  558. zsymm_iutcopyTS, zsymm_iltcopyTS,
  559. #else
  560. zsymm_outcopyTS, zsymm_oltcopyTS,
  561. #endif
  562. zsymm_outcopyTS, zsymm_oltcopyTS,
  563. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  564. zhemm_iutcopyTS, zhemm_iltcopyTS,
  565. #else
  566. zhemm_outcopyTS, zhemm_oltcopyTS,
  567. #endif
  568. zhemm_outcopyTS, zhemm_oltcopyTS,
  569. 0, 0, 0,
  570. #if (USE_GEMM3M)
  571. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  572. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  573. #else
  574. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  575. #endif
  576. zgemm3m_kernelTS,
  577. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  578. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  579. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  580. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  581. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  582. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  583. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  584. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  585. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  586. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  587. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  588. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  589. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  590. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  591. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  592. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  593. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  594. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  595. #else
  596. 0, 0, 0,
  597. NULL,
  598. NULL, NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. NULL, NULL,
  612. NULL, NULL,
  613. NULL, NULL,
  614. NULL, NULL,
  615. NULL, NULL,
  616. #endif
  617. #ifndef NO_LAPACK
  618. zneg_tcopyTS, zlaswp_ncopyTS,
  619. #else
  620. NULL, NULL,
  621. #endif
  622. #endif
  623. #ifdef EXPRECISION
  624. 0, 0, 0,
  625. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  626. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  627. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  628. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  629. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  630. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  631. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  632. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  633. xsymv_LTS, xsymv_UTS,
  634. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  635. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  636. xgemm_betaTS,
  637. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  638. xgemm_incopyTS, xgemm_itcopyTS,
  639. #else
  640. xgemm_oncopyTS, xgemm_otcopyTS,
  641. #endif
  642. xgemm_oncopyTS, xgemm_otcopyTS,
  643. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  644. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  645. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  646. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  647. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  648. #else
  649. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  650. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  651. #endif
  652. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  653. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  654. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  655. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  656. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  657. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  658. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  659. #else
  660. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  661. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  662. #endif
  663. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  664. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  665. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  666. xsymm_iutcopyTS, xsymm_iltcopyTS,
  667. #else
  668. xsymm_outcopyTS, xsymm_oltcopyTS,
  669. #endif
  670. xsymm_outcopyTS, xsymm_oltcopyTS,
  671. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  672. xhemm_iutcopyTS, xhemm_iltcopyTS,
  673. #else
  674. xhemm_outcopyTS, xhemm_oltcopyTS,
  675. #endif
  676. xhemm_outcopyTS, xhemm_oltcopyTS,
  677. 0, 0, 0,
  678. #if (USE_GEMM3M)
  679. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  680. xgemm3m_kernelTS,
  681. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  682. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  683. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  684. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  685. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  686. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  687. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  688. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  689. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  690. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  691. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  692. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  693. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  694. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  695. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  696. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  697. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  698. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  699. #else
  700. 0, 0, 0,
  701. NULL,
  702. NULL, NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. NULL, NULL,
  716. NULL, NULL,
  717. NULL, NULL,
  718. NULL, NULL,
  719. NULL, NULL,
  720. #endif
  721. #ifndef NO_LAPACK
  722. xneg_tcopyTS, xlaswp_ncopyTS,
  723. #else
  724. NULL, NULL,
  725. #endif
  726. #endif
  727. init_parameter,
  728. SNUMOPT, DNUMOPT, QNUMOPT,
  729. #if BUILD_SINGLE == 1
  730. saxpby_kTS,
  731. #endif
  732. #if BUILD_DOUBLE == 1
  733. daxpby_kTS,
  734. #endif
  735. #if BUILD_COMPLEX == 1
  736. caxpby_kTS,
  737. #endif
  738. #if BUILD_COMPLEX16== 1
  739. zaxpby_kTS,
  740. #endif
  741. #if BUILD_SINGLE == 1
  742. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  743. #endif
  744. #if BUILD_DOUBLE== 1
  745. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  746. #endif
  747. #if BUILD_COMPLEX == 1
  748. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  749. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  750. #endif
  751. #if BUILD_COMPLEX16 == 1
  752. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  753. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  754. #endif
  755. #if BUILD_SINGLE == 1
  756. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  757. #endif
  758. #if BUILD_DOUBLE== 1
  759. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  760. #endif
  761. #if BUILD_COMPLEX== 1
  762. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  763. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  764. #endif
  765. #if BUILD_COMPLEX16==1
  766. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  767. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  768. #endif
  769. #if BUILD_SINGLE == 1
  770. sgeadd_kTS,
  771. #endif
  772. #if BUILD_DOUBLE==1
  773. dgeadd_kTS,
  774. #endif
  775. #if BUILD_COMPLEX==1
  776. cgeadd_kTS,
  777. #endif
  778. #if BUILD_COMPLEX16==1
  779. zgeadd_kTS,
  780. #endif
  781. };
  782. #if (ARCH_ARM64)
  /*
   * ARM64 variant: fill the GEMM blocking parameters (P, Q, R) of TABLE_NAME
   * from the compile-time *_DEFAULT_* constants.  Only the precisions enabled
   * by the BUILD_* switches are written.
   */
  static void init_parameter(void) {

    /* --- bfloat16 --- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* --- single precision (also written when only complex is built) --- */
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* --- double precision (also written when only complex16 is built) --- */
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* --- single complex --- */
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* --- double complex --- */
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* --- extended precision, real and complex --- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * GEMM3M: use a dedicated *GEMM3M_DEFAULT_* constant when one is defined,
     * otherwise reuse the value just stored for the matching real precision.
     * This section must stay after the real-precision assignments above.
     */
  #if (USE_GEMM3M)

  #if defined(CGEMM3M_DEFAULT_P)
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #if defined(CGEMM3M_DEFAULT_Q)
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #if defined(CGEMM3M_DEFAULT_R)
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #if defined(ZGEMM3M_DEFAULT_P)
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #if defined(ZGEMM3M_DEFAULT_Q)
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #if defined(ZGEMM3M_DEFAULT_R)
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* Extended-precision 3M always mirrors the qgemm values. */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  875. #else // (ARCH_ARM64)
  876. #if defined(ARCH_MIPS64)
  877. static void init_parameter(void) {
  878. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  879. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  880. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  881. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  882. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  883. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  884. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  885. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  886. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  887. TABLE_NAME.dgemm_r = 640;
  888. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  889. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  890. #ifdef EXPRECISION
  891. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  892. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  893. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  894. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  895. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  896. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  897. #endif
  898. #if defined(USE_GEMM3M)
  899. #ifdef CGEMM3M_DEFAULT_P
  900. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  901. #else
  902. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  903. #endif
  904. #ifdef ZGEMM3M_DEFAULT_P
  905. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  906. #else
  907. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  908. #endif
  909. #ifdef CGEMM3M_DEFAULT_Q
  910. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  911. #else
  912. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  913. #endif
  914. #ifdef ZGEMM3M_DEFAULT_Q
  915. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  916. #else
  917. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  918. #endif
  919. #ifdef CGEMM3M_DEFAULT_R
  920. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  921. #else
  922. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  923. #endif
  924. #ifdef ZGEMM3M_DEFAULT_R
  925. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  926. #else
  927. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  928. #endif
  929. #ifdef EXPRECISION
  930. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  931. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  932. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  933. #endif
  934. #endif
  935. }
  936. #else // (ARCH_MIPS64)
  937. #if (ARCH_LOONGARCH64)
  /*
   * Return the L3 cache size in MB, decoded from the word that the LoongArch
   * `cpucfg` instruction returns for index 0x14.
   *
   * The size in bytes is
   *     (bits[15:0] + 1) * 2^bits[23:16] * 2^bits[30:24]
   * and is scaled down by 2^20 to give MB.
   *
   * The arithmetic is done with integer shifts instead of floating-point
   * pow(): the result is identical for every representable size, needs no
   * libm, and an implausible register value saturates at INT_MAX instead of
   * running into an out-of-range double -> int conversion.
   */
  static int get_L3_size() {
    int raw = 0, id = 0x14;
    unsigned int cfg, log2_bytes;
    unsigned long long mb;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(raw)
      : [id]"r"(id)
      : "memory"
    );

    cfg = (unsigned int) raw;

    /* First factor: the 16-bit field plus one. */
    mb = (unsigned long long) (cfg & 0xffff) + 1;

    /* The two power-of-two factors combine into a single exponent. */
    log2_bytes = ((cfg >> 16) & 0xff) + ((cfg >> 24) & 0x7f);

    if (log2_bytes >= 20) {
      unsigned int up = log2_bytes - 20;
      /* mb <= 2^16, so a shift of at most 31 cannot overflow 64 bits. */
      if (up > 31) up = 31;
      mb <<= up;
    } else {
      /* Less than 1 MB per unit: truncate, as the original division did. */
      mb >>= (20 - log2_bytes);
    }

    if (mb > 0x7fffffffULL) mb = 0x7fffffffULL;
    return (int) mb; // MB
  }
  948. static void init_parameter(void) {
  949. #ifdef BUILD_BFLOAT16
  950. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  951. #endif
  952. #ifdef BUILD_BFLOAT16
  953. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  954. #endif
  955. #if defined(LA464)
  956. int L3_size = get_L3_size();
  957. #ifdef SMP
  958. if(blas_num_threads == 1){
  959. #endif
  960. //single thread
  961. if (L3_size == 32){ // 3C5000 and 3D5000
  962. TABLE_NAME.sgemm_p = 256;
  963. TABLE_NAME.sgemm_q = 384;
  964. TABLE_NAME.sgemm_r = 8192;
  965. TABLE_NAME.dgemm_p = 112;
  966. TABLE_NAME.dgemm_q = 289;
  967. TABLE_NAME.dgemm_r = 4096;
  968. TABLE_NAME.cgemm_p = 128;
  969. TABLE_NAME.cgemm_q = 256;
  970. TABLE_NAME.cgemm_r = 4096;
  971. TABLE_NAME.zgemm_p = 128;
  972. TABLE_NAME.zgemm_q = 128;
  973. TABLE_NAME.zgemm_r = 2048;
  974. } else { // 3A5000 and 3C5000L
  975. TABLE_NAME.sgemm_p = 256;
  976. TABLE_NAME.sgemm_q = 384;
  977. TABLE_NAME.sgemm_r = 4096;
  978. TABLE_NAME.dgemm_p = 112;
  979. TABLE_NAME.dgemm_q = 300;
  980. TABLE_NAME.dgemm_r = 3024;
  981. TABLE_NAME.cgemm_p = 128;
  982. TABLE_NAME.cgemm_q = 256;
  983. TABLE_NAME.cgemm_r = 2048;
  984. TABLE_NAME.zgemm_p = 128;
  985. TABLE_NAME.zgemm_q = 128;
  986. TABLE_NAME.zgemm_r = 1024;
  987. }
  988. #ifdef SMP
  989. }else{
  990. //multi thread
  991. if (L3_size == 32){ // 3C5000 and 3D5000
  992. TABLE_NAME.sgemm_p = 256;
  993. TABLE_NAME.sgemm_q = 384;
  994. TABLE_NAME.sgemm_r = 1024;
  995. TABLE_NAME.dgemm_p = 112;
  996. TABLE_NAME.dgemm_q = 289;
  997. TABLE_NAME.dgemm_r = 342;
  998. TABLE_NAME.cgemm_p = 128;
  999. TABLE_NAME.cgemm_q = 256;
  1000. TABLE_NAME.cgemm_r = 512;
  1001. TABLE_NAME.zgemm_p = 128;
  1002. TABLE_NAME.zgemm_q = 128;
  1003. TABLE_NAME.zgemm_r = 512;
  1004. } else { // 3A5000 and 3C5000L
  1005. TABLE_NAME.sgemm_p = 256;
  1006. TABLE_NAME.sgemm_q = 384;
  1007. TABLE_NAME.sgemm_r = 2048;
  1008. TABLE_NAME.dgemm_p = 112;
  1009. TABLE_NAME.dgemm_q = 300;
  1010. TABLE_NAME.dgemm_r = 738;
  1011. TABLE_NAME.cgemm_p = 128;
  1012. TABLE_NAME.cgemm_q = 256;
  1013. TABLE_NAME.cgemm_r = 1024;
  1014. TABLE_NAME.zgemm_p = 128;
  1015. TABLE_NAME.zgemm_q = 128;
  1016. TABLE_NAME.zgemm_r = 1024;
  1017. }
  1018. }
  1019. #endif
  1020. #else
  1021. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1022. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1023. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1024. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1025. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1026. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1027. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1028. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1029. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1030. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1031. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1032. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1033. #endif
  1034. #ifdef BUILD_BFLOAT16
  1035. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1036. #endif
  1037. }
  1038. #else // (ARCH_LOONGARCH64)
  1039. #if (ARCH_POWER)
  1040. static void init_parameter(void) {
  1041. #ifdef BUILD_BFLOAT16
  1042. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1043. #endif
  1044. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1045. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1046. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1047. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1048. #ifdef BUILD_BFLOAT16
  1049. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1050. #endif
  1051. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1052. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1053. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1054. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1055. #ifdef BUILD_BFLOAT16
  1056. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1057. #endif
  1058. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1059. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1060. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1061. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1062. }
  1063. #else //POWER
  1064. #if (ARCH_ZARCH)
  1065. static void init_parameter(void) {
  1066. #ifdef BUILD_BFLOAT16
  1067. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1068. #endif
  1069. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1070. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1071. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1072. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1073. #ifdef BUILD_BFLOAT16
  1074. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1075. #endif
  1076. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1077. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1078. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1079. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1080. #ifdef BUILD_BFLOAT16
  1081. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1082. #endif
  1083. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1084. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1085. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1086. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1087. }
  1088. #else //ZARCH
  1089. #if (ARCH_RISCV64)
  1090. static void init_parameter(void) {
  1091. #ifdef BUILD_BFLOAT16
  1092. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1093. #endif
  1094. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1095. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1096. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1097. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1098. #ifdef BUILD_BFLOAT16
  1099. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1100. #endif
  1101. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1102. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1103. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1104. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1105. #ifdef BUILD_BFLOAT16
  1106. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1107. #endif
  1108. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1109. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1110. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1111. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1112. }
  1113. #else //RISCV64
  1114. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size detection through the CPUID leaf 2 descriptor bytes.
   *
   * Leaf 2 returns up to 15 one-byte descriptors packed into eax (bytes 1-3;
   * byte 0 is not a descriptor), ebx, ecx and edx.  Descriptors are scanned
   * in that order and the size of the first one that names an L2 cache is
   * returned.  A register whose bit 31 is set carries no valid descriptors
   * and is skipped.
   *
   * Returns the size in the units used by the table below (the fallback
   * message calls 256 "256k"); when nothing matches, a warning is printed to
   * stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int regs[4];            /* eax, ebx, ecx, edx as returned by leaf 2 */
    int r, b;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (r = 0; r < 4; r++) {

      /* Bit 31 set: the register holds reserved data, not descriptors. */
      if (BITMASK(regs[r], 31, 0x1)) continue;

      /* The low byte of eax is skipped; every other byte is a descriptor. */
      for (b = (r == 0) ? 1 : 0; b < 4; b++) {

        switch (BITMASK(regs[r], 8 * b, 0xff)) {
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          return 3072;   /* 3 MB; the previous value 3184 was a typo */

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
        }
      }
    }

    /* No known L2 descriptor found: warn and fall back to a default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1185. #endif
  /*
   * Return the L2 cache size used to scale the GEMM blocking parameters.
   *
   * Order of precedence:
   *   1. A positive value in the OPENBLAS_L2_SIZE environment variable.
   *      Zero, unset and negative values are ignored; a negative size would
   *      otherwise propagate into negative blocking parameters.
   *   2. The 16-bit field at bit 16 of ecx from CPUID leaf 0x80000006.
   *   3. On 32-bit x86 builds (ARCH_X86) the leaf 2 descriptor scan in
   *      get_l2_size_old(); on other builds a warning is printed and 256 is
   *      assumed.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx, l2;

    /* User override; only a positive value is meaningful. */
    l2 = readenv_atoi("OPENBLAS_L2_SIZE");
    if (l2 > 0)
      return l2;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2 = BITMASK(ecx, 16, 0xffff);

  #ifndef ARCH_X86
    if (l2 <= 0) {
      fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
      return 256;
    }
    return l2;
  #else
    /* Extended leaf gave nothing: fall back to the legacy descriptor scan. */
    if (l2 > 0) return l2;
    return get_l2_size_old();
  #endif
  }
  /*
   * Return the L3 cache size derived from CPUID leaf 0x80000006: the 14-bit
   * field starting at bit 18 of edx, multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the result
   * KB - confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_units = BITMASK(edx, 18, 0x3fff);
    return l3_units * 512;
  }
  1209. static void init_parameter(void) {
  1210. int l2 = get_l2_size();
  1211. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1212. /* where the GEMM unrolling parameters do not depend on l2 */
  1213. #ifdef BUILD_BFLOAT16
  1214. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1215. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1216. #endif
  1217. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1218. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1219. #endif
  1220. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1221. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1222. #endif
  1223. #if BUILD_COMPLEX == 1
  1224. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1225. #endif
  1226. #if BUILD_COMPLEX16==1
  1227. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1228. #endif
  1229. #if BUILD_COMPLEX == 1
  1230. #ifdef CGEMM3M_DEFAULT_Q
  1231. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1232. #else
  1233. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1234. #endif
  1235. #endif
  1236. #if BUILD_COMPLEX16 == 1
  1237. #ifdef ZGEMM3M_DEFAULT_Q
  1238. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1239. #else
  1240. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1241. #endif
  1242. #endif
  1243. #ifdef EXPRECISION
  1244. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1245. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1246. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1247. #endif
  1248. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1249. #ifdef DEBUG
  1250. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1251. #endif
  1252. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1253. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1254. #endif
  1255. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1256. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1257. #endif
  1258. #if BUILD_COMPLEX==1
  1259. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1260. #endif
  1261. #if BUILD_COMPLEX16==1
  1262. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1263. #endif
  1264. #ifdef EXPRECISION
  1265. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1266. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1267. #endif
  1268. #endif
  1269. #ifdef CORE_NORTHWOOD
  1270. #ifdef DEBUG
  1271. fprintf(stderr, "Northwood\n");
  1272. #endif
  1273. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1274. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1275. #endif
  1276. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1277. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1278. #endif
  1279. #if BUILD_COMPLEX==1
  1280. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1281. #endif
  1282. #if BUILD_COMPLEX16==1
  1283. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1284. #endif
  1285. #ifdef EXPRECISION
  1286. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1287. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1288. #endif
  1289. #endif
  1290. #ifdef ATOM
  1291. #ifdef DEBUG
  1292. fprintf(stderr, "Atom\n");
  1293. #endif
  1294. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1295. TABLE_NAME.sgemm_p = 256;
  1296. #endif
  1297. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1298. TABLE_NAME.dgemm_p = 128;
  1299. #endif
  1300. #if BUILD_COMPLEX==1
  1301. TABLE_NAME.cgemm_p = 128;
  1302. #endif
  1303. #if BUILD_COMPLEX16==1
  1304. TABLE_NAME.zgemm_p = 64;
  1305. #endif
  1306. #ifdef EXPRECISION
  1307. TABLE_NAME.qgemm_p = 64;
  1308. TABLE_NAME.xgemm_p = 32;
  1309. #endif
  1310. #endif
  1311. #ifdef CORE_PRESCOTT
  1312. #ifdef DEBUG
  1313. fprintf(stderr, "Prescott\n");
  1314. #endif
  1315. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1316. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1317. #endif
  1318. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1319. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1320. #endif
  1321. #if BUILD_COMPLEX==1
  1322. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1323. #endif
  1324. #if BUILD_COMPLEX16 == 1
  1325. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1326. #endif
  1327. #ifdef EXPRECISION
  1328. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1329. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1330. #endif
  1331. #endif
  1332. #ifdef CORE2
  1333. #ifdef DEBUG
  1334. fprintf(stderr, "Core2\n");
  1335. #endif
  1336. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1337. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1338. #endif
  1339. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1340. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1341. #endif
  1342. #if BUILD_COMPLEX==1
  1343. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1344. #endif
  1345. #if BUILD_COMPLEX16==1
  1346. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1347. #endif
  1348. #ifdef EXPRECISION
  1349. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1350. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1351. #endif
  1352. #endif
  1353. #ifdef PENRYN
  1354. #ifdef DEBUG
  1355. fprintf(stderr, "Penryn\n");
  1356. #endif
  1357. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1358. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1359. #endif
  1360. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1361. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1362. #endif
  1363. #if BUILD_COMPLEX==1
  1364. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1365. #endif
  1366. #if BUILD_COMPLEX16==1
  1367. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1368. #endif
  1369. #ifdef EXPRECISION
  1370. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1371. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1372. #endif
  1373. #endif
  1374. #ifdef DUNNINGTON
  1375. #ifdef DEBUG
  1376. fprintf(stderr, "Dunnington\n");
  1377. #endif
  1378. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1379. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1380. #endif
  1381. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1382. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1383. #endif
  1384. #if BUILD_COMPLEX==1
  1385. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1386. #endif
  1387. #if BUILD_COMPLEX16==1
  1388. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1389. #endif
  1390. #ifdef EXPRECISION
  1391. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1392. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1393. #endif
  1394. #endif
  1395. #ifdef NEHALEM
  1396. #ifdef DEBUG
  1397. fprintf(stderr, "Nehalem\n");
  1398. #endif
  1399. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1400. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1401. #endif
  1402. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1403. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1404. #endif
  1405. #if BUILD_COMPLEX
  1406. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1407. #endif
  1408. #if BUILD_COMPLEX16
  1409. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1410. #endif
  1411. #ifdef EXPRECISION
  1412. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1413. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1414. #endif
  1415. #endif
  1416. #ifdef SANDYBRIDGE
  1417. #ifdef DEBUG
  1418. fprintf(stderr, "Sandybridge\n");
  1419. #endif
  1420. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1421. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1422. #endif
  1423. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1424. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1425. #endif
  1426. #if BUILD_COMPLEX
  1427. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1428. #endif
  1429. #if BUILD_COMPLEX16
  1430. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1431. #endif
  1432. #ifdef EXPRECISION
  1433. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1434. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1435. #endif
  1436. #endif
  1437. #ifdef HASWELL
  1438. #ifdef DEBUG
  1439. fprintf(stderr, "Haswell\n");
  1440. #endif
  1441. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1442. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1443. #endif
  1444. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1445. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1446. #endif
  1447. #if BUILD_COMPLEX
  1448. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1449. #endif
  1450. #if BUILD_COMPLEX16
  1451. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1452. #endif
  1453. #ifdef EXPRECISION
  1454. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1455. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1456. #endif
  1457. #endif
  1458. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1459. #ifdef DEBUG
  1460. fprintf(stderr, "SkylakeX\n");
  1461. #endif
  1462. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1463. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1464. #endif
  1465. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1466. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1467. #endif
  1468. #if BUILD_COMPLEX
  1469. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1470. #endif
  1471. #if BUILD_COMPLEX16
  1472. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1473. #endif
  1474. #ifdef EXPRECISION
  1475. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1476. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1477. #endif
  1478. #endif
  1479. #ifdef OPTERON
  1480. #ifdef DEBUG
  1481. fprintf(stderr, "Opteron\n");
  1482. #endif
  1483. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1484. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1485. #endif
  1486. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1487. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1488. #endif
  1489. #if BUILD_COMPLEX
  1490. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1491. #endif
  1492. #if BUILD_COMPLEX16
  1493. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1494. #endif
  1495. #ifdef EXPRECISION
  1496. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1497. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1498. #endif
  1499. #endif
  1500. #ifdef BARCELONA
  1501. #ifdef DEBUG
  1502. fprintf(stderr, "Barcelona\n");
  1503. #endif
  1504. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1505. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1506. #endif
  1507. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1508. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1509. #endif
  1510. #if BUILD_COMPLEX
  1511. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1512. #endif
  1513. #if BUILD_COMPLEX16
  1514. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1515. #endif
  1516. #ifdef EXPRECISION
  1517. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1518. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1519. #endif
  1520. #endif
  1521. #ifdef BOBCAT
  1522. #ifdef DEBUG
  1523. fprintf(stderr, "Bobcate\n");
  1524. #endif
  1525. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1526. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1527. #endif
  1528. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1529. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1530. #endif
  1531. #if BUILD_COMPLEX
  1532. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1533. #endif
  1534. #if BUILD_COMPLEX16
  1535. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1536. #endif
  1537. #ifdef EXPRECISION
  1538. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1539. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1540. #endif
  1541. #endif
  1542. #ifdef BULLDOZER
  1543. #ifdef DEBUG
  1544. fprintf(stderr, "Bulldozer\n");
  1545. #endif
  1546. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1547. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1548. #endif
  1549. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1550. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1551. #endif
  1552. #if BUILD_COMPLEX
  1553. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1554. #endif
  1555. #if BUILD_COMPLEX16
  1556. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1557. #endif
  1558. #ifdef EXPRECISION
  1559. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1560. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1561. #endif
  1562. #endif
  1563. #ifdef EXCAVATOR
  1564. #ifdef DEBUG
  1565. fprintf(stderr, "Excavator\n");
  1566. #endif
  1567. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1568. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1569. #endif
  1570. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1571. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1572. #endif
  1573. #if BUILD_COMPLEX
  1574. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1575. #endif
  1576. #if BUILD_COMPLEX16
  1577. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1578. #endif
  1579. #ifdef EXPRECISION
  1580. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1581. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1582. #endif
  1583. #endif
  1584. #ifdef PILEDRIVER
  1585. #ifdef DEBUG
  1586. fprintf(stderr, "Piledriver\n");
  1587. #endif
  1588. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1589. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1590. #endif
  1591. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1592. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1593. #endif
  1594. #if BUILD_COMPLEX
  1595. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1596. #endif
  1597. #if BUILD_COMPLEX16
  1598. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1599. #endif
  1600. #ifdef EXPRECISION
  1601. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1602. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1603. #endif
  1604. #endif
  1605. #ifdef STEAMROLLER
  1606. #ifdef DEBUG
  1607. fprintf(stderr, "Steamroller\n");
  1608. #endif
  1609. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1610. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1611. #endif
  1612. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1613. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1614. #endif
  1615. #if BUILD_COMPLEX
  1616. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1617. #endif
  1618. #if BUILD_COMPLEX16
  1619. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1620. #endif
  1621. #ifdef EXPRECISION
  1622. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1623. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1624. #endif
  1625. #endif
  1626. #ifdef ZEN
  1627. #ifdef DEBUG
  1628. fprintf(stderr, "Zen\n");
  1629. #endif
  1630. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1631. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1632. #endif
  1633. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1634. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1635. #endif
  1636. #if BUILD_COMPLEX
  1637. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1638. #endif
  1639. #if BUILD_COMPLEX16
  1640. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1641. #endif
  1642. #ifdef EXPRECISION
  1643. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1644. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1645. #endif
  1646. #endif
  1647. #ifdef NANO
  1648. #ifdef DEBUG
  1649. fprintf(stderr, "NANO\n");
  1650. #endif
  1651. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1652. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1653. #endif
  1654. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1655. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1656. #endif
  1657. #if (BUILD_COMPLEX==1)
  1658. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1659. #endif
  1660. #if (BUILD_COMPLEX16==1)
  1661. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1662. #endif
  1663. #ifdef EXPRECISION
  1664. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1665. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1666. #endif
  1667. #endif
  1668. #ifdef SAPPHIRERAPIDS
  1669. #if (BUILD_BFLOAT16 == 1)
  1670. TABLE_NAME.need_amxtile_permission = 1;
  1671. #endif
  1672. #endif
  1673. #if BUILD_COMPLEX==1
  1674. #ifdef CGEMM3M_DEFAULT_P
  1675. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1676. #else
  1677. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1678. #endif
  1679. #endif
  1680. #if BUILD_COMPLEX16==1
  1681. #ifdef ZGEMM3M_DEFAULT_P
  1682. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1683. #else
  1684. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1685. #endif
  1686. #endif
  1687. #ifdef EXPRECISION
  1688. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1689. #endif
  1690. #if BUILD_SINGLE == 1
  1691. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1692. #endif
  1693. #if BUILD_DOUBLE== 1
  1694. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1695. #endif
  1696. #if BUILD_COMPLEX==1
  1697. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1698. #endif
  1699. #if BUILD_COMPLEX16==1
  1700. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1701. #endif
  1702. #if BUILD_COMPLEX==1
  1703. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1704. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1705. #else
  1706. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1707. #endif
  1708. #endif
  1709. #if BUILD_COMPLEX16==1
  1710. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1711. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1712. #else
  1713. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1714. #endif
  1715. #endif
  1716. #ifdef QUAD_PRECISION
  1717. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1718. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1719. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1720. #endif
  1721. #ifdef DEBUG
  1722. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1723. #endif
  1724. #if BUILD_BFLOAT16==1
  1725. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1726. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1727. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1728. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1729. #endif
  1730. #if BUILD_SINGLE==1
  1731. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1732. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1733. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1734. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1735. #endif
  1736. #if BUILD_DOUBLE==1
  1737. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1738. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1739. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1740. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1741. #endif
  1742. #ifdef EXPRECISION
  1743. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1744. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1745. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1746. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1747. #endif
  1748. #if BUILD_COMPLEX ==1
  1749. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1750. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1751. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1752. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1753. #endif
  1754. #if BUILD_COMPLEX16 ==1
  1755. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1756. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1757. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1758. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1759. #endif
  1760. #if BUILD_COMPLEX == 1
  1761. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1762. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1763. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1764. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1765. #endif
  1766. #if BUILD_COMPLEX16 == 1
  1767. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1768. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1769. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1770. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1771. #endif
  1772. #ifdef EXPRECISION
  1773. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1774. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1775. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1776. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1777. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1778. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1779. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1780. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1781. #endif
  1782. }
  1783. #endif //RISCV64
  1784. #endif //POWER
  1785. #endif //ZARCH
  1786. #endif //(ARCH_LOONGARCH64)
  1787. #endif //(ARCH_MIPS64)
  1788. #endif //(ARCH_ARM64)