You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 57 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void); /* Forward declaration of a file-local (static) routine taking no arguments and returning nothing; its definition is not visible in this part of the file. */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  113. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  114. 0, 0, 0,
  115. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SGEMM_DEFAULT_UNROLL_MN
  117. SGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. #endif
  122. #ifdef HAVE_EXCLUSIVE_CACHE
  123. 1,
  124. #else
  125. 0,
  126. #endif
  127. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  128. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  131. isamax_kTS,
  132. #endif
  133. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  134. isamin_kTS, ismax_kTS, ismin_kTS,
  135. snrm2_kTS, sasum_kTS,
  136. #endif
  137. #if BUILD_SINGLE == 1
  138. ssum_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. scopy_kTS, sdot_kTS,
  142. // dsdot_kTS,
  143. srot_kTS, saxpy_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  146. sscal_kTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. sswap_kTS,
  150. sgemv_nTS, sgemv_tTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. sger_kTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. ssymv_LTS, ssymv_UTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  159. #ifdef ARCH_X86_64
  160. sgemm_directTS,
  161. sgemm_direct_performantTS,
  162. #endif
  163. sgemm_kernelTS, sgemm_betaTS,
  164. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  165. sgemm_incopyTS, sgemm_itcopyTS,
  166. #else
  167. sgemm_oncopyTS, sgemm_otcopyTS,
  168. #endif
  169. sgemm_oncopyTS, sgemm_otcopyTS,
  170. #endif
  171. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  172. #ifdef SMALL_MATRIX_OPT
  173. sgemm_small_matrix_permitTS,
  174. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  175. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  176. #endif
  177. #endif
  178. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  179. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  180. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  181. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  182. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  183. #else
  184. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  185. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  186. #endif
  187. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  188. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  189. #endif
  190. #if (BUILD_SINGLE==1)
  191. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  192. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  193. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  194. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  195. #else
  196. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  197. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  198. #endif
  199. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  200. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  201. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  202. ssymm_iutcopyTS, ssymm_iltcopyTS,
  203. #else
  204. ssymm_outcopyTS, ssymm_oltcopyTS,
  205. #endif
  206. ssymm_outcopyTS, ssymm_oltcopyTS,
  207. #ifndef NO_LAPACK
  208. sneg_tcopyTS, slaswp_ncopyTS,
  209. #else
  210. NULL,NULL,
  211. #endif
  212. #endif
  213. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  214. 0, 0, 0,
  215. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  216. #ifdef DGEMM_DEFAULT_UNROLL_MN
  217. DGEMM_DEFAULT_UNROLL_MN,
  218. #else
  219. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  220. #endif
  221. #endif
  222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  223. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  224. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  225. dnrm2_kTS, dasum_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1)
  228. dsum_kTS,
  229. #endif
  230. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  231. dcopy_kTS, ddot_kTS,
  232. #endif
  233. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  234. dsdot_kTS,
  235. #endif
  236. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  237. drot_kTS,
  238. daxpy_kTS,
  239. dscal_kTS,
  240. dswap_kTS,
  241. dgemv_nTS, dgemv_tTS,
  242. #endif
  243. #if (BUILD_DOUBLE==1)
  244. dger_kTS,
  245. dsymv_LTS, dsymv_UTS,
  246. #endif
  247. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  248. dgemm_kernelTS, dgemm_betaTS,
  249. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  250. dgemm_incopyTS, dgemm_itcopyTS,
  251. #else
  252. dgemm_oncopyTS, dgemm_otcopyTS,
  253. #endif
  254. dgemm_oncopyTS, dgemm_otcopyTS,
  255. #endif
  256. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  257. #ifdef SMALL_MATRIX_OPT
  258. dgemm_small_matrix_permitTS,
  259. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  260. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  261. #endif
  262. #endif
  263. #if (BUILD_DOUBLE==1)
  264. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  265. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  266. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  267. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  268. #else
  269. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  270. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  271. #endif
  272. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  273. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  274. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  275. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  276. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  277. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  278. #else
  279. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  280. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  281. #endif
  282. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  283. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  284. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  285. dsymm_iutcopyTS, dsymm_iltcopyTS,
  286. #else
  287. dsymm_outcopyTS, dsymm_oltcopyTS,
  288. #endif
  289. dsymm_outcopyTS, dsymm_oltcopyTS,
  290. #ifndef NO_LAPACK
  291. dneg_tcopyTS, dlaswp_ncopyTS,
  292. #else
  293. NULL, NULL,
  294. #endif
  295. #endif
  296. #ifdef EXPRECISION
  297. 0, 0, 0,
  298. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  299. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  300. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  301. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  302. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  303. qgemv_nTS, qgemv_tTS, qger_kTS,
  304. qsymv_LTS, qsymv_UTS,
  305. qgemm_kernelTS, qgemm_betaTS,
  306. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  307. qgemm_incopyTS, qgemm_itcopyTS,
  308. #else
  309. qgemm_oncopyTS, qgemm_otcopyTS,
  310. #endif
  311. qgemm_oncopyTS, qgemm_otcopyTS,
  312. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  313. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  314. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  315. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  316. #else
  317. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  318. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  319. #endif
  320. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  321. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  322. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  323. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  324. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  325. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  326. #else
  327. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  328. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  329. #endif
  330. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  331. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  332. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  333. qsymm_iutcopyTS, qsymm_iltcopyTS,
  334. #else
  335. qsymm_outcopyTS, qsymm_oltcopyTS,
  336. #endif
  337. qsymm_outcopyTS, qsymm_oltcopyTS,
  338. #ifndef NO_LAPACK
  339. qneg_tcopyTS, qlaswp_ncopyTS,
  340. #else
  341. NULL, NULL,
  342. #endif
  343. #endif
  344. #if (BUILD_COMPLEX)
  345. 0, 0, 0,
  346. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  347. #ifdef CGEMM_DEFAULT_UNROLL_MN
  348. CGEMM_DEFAULT_UNROLL_MN,
  349. #else
  350. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  351. #endif
  352. #if (BUILD_COMPLEX)
  353. camax_kTS, camin_kTS,
  354. #endif
  355. #if (BUILD_COMPLEX)
  356. icamax_kTS,
  357. #endif
  358. #if (BUILD_COMPLEX)
  359. icamin_kTS,
  360. cnrm2_kTS, casum_kTS, csum_kTS,
  361. #endif
  362. #if (BUILD_COMPLEX)
  363. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  364. #endif
  365. #if (BUILD_COMPLEX)
  366. csrot_kTS,
  367. #endif
  368. #if (BUILD_COMPLEX)
  369. caxpy_kTS,
  370. caxpyc_kTS,
  371. cscal_kTS,
  372. cswap_kTS,
  373. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  374. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  375. #endif
  376. #if (BUILD_COMPLEX)
  377. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  378. csymv_LTS, csymv_UTS,
  379. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  380. #endif
  381. #if (BUILD_COMPLEX)
  382. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  383. cgemm_betaTS,
  384. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  385. cgemm_incopyTS, cgemm_itcopyTS,
  386. #else
  387. cgemm_oncopyTS, cgemm_otcopyTS,
  388. #endif
  389. cgemm_oncopyTS, cgemm_otcopyTS,
  390. #ifdef SMALL_MATRIX_OPT
  391. cgemm_small_matrix_permitTS,
  392. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  393. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  394. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  395. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  396. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  397. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  398. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  399. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  400. #endif
  401. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  402. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  403. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  404. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  405. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  406. #else
  407. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  408. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  409. #endif
  410. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  411. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  412. #endif
  413. #endif
  414. #if (BUILD_COMPLEX)
  415. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  416. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  417. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  418. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  419. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  420. #else
  421. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  422. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  423. #endif
  424. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  425. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  426. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  427. csymm_iutcopyTS, csymm_iltcopyTS,
  428. #else
  429. csymm_outcopyTS, csymm_oltcopyTS,
  430. #endif
  431. csymm_outcopyTS, csymm_oltcopyTS,
  432. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  433. chemm_iutcopyTS, chemm_iltcopyTS,
  434. #else
  435. chemm_outcopyTS, chemm_oltcopyTS,
  436. #endif
  437. chemm_outcopyTS, chemm_oltcopyTS,
  438. 0, 0, 0,
  439. #if (USE_GEMM3M)
  440. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  441. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  442. #else
  443. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  444. #endif
  445. cgemm3m_kernelTS,
  446. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  447. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  448. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  449. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  450. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  451. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  452. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  453. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  454. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  455. csymm3m_oucopybTS, csymm3m_olcopybTS,
  456. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  457. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  458. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  459. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  460. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  461. chemm3m_oucopybTS, chemm3m_olcopybTS,
  462. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  463. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  464. #else
  465. 0, 0, 0,
  466. NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. NULL, NULL,
  485. #endif
  486. #endif
  487. #if (BUILD_COMPLEX)
  488. #ifndef NO_LAPACK
  489. cneg_tcopyTS,
  490. claswp_ncopyTS,
  491. #else
  492. NULL, NULL,
  493. #endif
  494. #endif
  495. #if BUILD_COMPLEX16 == 1
  496. 0, 0, 0,
  497. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  498. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  499. ZGEMM_DEFAULT_UNROLL_MN,
  500. #else
  501. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  502. #endif
  503. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  504. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  505. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  506. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  507. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  508. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  509. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  510. zsymv_LTS, zsymv_UTS,
  511. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  512. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  513. zgemm_betaTS,
  514. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  515. zgemm_incopyTS, zgemm_itcopyTS,
  516. #else
  517. zgemm_oncopyTS, zgemm_otcopyTS,
  518. #endif
  519. zgemm_oncopyTS, zgemm_otcopyTS,
  520. #ifdef SMALL_MATRIX_OPT
  521. zgemm_small_matrix_permitTS,
  522. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  523. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  524. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  525. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  526. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  527. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  528. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  529. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  530. #endif
  531. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  532. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  533. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  534. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  535. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  536. #else
  537. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  538. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  539. #endif
  540. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  541. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  542. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  543. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  544. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  545. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  546. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  547. #else
  548. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  549. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  550. #endif
  551. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  552. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  553. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  554. zsymm_iutcopyTS, zsymm_iltcopyTS,
  555. #else
  556. zsymm_outcopyTS, zsymm_oltcopyTS,
  557. #endif
  558. zsymm_outcopyTS, zsymm_oltcopyTS,
  559. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  560. zhemm_iutcopyTS, zhemm_iltcopyTS,
  561. #else
  562. zhemm_outcopyTS, zhemm_oltcopyTS,
  563. #endif
  564. zhemm_outcopyTS, zhemm_oltcopyTS,
  565. 0, 0, 0,
  566. #if (USE_GEMM3M)
  567. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  568. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  569. #else
  570. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  571. #endif
  572. zgemm3m_kernelTS,
  573. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  574. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  575. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  576. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  577. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  578. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  579. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  580. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  581. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  582. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  583. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  584. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  585. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  586. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  587. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  588. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  589. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  590. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  591. #else
  592. 0, 0, 0,
  593. NULL,
  594. NULL, NULL,
  595. NULL, NULL,
  596. NULL, NULL,
  597. NULL, NULL,
  598. NULL, NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. NULL, NULL,
  612. #endif
  613. #ifndef NO_LAPACK
  614. zneg_tcopyTS, zlaswp_ncopyTS,
  615. #else
  616. NULL, NULL,
  617. #endif
  618. #endif
  619. #ifdef EXPRECISION
  620. 0, 0, 0,
  621. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  622. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  623. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  624. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  625. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  626. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  627. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  628. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  629. xsymv_LTS, xsymv_UTS,
  630. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  631. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  632. xgemm_betaTS,
  633. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  634. xgemm_incopyTS, xgemm_itcopyTS,
  635. #else
  636. xgemm_oncopyTS, xgemm_otcopyTS,
  637. #endif
  638. xgemm_oncopyTS, xgemm_otcopyTS,
  639. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  640. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  641. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  642. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  643. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  644. #else
  645. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  646. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  647. #endif
  648. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  649. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  650. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  651. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  652. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  653. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  654. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  655. #else
  656. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  657. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  658. #endif
  659. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  660. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  661. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  662. xsymm_iutcopyTS, xsymm_iltcopyTS,
  663. #else
  664. xsymm_outcopyTS, xsymm_oltcopyTS,
  665. #endif
  666. xsymm_outcopyTS, xsymm_oltcopyTS,
  667. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  668. xhemm_iutcopyTS, xhemm_iltcopyTS,
  669. #else
  670. xhemm_outcopyTS, xhemm_oltcopyTS,
  671. #endif
  672. xhemm_outcopyTS, xhemm_oltcopyTS,
  673. 0, 0, 0,
  674. #if (USE_GEMM3M)
  675. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  676. xgemm3m_kernelTS,
  677. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  678. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  679. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  680. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  681. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  682. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  683. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  684. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  685. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  686. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  687. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  688. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  689. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  690. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  691. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  692. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  693. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  694. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  695. #else
  696. 0, 0, 0,
  697. NULL,
  698. NULL, NULL,
  699. NULL, NULL,
  700. NULL, NULL,
  701. NULL, NULL,
  702. NULL, NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. NULL, NULL,
  716. #endif
  717. #ifndef NO_LAPACK
  718. xneg_tcopyTS, xlaswp_ncopyTS,
  719. #else
  720. NULL, NULL,
  721. #endif
  722. #endif
  723. init_parameter,
  724. SNUMOPT, DNUMOPT, QNUMOPT,
  725. #if BUILD_SINGLE == 1
  726. saxpby_kTS,
  727. #endif
  728. #if BUILD_DOUBLE == 1
  729. daxpby_kTS,
  730. #endif
  731. #if BUILD_COMPLEX == 1
  732. caxpby_kTS,
  733. #endif
  734. #if BUILD_COMPLEX16== 1
  735. zaxpby_kTS,
  736. #endif
  737. #if BUILD_SINGLE == 1
  738. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  739. #endif
  740. #if BUILD_DOUBLE== 1
  741. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  742. #endif
  743. #if BUILD_COMPLEX == 1
  744. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  745. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  746. #endif
  747. #if BUILD_COMPLEX16 == 1
  748. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  749. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  750. #endif
  751. #if BUILD_SINGLE == 1
  752. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  753. #endif
  754. #if BUILD_DOUBLE== 1
  755. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  756. #endif
  757. #if BUILD_COMPLEX== 1
  758. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  759. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  760. #endif
  761. #if BUILD_COMPLEX16==1
  762. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  763. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  764. #endif
  765. #if BUILD_SINGLE == 1
  766. sgeadd_kTS,
  767. #endif
  768. #if BUILD_DOUBLE==1
  769. dgeadd_kTS,
  770. #endif
  771. #if BUILD_COMPLEX==1
  772. cgeadd_kTS,
  773. #endif
  774. #if BUILD_COMPLEX16==1
  775. zgeadd_kTS,
  776. #endif
  777. };
  778. #if (ARCH_ARM64)
  /*
   * init_parameter (ARM64 variant)
   *
   * Fills the GEMM blocking fields (P, Q, R) of TABLE_NAME from the
   * compile-time *_DEFAULT_* macros.  Fields are written one precision at a
   * time, and only for the precisions selected by the BUILD_* switches.
   * The GEMM3M fields are written last because their fallback values are
   * read back from the sgemm/dgemm/qgemm fields of TABLE_NAME.
   */
  static void init_parameter(void) {

    /* ---- bfloat16 ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* ---- single precision (also set when only single complex is built) ---- */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* ---- double precision (also set when only double complex is built) ---- */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* ---- single complex ---- */
  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* ---- double complex ---- */
  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision, real (q) and complex (x) ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /*
     * GEMM3M: each field uses its dedicated *GEMM3M_DEFAULT_* macro when one
     * is defined; otherwise it copies the matching real-precision field that
     * is currently stored in TABLE_NAME.
     */

    /* cgemm3m <- CGEMM3M_DEFAULT_* or sgemm_* */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* zgemm3m <- ZGEMM3M_DEFAULT_* or dgemm_* */
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* xgemm3m always mirrors the qgemm fields */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  871. #else // (ARCH_ARM64)
  872. #if defined(ARCH_MIPS64)
  873. static void init_parameter(void) {
  874. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  875. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  876. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  877. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  878. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  879. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  880. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  881. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  882. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  883. TABLE_NAME.dgemm_r = 640;
  884. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  885. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  886. #ifdef EXPRECISION
  887. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  888. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  889. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  890. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  891. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  892. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  893. #endif
  894. #if defined(USE_GEMM3M)
  895. #ifdef CGEMM3M_DEFAULT_P
  896. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  897. #else
  898. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  899. #endif
  900. #ifdef ZGEMM3M_DEFAULT_P
  901. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  902. #else
  903. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  904. #endif
  905. #ifdef CGEMM3M_DEFAULT_Q
  906. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  907. #else
  908. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  909. #endif
  910. #ifdef ZGEMM3M_DEFAULT_Q
  911. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  912. #else
  913. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  914. #endif
  915. #ifdef CGEMM3M_DEFAULT_R
  916. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  917. #else
  918. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  919. #endif
  920. #ifdef ZGEMM3M_DEFAULT_R
  921. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  922. #else
  923. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  924. #endif
  925. #ifdef EXPRECISION
  926. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  927. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  928. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  929. #endif
  930. #endif
  931. }
  932. #else // (ARCH_MIPS64)
  933. #if (ARCH_LOONGARCH64)
  /*
   * get_L3_size (LoongArch64)
   *
   * Reads CPUCFG word 0x14 and returns the L3 cache size in MB, computed as
   *
   *     ((bits 15:0) + 1) * 2^(bits 23:16) * 2^(bits 30:24) / 2^20
   *
   * NOTE(review): per the LoongArch manual these fields should be ways-1,
   * log2(sets) and log2(line size in bytes) - confirm against the manual.
   *
   * The arithmetic is done with integer shifts only, so no floating-point
   * pow() is needed.  Field combinations too large for an int saturate to
   * 0x7fffffff instead of overflowing.
   */
  static int get_L3_size(void) {
    int ret = 0, id = 0x14;
    unsigned int factor;      /* (bits 15:0) + 1, range 1..65536        */
    unsigned int log2_scale;  /* sum of the two log2 fields, range 0..382 */
    unsigned long long mbytes;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(ret)
      : [id]"r"(id)
      : "memory"
    );

    factor     = (unsigned int)(ret & 0xffff) + 1u;
    log2_scale = (unsigned int)((ret >> 16) & 0xff)
               + (unsigned int)((ret >> 24) & 0x7f);

    /* factor needs at most 17 bits, so a shift of up to 40 stays within
     * 64 bits; anything larger cannot describe a real cache anyway. */
    if (log2_scale > 40u)
      return 0x7fffffff;

    mbytes = ((unsigned long long)factor << log2_scale) >> 20;  /* bytes -> MB */
    if (mbytes > 0x7fffffffULL)
      return 0x7fffffff;

    return (int)mbytes;
  }
  944. static void init_parameter(void) {
  945. #ifdef BUILD_BFLOAT16
  946. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  947. #endif
  948. #ifdef BUILD_BFLOAT16
  949. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  950. #endif
  951. #if defined(LA464)
  952. int L3_size = get_L3_size();
  953. #ifdef SMP
  954. if(blas_num_threads == 1){
  955. #endif
  956. //single thread
  957. if (L3_size == 32){ // 3C5000 and 3D5000
  958. TABLE_NAME.sgemm_p = 256;
  959. TABLE_NAME.sgemm_q = 384;
  960. TABLE_NAME.sgemm_r = 8192;
  961. TABLE_NAME.dgemm_p = 112;
  962. TABLE_NAME.dgemm_q = 289;
  963. TABLE_NAME.dgemm_r = 4096;
  964. TABLE_NAME.cgemm_p = 128;
  965. TABLE_NAME.cgemm_q = 256;
  966. TABLE_NAME.cgemm_r = 4096;
  967. TABLE_NAME.zgemm_p = 128;
  968. TABLE_NAME.zgemm_q = 128;
  969. TABLE_NAME.zgemm_r = 2048;
  970. } else { // 3A5000 and 3C5000L
  971. TABLE_NAME.sgemm_p = 256;
  972. TABLE_NAME.sgemm_q = 384;
  973. TABLE_NAME.sgemm_r = 4096;
  974. TABLE_NAME.dgemm_p = 112;
  975. TABLE_NAME.dgemm_q = 300;
  976. TABLE_NAME.dgemm_r = 3024;
  977. TABLE_NAME.cgemm_p = 128;
  978. TABLE_NAME.cgemm_q = 256;
  979. TABLE_NAME.cgemm_r = 2048;
  980. TABLE_NAME.zgemm_p = 128;
  981. TABLE_NAME.zgemm_q = 128;
  982. TABLE_NAME.zgemm_r = 1024;
  983. }
  984. #ifdef SMP
  985. }else{
  986. //multi thread
  987. if (L3_size == 32){ // 3C5000 and 3D5000
  988. TABLE_NAME.sgemm_p = 256;
  989. TABLE_NAME.sgemm_q = 384;
  990. TABLE_NAME.sgemm_r = 1024;
  991. TABLE_NAME.dgemm_p = 112;
  992. TABLE_NAME.dgemm_q = 289;
  993. TABLE_NAME.dgemm_r = 342;
  994. TABLE_NAME.cgemm_p = 128;
  995. TABLE_NAME.cgemm_q = 256;
  996. TABLE_NAME.cgemm_r = 512;
  997. TABLE_NAME.zgemm_p = 128;
  998. TABLE_NAME.zgemm_q = 128;
  999. TABLE_NAME.zgemm_r = 512;
  1000. } else { // 3A5000 and 3C5000L
  1001. TABLE_NAME.sgemm_p = 256;
  1002. TABLE_NAME.sgemm_q = 384;
  1003. TABLE_NAME.sgemm_r = 2048;
  1004. TABLE_NAME.dgemm_p = 112;
  1005. TABLE_NAME.dgemm_q = 300;
  1006. TABLE_NAME.dgemm_r = 738;
  1007. TABLE_NAME.cgemm_p = 128;
  1008. TABLE_NAME.cgemm_q = 256;
  1009. TABLE_NAME.cgemm_r = 1024;
  1010. TABLE_NAME.zgemm_p = 128;
  1011. TABLE_NAME.zgemm_q = 128;
  1012. TABLE_NAME.zgemm_r = 1024;
  1013. }
  1014. }
  1015. #endif
  1016. #else
  1017. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1018. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1019. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1020. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1021. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1022. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1023. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1024. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1025. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1026. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1027. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1028. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1029. #endif
  1030. #ifdef BUILD_BFLOAT16
  1031. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1032. #endif
  1033. }
  1034. #else // (ARCH_LOONGARCH64)
  1035. #if (ARCH_POWER)
  1036. static void init_parameter(void) {
  1037. #ifdef BUILD_BFLOAT16
  1038. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1039. #endif
  1040. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1041. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1042. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1043. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1044. #ifdef BUILD_BFLOAT16
  1045. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1046. #endif
  1047. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1048. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1049. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1050. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1051. #ifdef BUILD_BFLOAT16
  1052. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1053. #endif
  1054. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1055. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1056. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1057. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1058. }
  1059. #else //POWER
  1060. #if (ARCH_ZARCH)
  1061. static void init_parameter(void) {
  1062. #ifdef BUILD_BFLOAT16
  1063. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1064. #endif
  1065. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1066. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1067. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1068. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1069. #ifdef BUILD_BFLOAT16
  1070. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1071. #endif
  1072. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1073. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1074. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1075. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1076. #ifdef BUILD_BFLOAT16
  1077. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1078. #endif
  1079. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1080. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1081. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1082. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1083. }
  1084. #else //ZARCH
  1085. #if (ARCH_RISCV64)
  1086. static void init_parameter(void) {
  1087. #ifdef BUILD_BFLOAT16
  1088. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1089. #endif
  1090. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1091. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1092. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1093. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1094. #ifdef BUILD_BFLOAT16
  1095. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1096. #endif
  1097. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1098. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1099. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1100. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1101. #ifdef BUILD_BFLOAT16
  1102. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1103. #endif
  1104. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1105. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1106. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1107. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1108. }
  1109. #else //RISCV64
  1110. #ifdef ARCH_X86
  /*
   * get_l2_size_old
   *
   * Legacy L2 size detection: walks the one-byte cache descriptors returned
   * by CPUID leaf 2 and maps the first recognised L2 descriptor to a size
   * in KB.
   *
   * Descriptors are scanned in the order EAX[15:8], EAX[23:16], EAX[31:24],
   * then all four bytes of EBX, ECX and EDX.  The low byte of EAX is never
   * treated as a descriptor.  A register whose bit 31 is set is skipped
   * entirely, because CPUID leaf 2 marks such a register as not containing
   * valid descriptors.
   *
   * Returns the L2 size in KB.  If no known descriptor is found, a warning
   * is printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int reg[4];       /* eax, ebx, ecx, edx as filled in by cpuid(2, ...) */
    int r, byte, shift;

    cpuid(2, &reg[0], &reg[1], &reg[2], &reg[3]);

    for (r = 0; r < 4; r++){

      /* Bit 31 set: this register carries no valid descriptors. */
      if (BITMASK(reg[r], 31, 1)) continue;

      /* Start at byte 1 for EAX (its low byte is not a descriptor). */
      for (byte = (r == 0) ? 1 : 0; byte < 4; byte++){
        shift = 8 * byte;

        switch (BITMASK(reg[r], shift, 0xff)){
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;

          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;

          case 0x3a :
            return 192;

          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;

          case 0x3d :
            return 384;

          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;

          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;

          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;

          case 0x48 :
            return 3072;   /* 3 MB = 3 * 1024 KB */

          case 0x49 :
            return 4096;

          case 0x4e :
            return 6144;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1181. #endif
  /*
   * get_l2_size
   *
   * Returns the L2 cache size used for tuning, taken from the first source
   * that yields a positive value:
   *   1. the OPENBLAS_L2_SIZE environment variable (via readenv_atoi);
   *      zero or negative values are ignored so a bad override can never
   *      produce a non-positive cache size,
   *   2. CPUID leaf 0x80000006, field ECX[31:16],
   *   3. on ARCH_X86 builds, the legacy descriptor scan in
   *      get_l2_size_old(); on other builds a warning is printed to stderr
   *      and 256 is returned.
   *
   * NOTE(review): the value appears to be in KB (the fallback message says
   * "256k") - confirm that OPENBLAS_L2_SIZE is documented in the same unit.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx, l2;

    /* 1. explicit user override */
    l2 = readenv_atoi("OPENBLAS_L2_SIZE");
    if (l2 > 0)
      return l2;

    /* 2. extended CPUID leaf */
    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2 = BITMASK(ecx, 16, 0xffff);
    if (l2 > 0)
      return l2;

    /* 3. nothing usable reported */
  #ifndef ARCH_X86
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #else
    return get_l2_size_old();
  #endif
  }
  /*
   * get_l3_size
   *
   * Queries CPUID leaf 0x80000006 and returns the field extracted by
   * BITMASK(edx, 18, 0x3fff), multiplied by 512.
   *
   * NOTE(review): on AMD processors this EDX field is documented as the L3
   * size in 512 KB units, which would make the result a size in KB -
   * confirm for the CPUs this path is used on.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_units = BITMASK(edx, 18, 0x3fff);
    return l3_units * 512;
  }
  1205. static void init_parameter(void) {
  1206. int l2 = get_l2_size();
  1207. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1208. /* where the GEMM unrolling parameters do not depend on l2 */
  1209. #ifdef BUILD_BFLOAT16
  1210. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1211. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1212. #endif
  1213. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1214. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1215. #endif
  1216. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1217. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1218. #endif
  1219. #if BUILD_COMPLEX == 1
  1220. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1221. #endif
  1222. #if BUILD_COMPLEX16==1
  1223. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1224. #endif
  1225. #if BUILD_COMPLEX == 1
  1226. #ifdef CGEMM3M_DEFAULT_Q
  1227. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1228. #else
  1229. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1230. #endif
  1231. #endif
  1232. #if BUILD_COMPLEX16 == 1
  1233. #ifdef ZGEMM3M_DEFAULT_Q
  1234. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1235. #else
  1236. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1237. #endif
  1238. #endif
  1239. #ifdef EXPRECISION
  1240. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1241. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1242. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1243. #endif
  1244. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1245. #ifdef DEBUG
  1246. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1247. #endif
  1248. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1249. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1250. #endif
  1251. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1252. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1253. #endif
  1254. #if BUILD_COMPLEX==1
  1255. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1256. #endif
  1257. #if BUILD_COMPLEX16==1
  1258. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1259. #endif
  1260. #ifdef EXPRECISION
  1261. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1262. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1263. #endif
  1264. #endif
  1265. #ifdef CORE_NORTHWOOD
  1266. #ifdef DEBUG
  1267. fprintf(stderr, "Northwood\n");
  1268. #endif
  1269. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1270. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1271. #endif
  1272. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1273. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1274. #endif
  1275. #if BUILD_COMPLEX==1
  1276. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1277. #endif
  1278. #if BUILD_COMPLEX16==1
  1279. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1280. #endif
  1281. #ifdef EXPRECISION
  1282. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1283. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1284. #endif
  1285. #endif
  1286. #ifdef ATOM
  1287. #ifdef DEBUG
  1288. fprintf(stderr, "Atom\n");
  1289. #endif
  1290. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1291. TABLE_NAME.sgemm_p = 256;
  1292. #endif
  1293. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1294. TABLE_NAME.dgemm_p = 128;
  1295. #endif
  1296. #if BUILD_COMPLEX==1
  1297. TABLE_NAME.cgemm_p = 128;
  1298. #endif
  1299. #if BUILD_COMPLEX16==1
  1300. TABLE_NAME.zgemm_p = 64;
  1301. #endif
  1302. #ifdef EXPRECISION
  1303. TABLE_NAME.qgemm_p = 64;
  1304. TABLE_NAME.xgemm_p = 32;
  1305. #endif
  1306. #endif
  1307. #ifdef CORE_PRESCOTT
  1308. #ifdef DEBUG
  1309. fprintf(stderr, "Prescott\n");
  1310. #endif
  1311. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1312. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1313. #endif
  1314. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1315. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1316. #endif
  1317. #if BUILD_COMPLEX==1
  1318. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1319. #endif
  1320. #if BUILD_COMPLEX16 == 1
  1321. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1322. #endif
  1323. #ifdef EXPRECISION
  1324. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1325. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1326. #endif
  1327. #endif
  1328. #ifdef CORE2
  1329. #ifdef DEBUG
  1330. fprintf(stderr, "Core2\n");
  1331. #endif
  1332. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1333. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1334. #endif
  1335. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1336. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1337. #endif
  1338. #if BUILD_COMPLEX==1
  1339. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1340. #endif
  1341. #if BUILD_COMPLEX16==1
  1342. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1343. #endif
  1344. #ifdef EXPRECISION
  1345. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1346. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1347. #endif
  1348. #endif
  1349. #ifdef PENRYN
  1350. #ifdef DEBUG
  1351. fprintf(stderr, "Penryn\n");
  1352. #endif
  1353. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1354. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1355. #endif
  1356. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1357. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1358. #endif
  1359. #if BUILD_COMPLEX==1
  1360. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1361. #endif
  1362. #if BUILD_COMPLEX16==1
  1363. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1364. #endif
  1365. #ifdef EXPRECISION
  1366. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1367. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1368. #endif
  1369. #endif
  1370. #ifdef DUNNINGTON
  1371. #ifdef DEBUG
  1372. fprintf(stderr, "Dunnington\n");
  1373. #endif
  1374. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1375. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1376. #endif
  1377. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1378. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1379. #endif
  1380. #if BUILD_COMPLEX==1
  1381. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1382. #endif
  1383. #if BUILD_COMPLEX16==1
  1384. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1385. #endif
  1386. #ifdef EXPRECISION
  1387. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1388. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1389. #endif
  1390. #endif
  1391. #ifdef NEHALEM
  1392. #ifdef DEBUG
  1393. fprintf(stderr, "Nehalem\n");
  1394. #endif
  1395. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1396. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1397. #endif
  1398. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1399. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1400. #endif
  1401. #if BUILD_COMPLEX
  1402. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1403. #endif
  1404. #if BUILD_COMPLEX16
  1405. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1406. #endif
  1407. #ifdef EXPRECISION
  1408. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1409. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1410. #endif
  1411. #endif
  1412. #ifdef SANDYBRIDGE
  1413. #ifdef DEBUG
  1414. fprintf(stderr, "Sandybridge\n");
  1415. #endif
  1416. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1417. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1418. #endif
  1419. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1420. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1421. #endif
  1422. #if BUILD_COMPLEX
  1423. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1424. #endif
  1425. #if BUILD_COMPLEX16
  1426. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1427. #endif
  1428. #ifdef EXPRECISION
  1429. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1430. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1431. #endif
  1432. #endif
  1433. #ifdef HASWELL
  1434. #ifdef DEBUG
  1435. fprintf(stderr, "Haswell\n");
  1436. #endif
  1437. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1438. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1439. #endif
  1440. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1441. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1442. #endif
  1443. #if BUILD_COMPLEX
  1444. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1445. #endif
  1446. #if BUILD_COMPLEX16
  1447. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1448. #endif
  1449. #ifdef EXPRECISION
  1450. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1451. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1452. #endif
  1453. #endif
  1454. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1455. #ifdef DEBUG
  1456. fprintf(stderr, "SkylakeX\n");
  1457. #endif
  1458. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1459. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1460. #endif
  1461. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1462. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1463. #endif
  1464. #if BUILD_COMPLEX
  1465. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1466. #endif
  1467. #if BUILD_COMPLEX16
  1468. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1469. #endif
  1470. #ifdef EXPRECISION
  1471. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1472. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1473. #endif
  1474. #endif
  1475. #ifdef OPTERON
  1476. #ifdef DEBUG
  1477. fprintf(stderr, "Opteron\n");
  1478. #endif
  1479. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1480. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1481. #endif
  1482. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1483. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1484. #endif
  1485. #if BUILD_COMPLEX
  1486. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1487. #endif
  1488. #if BUILD_COMPLEX16
  1489. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1490. #endif
  1491. #ifdef EXPRECISION
  1492. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1493. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1494. #endif
  1495. #endif
  1496. #ifdef BARCELONA
  1497. #ifdef DEBUG
  1498. fprintf(stderr, "Barcelona\n");
  1499. #endif
  1500. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1501. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1502. #endif
  1503. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1504. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1505. #endif
  1506. #if BUILD_COMPLEX
  1507. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1508. #endif
  1509. #if BUILD_COMPLEX16
  1510. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1511. #endif
  1512. #ifdef EXPRECISION
  1513. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1514. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1515. #endif
  1516. #endif
  1517. #ifdef BOBCAT
  1518. #ifdef DEBUG
  1519. fprintf(stderr, "Bobcate\n");
  1520. #endif
  1521. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1522. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1523. #endif
  1524. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1525. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1526. #endif
  1527. #if BUILD_COMPLEX
  1528. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1529. #endif
  1530. #if BUILD_COMPLEX16
  1531. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1532. #endif
  1533. #ifdef EXPRECISION
  1534. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1535. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1536. #endif
  1537. #endif
  1538. #ifdef BULLDOZER
  1539. #ifdef DEBUG
  1540. fprintf(stderr, "Bulldozer\n");
  1541. #endif
  1542. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1543. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1544. #endif
  1545. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1546. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1547. #endif
  1548. #if BUILD_COMPLEX
  1549. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1550. #endif
  1551. #if BUILD_COMPLEX16
  1552. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1553. #endif
  1554. #ifdef EXPRECISION
  1555. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1556. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1557. #endif
  1558. #endif
  1559. #ifdef EXCAVATOR
  1560. #ifdef DEBUG
  1561. fprintf(stderr, "Excavator\n");
  1562. #endif
  1563. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1564. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1565. #endif
  1566. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1567. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1568. #endif
  1569. #if BUILD_COMPLEX
  1570. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1571. #endif
  1572. #if BUILD_COMPLEX16
  1573. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1574. #endif
  1575. #ifdef EXPRECISION
  1576. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1577. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1578. #endif
  1579. #endif
  1580. #ifdef PILEDRIVER
  1581. #ifdef DEBUG
  1582. fprintf(stderr, "Piledriver\n");
  1583. #endif
  1584. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1585. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1586. #endif
  1587. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1588. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1589. #endif
  1590. #if BUILD_COMPLEX
  1591. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1592. #endif
  1593. #if BUILD_COMPLEX16
  1594. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1595. #endif
  1596. #ifdef EXPRECISION
  1597. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1598. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1599. #endif
  1600. #endif
  1601. #ifdef STEAMROLLER
  1602. #ifdef DEBUG
  1603. fprintf(stderr, "Steamroller\n");
  1604. #endif
  1605. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1606. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1607. #endif
  1608. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1609. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1610. #endif
  1611. #if BUILD_COMPLEX
  1612. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1613. #endif
  1614. #if BUILD_COMPLEX16
  1615. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1616. #endif
  1617. #ifdef EXPRECISION
  1618. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1619. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1620. #endif
  1621. #endif
  1622. #ifdef ZEN
  1623. #ifdef DEBUG
  1624. fprintf(stderr, "Zen\n");
  1625. #endif
  1626. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1627. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1628. #endif
  1629. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1630. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1631. #endif
  1632. #if BUILD_COMPLEX
  1633. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1634. #endif
  1635. #if BUILD_COMPLEX16
  1636. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1637. #endif
  1638. #ifdef EXPRECISION
  1639. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1640. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1641. #endif
  1642. #endif
  1643. #ifdef NANO
  1644. #ifdef DEBUG
  1645. fprintf(stderr, "NANO\n");
  1646. #endif
  1647. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1648. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1649. #endif
  1650. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1651. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1652. #endif
  1653. #if (BUILD_COMPLEX==1)
  1654. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1655. #endif
  1656. #if (BUILD_COMPLEX16==1)
  1657. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1658. #endif
  1659. #ifdef EXPRECISION
  1660. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1661. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1662. #endif
  1663. #endif
  1664. #ifdef SAPPHIRERAPIDS
  1665. #if (BUILD_BFLOAT16 == 1)
  1666. TABLE_NAME.need_amxtile_permission = 1;
  1667. #endif
  1668. #endif
  1669. #if BUILD_COMPLEX==1
  1670. #ifdef CGEMM3M_DEFAULT_P
  1671. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1672. #else
  1673. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1674. #endif
  1675. #endif
  1676. #if BUILD_COMPLEX16==1
  1677. #ifdef ZGEMM3M_DEFAULT_P
  1678. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1679. #else
  1680. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1681. #endif
  1682. #endif
  1683. #ifdef EXPRECISION
  1684. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1685. #endif
  1686. #if BUILD_SINGLE == 1
  1687. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1688. #endif
  1689. #if BUILD_DOUBLE== 1
  1690. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1691. #endif
  1692. #if BUILD_COMPLEX==1
  1693. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1694. #endif
  1695. #if BUILD_COMPLEX16==1
  1696. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1697. #endif
  1698. #if BUILD_COMPLEX==1
  1699. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1700. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1701. #else
  1702. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1703. #endif
  1704. #endif
  1705. #if BUILD_COMPLEX16==1
  1706. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1707. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1708. #else
  1709. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1710. #endif
  1711. #endif
  1712. #ifdef QUAD_PRECISION
  1713. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1714. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1715. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1716. #endif
  1717. #ifdef DEBUG
  1718. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1719. #endif
  1720. #if BUILD_BFLOAT16==1
  1721. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1722. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1723. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1724. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1725. #endif
  1726. #if BUILD_SINGLE==1
  1727. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1728. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1729. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1730. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1731. #endif
  1732. #if BUILD_DOUBLE==1
  1733. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1734. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1735. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1736. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1737. #endif
  1738. #ifdef EXPRECISION
  1739. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1740. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1741. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1742. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1743. #endif
  1744. #if BUILD_COMPLEX ==1
  1745. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1746. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1747. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1748. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1749. #endif
  1750. #if BUILD_COMPLEX16 ==1
  1751. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1752. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1753. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1754. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1755. #endif
  1756. #if BUILD_COMPLEX == 1
  1757. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1758. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1759. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1760. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1761. #endif
  1762. #if BUILD_COMPLEX16 == 1
  1763. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1764. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1765. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1766. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1767. #endif
  1768. #ifdef EXPRECISION
  1769. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1770. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1771. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1772. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1773. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1774. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1775. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1776. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1777. #endif
  1778. }
  1779. #endif //RISCV64
  1780. #endif //POWER
  1781. #endif //ZARCH
  1782. #endif //(ARCH_LOONGARCH64)
  1783. #endif //(ARCH_MIPS64)
  1784. #endif //(ARCH_ARM64)