You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 53 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG /* discard any earlier definition of the DEBUG macro from this point on */
  45. static void init_parameter(void); /* forward declaration with internal linkage; the body is not in this part of the file */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. SBGEMM_ALIGN_K,
  58. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  59. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  60. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  61. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  62. dsdot_kTS,
  63. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  64. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  65. ssymv_LTS, ssymv_UTS,
  66. sbgemm_kernelTS, sbgemm_betaTS,
  67. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  68. sbgemm_incopyTS, sbgemm_itcopyTS,
  69. #else
  70. sbgemm_oncopyTS, sbgemm_otcopyTS,
  71. #endif
  72. sbgemm_oncopyTS, sbgemm_otcopyTS,
  73. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  74. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  75. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  76. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  77. #else
  78. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  79. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  80. #endif
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  84. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  85. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  86. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  87. #else
  88. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  89. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  90. #endif
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  94. ssymm_iutcopyTS, ssymm_iltcopyTS,
  95. #else
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  97. #endif
  98. ssymm_outcopyTS, ssymm_oltcopyTS,
  99. #ifndef NO_LAPACK
  100. sneg_tcopyTS, slaswp_ncopyTS,
  101. #else
  102. NULL,NULL,
  103. #endif
  104. #ifdef SMALL_MATRIX_OPT
  105. sbgemm_small_matrix_permitTS,
  106. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  107. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  108. #endif
  109. #endif
  110. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  111. 0, 0, 0,
  112. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  113. #ifdef SGEMM_DEFAULT_UNROLL_MN
  114. SGEMM_DEFAULT_UNROLL_MN,
  115. #else
  116. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  117. #endif
  118. #endif
  119. #ifdef HAVE_EXCLUSIVE_CACHE
  120. 1,
  121. #else
  122. 0,
  123. #endif
  124. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  125. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  126. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  127. snrm2_kTS, sasum_kTS,
  128. #endif
  129. #if BUILD_SINGLE == 1
  130. ssum_kTS,
  131. #endif
  132. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  133. scopy_kTS, sdot_kTS,
  134. // dsdot_kTS,
  135. srot_kTS, saxpy_kTS,
  136. #endif
  137. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  138. sscal_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. sswap_kTS,
  142. sgemv_nTS, sgemv_tTS,
  143. #endif
  144. #if BUILD_SINGLE == 1
  145. sger_kTS,
  146. ssymv_LTS, ssymv_UTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. #ifdef ARCH_X86_64
  150. sgemm_directTS,
  151. sgemm_direct_performantTS,
  152. #endif
  153. sgemm_kernelTS, sgemm_betaTS,
  154. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  155. sgemm_incopyTS, sgemm_itcopyTS,
  156. #else
  157. sgemm_oncopyTS, sgemm_otcopyTS,
  158. #endif
  159. sgemm_oncopyTS, sgemm_otcopyTS,
  160. #endif
  161. #if BUILD_SINGLE == 1
  162. #ifdef SMALL_MATRIX_OPT
  163. sgemm_small_matrix_permitTS,
  164. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  165. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  166. #endif
  167. #endif
  168. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  169. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  170. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  171. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  172. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  173. #else
  174. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  175. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  176. #endif
  177. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  178. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  179. #endif
  180. #if BUILD_SINGLE == 1
  181. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  182. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  183. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  184. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  185. #else
  186. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  187. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  188. #endif
  189. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  190. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  191. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  192. ssymm_iutcopyTS, ssymm_iltcopyTS,
  193. #else
  194. ssymm_outcopyTS, ssymm_oltcopyTS,
  195. #endif
  196. ssymm_outcopyTS, ssymm_oltcopyTS,
  197. #endif
  198. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  199. #ifndef NO_LAPACK
  200. sneg_tcopyTS, slaswp_ncopyTS,
  201. #else
  202. NULL,NULL,
  203. #endif
  204. #endif
  205. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  206. 0, 0, 0,
  207. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  208. #ifdef DGEMM_DEFAULT_UNROLL_MN
  209. DGEMM_DEFAULT_UNROLL_MN,
  210. #else
  211. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  212. #endif
  213. #endif
  214. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  215. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  216. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  217. dnrm2_kTS, dasum_kTS,
  218. #endif
  219. #if (BUILD_DOUBLE==1)
  220. dsum_kTS,
  221. #endif
  222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  223. dcopy_kTS, ddot_kTS,
  224. #endif
  225. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  226. dsdot_kTS,
  227. #endif
  228. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  229. drot_kTS,
  230. daxpy_kTS,
  231. dscal_kTS,
  232. dswap_kTS,
  233. dgemv_nTS, dgemv_tTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1)
  236. dger_kTS,
  237. dsymv_LTS, dsymv_UTS,
  238. #endif
  239. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  240. dgemm_kernelTS, dgemm_betaTS,
  241. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  242. dgemm_incopyTS, dgemm_itcopyTS,
  243. #else
  244. dgemm_oncopyTS, dgemm_otcopyTS,
  245. #endif
  246. dgemm_oncopyTS, dgemm_otcopyTS,
  247. #endif
  248. #if (BUILD_DOUBLE==1)
  249. #ifdef SMALL_MATRIX_OPT
  250. dgemm_small_matrix_permitTS,
  251. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  252. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  253. #endif
  254. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  255. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  256. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  257. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  258. #else
  259. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  260. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  261. #endif
  262. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  263. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  264. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  265. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  266. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  267. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  268. #else
  269. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  270. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  271. #endif
  272. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  273. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  274. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  275. dsymm_iutcopyTS, dsymm_iltcopyTS,
  276. #else
  277. dsymm_outcopyTS, dsymm_oltcopyTS,
  278. #endif
  279. dsymm_outcopyTS, dsymm_oltcopyTS,
  280. #ifndef NO_LAPACK
  281. dneg_tcopyTS, dlaswp_ncopyTS,
  282. #else
  283. NULL, NULL,
  284. #endif
  285. #endif
  286. #ifdef EXPRECISION
  287. 0, 0, 0,
  288. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  289. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  290. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  291. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  292. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  293. qgemv_nTS, qgemv_tTS, qger_kTS,
  294. qsymv_LTS, qsymv_UTS,
  295. qgemm_kernelTS, qgemm_betaTS,
  296. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  297. qgemm_incopyTS, qgemm_itcopyTS,
  298. #else
  299. qgemm_oncopyTS, qgemm_otcopyTS,
  300. #endif
  301. qgemm_oncopyTS, qgemm_otcopyTS,
  302. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  303. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  304. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  305. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  306. #else
  307. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  308. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  309. #endif
  310. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  311. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  312. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  313. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  314. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  315. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  316. #else
  317. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  318. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  319. #endif
  320. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  321. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  322. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  323. qsymm_iutcopyTS, qsymm_iltcopyTS,
  324. #else
  325. qsymm_outcopyTS, qsymm_oltcopyTS,
  326. #endif
  327. qsymm_outcopyTS, qsymm_oltcopyTS,
  328. #ifndef NO_LAPACK
  329. qneg_tcopyTS, qlaswp_ncopyTS,
  330. #else
  331. NULL, NULL,
  332. #endif
  333. #endif
  334. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  335. 0, 0, 0,
  336. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  337. #ifdef CGEMM_DEFAULT_UNROLL_MN
  338. CGEMM_DEFAULT_UNROLL_MN,
  339. #else
  340. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  341. #endif
  342. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  343. #endif
  344. #if (BUILD_COMPLEX)
  345. cnrm2_kTS, casum_kTS, csum_kTS,
  346. #endif
  347. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  348. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  349. #endif
  350. #if (BUILD_COMPLEX)
  351. csrot_kTS,
  352. #endif
  353. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  354. caxpy_kTS,
  355. caxpyc_kTS,
  356. cscal_kTS,
  357. cswap_kTS,
  358. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  359. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  360. #endif
  361. #if (BUILD_COMPLEX)
  362. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  363. csymv_LTS, csymv_UTS,
  364. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  365. #endif
  366. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  367. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  368. cgemm_betaTS,
  369. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  370. cgemm_incopyTS, cgemm_itcopyTS,
  371. #else
  372. cgemm_oncopyTS, cgemm_otcopyTS,
  373. #endif
  374. cgemm_oncopyTS, cgemm_otcopyTS,
  375. #ifdef SMALL_MATRIX_OPT
  376. cgemm_small_matrix_permitTS,
  377. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  378. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  379. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  380. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  381. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  382. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  383. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  384. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  385. #endif
  386. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  387. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  388. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  389. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  390. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  391. #else
  392. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  393. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  394. #endif
  395. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  396. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  397. #endif
  398. #if (BUILD_COMPLEX)
  399. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  400. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  401. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  402. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  403. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  404. #else
  405. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  406. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  407. #endif
  408. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  409. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  410. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  411. csymm_iutcopyTS, csymm_iltcopyTS,
  412. #else
  413. csymm_outcopyTS, csymm_oltcopyTS,
  414. #endif
  415. csymm_outcopyTS, csymm_oltcopyTS,
  416. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  417. chemm_iutcopyTS, chemm_iltcopyTS,
  418. #else
  419. chemm_outcopyTS, chemm_oltcopyTS,
  420. #endif
  421. chemm_outcopyTS, chemm_oltcopyTS,
  422. 0, 0, 0,
  423. #if (USE_GEMM3M)
  424. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  425. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  426. #else
  427. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  428. #endif
  429. cgemm3m_kernelTS,
  430. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  431. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  432. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  433. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  434. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  435. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  436. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  437. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  438. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  439. csymm3m_oucopybTS, csymm3m_olcopybTS,
  440. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  441. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  442. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  443. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  444. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  445. chemm3m_oucopybTS, chemm3m_olcopybTS,
  446. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  447. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  448. #else
  449. 0, 0, 0,
  450. NULL,
  451. NULL, NULL,
  452. NULL, NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. #endif
  470. #endif
  471. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  472. #ifndef NO_LAPACK
  473. cneg_tcopyTS,
  474. claswp_ncopyTS,
  475. #else
  476. NULL, NULL,
  477. #endif
  478. #endif
  479. #if BUILD_COMPLEX16 == 1
  480. 0, 0, 0,
  481. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  482. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  483. ZGEMM_DEFAULT_UNROLL_MN,
  484. #else
  485. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  486. #endif
  487. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  488. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  489. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  490. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  491. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  492. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  493. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  494. zsymv_LTS, zsymv_UTS,
  495. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  496. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  497. zgemm_betaTS,
  498. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  499. zgemm_incopyTS, zgemm_itcopyTS,
  500. #else
  501. zgemm_oncopyTS, zgemm_otcopyTS,
  502. #endif
  503. zgemm_oncopyTS, zgemm_otcopyTS,
  504. #ifdef SMALL_MATRIX_OPT
  505. zgemm_small_matrix_permitTS,
  506. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  507. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  508. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  509. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  510. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  511. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  512. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  513. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  514. #endif
  515. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  516. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  517. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  518. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  519. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  520. #else
  521. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  522. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  523. #endif
  524. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  525. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  526. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  527. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  528. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  529. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  530. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  531. #else
  532. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  533. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  534. #endif
  535. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  536. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  537. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  538. zsymm_iutcopyTS, zsymm_iltcopyTS,
  539. #else
  540. zsymm_outcopyTS, zsymm_oltcopyTS,
  541. #endif
  542. zsymm_outcopyTS, zsymm_oltcopyTS,
  543. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  544. zhemm_iutcopyTS, zhemm_iltcopyTS,
  545. #else
  546. zhemm_outcopyTS, zhemm_oltcopyTS,
  547. #endif
  548. zhemm_outcopyTS, zhemm_oltcopyTS,
  549. 0, 0, 0,
  550. #if (USE_GEMM3M)
  551. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  552. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  553. #else
  554. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  555. #endif
  556. zgemm3m_kernelTS,
  557. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  558. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  559. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  560. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  561. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  562. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  563. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  564. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  565. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  566. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  567. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  568. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  569. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  570. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  571. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  572. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  573. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  574. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  575. #else
  576. 0, 0, 0,
  577. NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. NULL, NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. NULL, NULL,
  596. #endif
  597. #ifndef NO_LAPACK
  598. zneg_tcopyTS, zlaswp_ncopyTS,
  599. #else
  600. NULL, NULL,
  601. #endif
  602. #endif
  603. #ifdef EXPRECISION
  604. 0, 0, 0,
  605. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  606. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  607. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  608. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  609. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  610. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  611. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  612. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  613. xsymv_LTS, xsymv_UTS,
  614. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  615. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  616. xgemm_betaTS,
  617. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  618. xgemm_incopyTS, xgemm_itcopyTS,
  619. #else
  620. xgemm_oncopyTS, xgemm_otcopyTS,
  621. #endif
  622. xgemm_oncopyTS, xgemm_otcopyTS,
  623. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  624. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  625. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  626. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  627. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  628. #else
  629. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  630. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  631. #endif
  632. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  633. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  634. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  635. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  636. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  637. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  638. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  639. #else
  640. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  641. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  642. #endif
  643. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  644. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  645. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  646. xsymm_iutcopyTS, xsymm_iltcopyTS,
  647. #else
  648. xsymm_outcopyTS, xsymm_oltcopyTS,
  649. #endif
  650. xsymm_outcopyTS, xsymm_oltcopyTS,
  651. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  652. xhemm_iutcopyTS, xhemm_iltcopyTS,
  653. #else
  654. xhemm_outcopyTS, xhemm_oltcopyTS,
  655. #endif
  656. xhemm_outcopyTS, xhemm_oltcopyTS,
  657. 0, 0, 0,
  658. #if (USE_GEMM3M)
  659. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  660. xgemm3m_kernelTS,
  661. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  662. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  663. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  664. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  665. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  666. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  667. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  668. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  669. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  670. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  671. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  672. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  673. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  674. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  675. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  676. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  677. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  678. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  679. #else
  680. 0, 0, 0,
  681. NULL,
  682. NULL, NULL,
  683. NULL, NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. NULL, NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. NULL, NULL,
  700. #endif
  701. #ifndef NO_LAPACK
  702. xneg_tcopyTS, xlaswp_ncopyTS,
  703. #else
  704. NULL, NULL,
  705. #endif
  706. #endif
  707. init_parameter,
  708. SNUMOPT, DNUMOPT, QNUMOPT,
  709. #if BUILD_SINGLE == 1
  710. saxpby_kTS,
  711. #endif
  712. #if BUILD_DOUBLE == 1
  713. daxpby_kTS,
  714. #endif
  715. #if BUILD_COMPLEX == 1
  716. caxpby_kTS,
  717. #endif
  718. #if BUILD_COMPLEX16== 1
  719. zaxpby_kTS,
  720. #endif
  721. #if BUILD_SINGLE == 1
  722. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  723. #endif
  724. #if BUILD_DOUBLE== 1
  725. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  726. #endif
  727. #if BUILD_COMPLEX == 1
  728. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  729. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  730. #endif
  731. #if BUILD_COMPLEX16 == 1
  732. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  733. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  734. #endif
  735. #if BUILD_SINGLE == 1
  736. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  737. #endif
  738. #if BUILD_DOUBLE== 1
  739. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  740. #endif
  741. #if BUILD_COMPLEX== 1
  742. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  743. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  744. #endif
  745. #if BUILD_COMPLEX16==1
  746. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  747. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  748. #endif
  749. #if BUILD_SINGLE == 1
  750. sgeadd_kTS,
  751. #endif
  752. #if BUILD_DOUBLE==1
  753. dgeadd_kTS,
  754. #endif
  755. #if BUILD_COMPLEX==1
  756. cgeadd_kTS,
  757. #endif
  758. #if BUILD_COMPLEX16==1
  759. zgeadd_kTS,
  760. #endif
  761. };
  762. #if (ARCH_ARM64)
  /*
   * init_parameter -- ARM64 variant.
   *
   * Stores the compile-time *_DEFAULT_P / _Q / _R values into the matching
   * *_p / *_q / *_r fields of TABLE_NAME for every precision whose BUILD_*
   * guard is active.  Takes no arguments and returns nothing; the only
   * effect is the set of stores into TABLE_NAME.
   *
   * GEMM3M fallbacks: when no CGEMM3M_DEFAULT_x / ZGEMM3M_DEFAULT_x macro is
   * provided, the real-precision default macro is stored directly.  The
   * sgemm_q / sgemm_r / dgemm_* fields are only written here when
   * BUILD_SINGLE / BUILD_DOUBLE is 1, so reading them back (as this code
   * used to) could copy a value this function never set in a build that
   * enables only the complex precisions.  Whenever the field was set, the
   * stored value is the same macro, so the result is unchanged in that case.
   */
  static void init_parameter(void) {

    /* ---- *_p ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  #endif
  #if BUILD_DOUBLE == 1
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  #endif

    /* ---- *_q ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  #endif
  #if BUILD_SINGLE == 1
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if BUILD_DOUBLE== 1
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX== 1
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

    /* ---- *_r ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif
  #if BUILD_SINGLE == 1
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif
  #if BUILD_DOUBLE==1
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /* ---- GEMM3M ---- */
  #if (USE_GEMM3M)
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = SGEMM_DEFAULT_P;
  #endif
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = DGEMM_DEFAULT_P;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = SGEMM_DEFAULT_R;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = DGEMM_DEFAULT_R;
  #endif
  #ifdef EXPRECISION
    /* qgemm_* is always written above under EXPRECISION, so it is safe to read back. */
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif
  }
  855. #else // (ARCH_ARM64)
  856. #if defined(ARCH_MIPS64)
  857. static void init_parameter(void) {
  858. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  859. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  860. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  861. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  862. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  863. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  864. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  865. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  866. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  867. TABLE_NAME.dgemm_r = 640;
  868. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  869. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  870. #ifdef EXPRECISION
  871. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  872. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  873. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  874. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  875. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  876. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  877. #endif
  878. #if defined(USE_GEMM3M)
  879. #ifdef CGEMM3M_DEFAULT_P
  880. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  881. #else
  882. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  883. #endif
  884. #ifdef ZGEMM3M_DEFAULT_P
  885. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  886. #else
  887. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  888. #endif
  889. #ifdef CGEMM3M_DEFAULT_Q
  890. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  891. #else
  892. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  893. #endif
  894. #ifdef ZGEMM3M_DEFAULT_Q
  895. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  896. #else
  897. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  898. #endif
  899. #ifdef CGEMM3M_DEFAULT_R
  900. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  901. #else
  902. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  903. #endif
  904. #ifdef ZGEMM3M_DEFAULT_R
  905. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  906. #else
  907. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  908. #endif
  909. #ifdef EXPRECISION
  910. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  911. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  912. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  913. #endif
  914. #endif
  915. }
  916. #else // (ARCH_MIPS64)
  917. #if (ARCH_LOONGARCH64)
  918. static void init_parameter(void) {
  919. #ifdef BUILD_BFLOAT16
  920. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  921. #endif
  922. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  923. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  924. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  925. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  926. #ifdef BUILD_BFLOAT16
  927. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  928. #endif
  929. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  930. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  931. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  932. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  933. #ifdef BUILD_BFLOAT16
  934. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  935. #endif
  936. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  937. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  938. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  939. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  940. }
  941. #else // (ARCH_LOONGARCH64)
  942. #if (ARCH_POWER)
  943. static void init_parameter(void) {
  944. #ifdef BUILD_BFLOAT16
  945. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  946. #endif
  947. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  948. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  949. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  950. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  951. #ifdef BUILD_BFLOAT16
  952. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  953. #endif
  954. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  955. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  956. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  957. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  958. #ifdef BUILD_BFLOAT16
  959. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  960. #endif
  961. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  962. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  963. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  964. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  965. }
  966. #else //POWER
  967. #if (ARCH_ZARCH)
  968. static void init_parameter(void) {
  969. #ifdef BUILD_BFLOAT16
  970. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  971. #endif
  972. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  973. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  974. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  975. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  976. #ifdef BUILD_BFLOAT16
  977. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  978. #endif
  979. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  980. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  981. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  982. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  983. #ifdef BUILD_BFLOAT16
  984. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  985. #endif
  986. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  987. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  988. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  989. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  990. }
  991. #else //ZARCH
  992. #ifdef ARCH_X86
  /*
   * get_l2_size_old -- L2 cache size from the CPUID leaf 2 descriptor bytes.
   *
   * Executes cpuid(2) and walks the descriptor bytes in eax (upper three
   * bytes only), ebx, ecx and edx, in that order.  The first byte that
   * matches a known L2 descriptor decides the result.
   *
   * Returns the size associated with that descriptor (same unit as the
   * "256k" fallback, i.e. KB).  If no byte matches, a warning is written to
   * stderr and 256 is returned.
   *
   * Notes:
   *  - A register whose bit 31 is set carries no valid descriptors (Intel
   *    SDM, CPUID leaf 02H), so its bytes are skipped instead of being
   *    misread as cache descriptors.
   *  - Descriptor 0x48 is a 3 MB L2, i.e. 3072 KB (this used to return 3184).
   */
  static int get_l2_size_old(void){

    int regs[4];        /* eax, ebx, ecx, edx as filled in by cpuid(2) */
    int r, shift;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (r = 0; r < 4; r++) {

      /* bit 31 set: register is reserved / holds no descriptor bytes */
      if ((unsigned int)regs[r] & 0x80000000u) continue;

      /* the lowest byte of eax is not a descriptor, so eax starts at bit 8 */
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8) {

        switch (BITMASK(regs[r], shift, 0xff)) {

          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          return 3072;        /* 3 MB */

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
        }
      }
    }

    /* nothing recognised: warn and fall back to a conservative default */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1063. #endif
  /*
   * get_l2_size -- L2 cache size as reported by cpuid(0x80000006).
   *
   * Extracts the 16-bit field at bit 16 of ecx.  A positive value is
   * returned as is.  Otherwise:
   *   - with ARCH_X86 defined, the result of get_l2_size_old() is returned;
   *   - without it, a warning is written to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int size;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    size = BITMASK(ecx, 16, 0xffff);

    /* common case on every target: the extended leaf gave a usable value */
    if (size > 0) return size;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * get_l3_size -- L3 cache size derived from cpuid(0x80000006).
   *
   * Takes the 14-bit field at bit 18 of edx and returns it multiplied by 512.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    units = BITMASK(edx, 18, 0x3fff);
    return units * 512;
  }
  1084. static void init_parameter(void) {
  1085. int l2 = get_l2_size();
  1086. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1087. /* where the GEMM unrolling parameters do not depend on l2 */
  1088. #ifdef BUILD_BFLOAT16
  1089. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1090. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1091. #endif
  1092. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1093. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1094. #endif
  1095. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1096. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1097. #endif
  1098. #if BUILD_COMPLEX == 1
  1099. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1100. #endif
  1101. #if BUILD_COMPLEX16==1
  1102. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1103. #endif
  1104. #if BUILD_COMPLEX == 1
  1105. #ifdef CGEMM3M_DEFAULT_Q
  1106. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1107. #else
  1108. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1109. #endif
  1110. #endif
  1111. #if BUILD_COMPLEX16 == 1
  1112. #ifdef ZGEMM3M_DEFAULT_Q
  1113. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1114. #else
  1115. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1116. #endif
  1117. #endif
  1118. #ifdef EXPRECISION
  1119. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1120. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1121. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1122. #endif
  1123. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1124. #ifdef DEBUG
  1125. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1126. #endif
  1127. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1128. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1129. #endif
  1130. #if BUILD_DOUBLE == 1
  1131. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1132. #endif
  1133. #if BUILD_COMPLEX==1
  1134. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1135. #endif
  1136. #if BUILD_COMPLEX16==1
  1137. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1138. #endif
  1139. #ifdef EXPRECISION
  1140. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1141. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1142. #endif
  1143. #endif
  1144. #ifdef CORE_NORTHWOOD
  1145. #ifdef DEBUG
  1146. fprintf(stderr, "Northwood\n");
  1147. #endif
  1148. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1149. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1150. #endif
  1151. #if BUILD_DOUBLE == 1
  1152. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1153. #endif
  1154. #if BUILD_COMPLEX==1
  1155. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1156. #endif
  1157. #if BUILD_COMPLEX16==1
  1158. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1159. #endif
  1160. #ifdef EXPRECISION
  1161. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1162. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1163. #endif
  1164. #endif
  1165. #ifdef ATOM
  1166. #ifdef DEBUG
  1167. fprintf(stderr, "Atom\n");
  1168. #endif
  1169. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1170. TABLE_NAME.sgemm_p = 256;
  1171. #endif
  1172. #if BUILD_DOUBLE ==1
  1173. TABLE_NAME.dgemm_p = 128;
  1174. #endif
  1175. #if BUILD_COMPLEX==1
  1176. TABLE_NAME.cgemm_p = 128;
  1177. #endif
  1178. #if BUILD_COMPLEX16==1
  1179. TABLE_NAME.zgemm_p = 64;
  1180. #endif
  1181. #ifdef EXPRECISION
  1182. TABLE_NAME.qgemm_p = 64;
  1183. TABLE_NAME.xgemm_p = 32;
  1184. #endif
  1185. #endif
  1186. #ifdef CORE_PRESCOTT
  1187. #ifdef DEBUG
  1188. fprintf(stderr, "Prescott\n");
  1189. #endif
  1190. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1191. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1192. #endif
  1193. #if BUILD_DOUBLE ==1
  1194. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1195. #endif
  1196. #if BUILD_COMPLEX==1
  1197. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1198. #endif
  1199. #if BUILD_COMPLEX16 == 1
  1200. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1201. #endif
  1202. #ifdef EXPRECISION
  1203. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1204. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1205. #endif
  1206. #endif
  1207. #ifdef CORE2
  1208. #ifdef DEBUG
  1209. fprintf(stderr, "Core2\n");
  1210. #endif
  1211. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1212. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1213. #endif
  1214. #if BUILD_DOUBLE==1
  1215. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1216. #endif
  1217. #if BUILD_COMPLEX==1
  1218. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1219. #endif
  1220. #if BUILD_COMPLEX16==1
  1221. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1222. #endif
  1223. #ifdef EXPRECISION
  1224. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1225. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1226. #endif
  1227. #endif
  1228. #ifdef PENRYN
  1229. #ifdef DEBUG
  1230. fprintf(stderr, "Penryn\n");
  1231. #endif
  1232. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1233. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1234. #endif
  1235. #if BUILD_DOUBLE == 1
  1236. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1237. #endif
  1238. #if BUILD_COMPLEX==1
  1239. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1240. #endif
  1241. #if BUILD_COMPLEX16==1
  1242. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1243. #endif
  1244. #ifdef EXPRECISION
  1245. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1246. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1247. #endif
  1248. #endif
  1249. #ifdef DUNNINGTON
  1250. #ifdef DEBUG
  1251. fprintf(stderr, "Dunnington\n");
  1252. #endif
  1253. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1254. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1255. #endif
  1256. #if BUILD_DOUBLE ==1
  1257. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1258. #endif
  1259. #if BUILD_COMPLEX==1
  1260. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1261. #endif
  1262. #if BUILD_COMPLEX16==1
  1263. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1264. #endif
  1265. #ifdef EXPRECISION
  1266. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1267. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1268. #endif
  1269. #endif
  1270. #ifdef NEHALEM
  1271. #ifdef DEBUG
  1272. fprintf(stderr, "Nehalem\n");
  1273. #endif
  1274. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1275. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1276. #endif
  1277. #if BUILD_DOUBLE
  1278. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1279. #endif
  1280. #if BUILD_COMPLEX
  1281. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1282. #endif
  1283. #if BUILD_COMPLEX16
  1284. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1285. #endif
  1286. #ifdef EXPRECISION
  1287. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1288. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1289. #endif
  1290. #endif
  1291. #ifdef SANDYBRIDGE
  1292. #ifdef DEBUG
  1293. fprintf(stderr, "Sandybridge\n");
  1294. #endif
  1295. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1296. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1297. #endif
  1298. #if BUILD_DOUBLE
  1299. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1300. #endif
  1301. #if BUILD_COMPLEX
  1302. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1303. #endif
  1304. #if BUILD_COMPLEX16
  1305. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1306. #endif
  1307. #ifdef EXPRECISION
  1308. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1309. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1310. #endif
  1311. #endif
  1312. #ifdef HASWELL
  1313. #ifdef DEBUG
  1314. fprintf(stderr, "Haswell\n");
  1315. #endif
  1316. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1317. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1318. #endif
  1319. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1320. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1321. #endif
  1322. #if BUILD_COMPLEX
  1323. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1324. #endif
  1325. #if BUILD_COMPLEX16
  1326. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1327. #endif
  1328. #ifdef EXPRECISION
  1329. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1330. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1331. #endif
  1332. #endif
  1333. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1334. #ifdef DEBUG
  1335. fprintf(stderr, "SkylakeX\n");
  1336. #endif
  1337. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1338. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1339. #endif
  1340. #if BUILD_DOUBLE
  1341. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1342. #endif
  1343. #if BUILD_COMPLEX
  1344. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1345. #endif
  1346. #if BUILD_COMPLEX16
  1347. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1348. #endif
  1349. #ifdef EXPRECISION
  1350. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1351. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1352. #endif
  1353. #endif
  1354. #ifdef OPTERON
  1355. #ifdef DEBUG
  1356. fprintf(stderr, "Opteron\n");
  1357. #endif
  1358. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1359. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1360. #endif
  1361. #if BUILD_DOUBLE
  1362. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1363. #endif
  1364. #if BUILD_COMPLEX
  1365. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1366. #endif
  1367. #if BUILD_COMPLEX16
  1368. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1369. #endif
  1370. #ifdef EXPRECISION
  1371. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1372. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1373. #endif
  1374. #endif
  1375. #ifdef BARCELONA
  1376. #ifdef DEBUG
  1377. fprintf(stderr, "Barcelona\n");
  1378. #endif
  1379. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1380. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1381. #endif
  1382. #if BUILD_DOUBLE
  1383. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1384. #endif
  1385. #if BUILD_COMPLEX
  1386. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1387. #endif
  1388. #if BUILD_COMPLEX16
  1389. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1390. #endif
  1391. #ifdef EXPRECISION
  1392. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1393. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1394. #endif
  1395. #endif
  1396. #ifdef BOBCAT
  1397. #ifdef DEBUG
  1398. fprintf(stderr, "Bobcate\n");
  1399. #endif
  1400. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1401. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1402. #endif
  1403. #if BUILD_DOUBLE
  1404. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1405. #endif
  1406. #if BUILD_COMPLEX
  1407. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1408. #endif
  1409. #if BUILD_COMPLEX16
  1410. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1411. #endif
  1412. #ifdef EXPRECISION
  1413. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1414. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1415. #endif
  1416. #endif
  1417. #ifdef BULLDOZER
  1418. #ifdef DEBUG
  1419. fprintf(stderr, "Bulldozer\n");
  1420. #endif
  1421. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1422. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1423. #endif
  1424. #if BUILD_DOUBLE
  1425. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1426. #endif
  1427. #if BUILD_COMPLEX
  1428. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1429. #endif
  1430. #if BUILD_COMPLEX16
  1431. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1432. #endif
  1433. #ifdef EXPRECISION
  1434. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1435. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1436. #endif
  1437. #endif
  1438. #ifdef EXCAVATOR
  1439. #ifdef DEBUG
  1440. fprintf(stderr, "Excavator\n");
  1441. #endif
  1442. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1443. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1444. #endif
  1445. #if BUILD_DOUBLE
  1446. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1447. #endif
  1448. #if BUILD_COMPLEX
  1449. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1450. #endif
  1451. #if BUILD_COMPLEX16
  1452. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1453. #endif
  1454. #ifdef EXPRECISION
  1455. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1456. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1457. #endif
  1458. #endif
  1459. #ifdef PILEDRIVER
  1460. #ifdef DEBUG
  1461. fprintf(stderr, "Piledriver\n");
  1462. #endif
  1463. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1464. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1465. #endif
  1466. #if BUILD_DOUBLE
  1467. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1468. #endif
  1469. #if BUILD_COMPLEX
  1470. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1471. #endif
  1472. #if BUILD_COMPLEX16
  1473. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1474. #endif
  1475. #ifdef EXPRECISION
  1476. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1477. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1478. #endif
  1479. #endif
  1480. #ifdef STEAMROLLER
  1481. #ifdef DEBUG
  1482. fprintf(stderr, "Steamroller\n");
  1483. #endif
  1484. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1485. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1486. #endif
  1487. #if BUILD_DOUBLE
  1488. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1489. #endif
  1490. #if BUILD_COMPLEX
  1491. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1492. #endif
  1493. #if BUILD_COMPLEX16
  1494. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1495. #endif
  1496. #ifdef EXPRECISION
  1497. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1498. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1499. #endif
  1500. #endif
  1501. #ifdef ZEN
  1502. #ifdef DEBUG
  1503. fprintf(stderr, "Zen\n");
  1504. #endif
  1505. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1506. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1507. #endif
  1508. #if BUILD_DOUBLE
  1509. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1510. #endif
  1511. #if BUILD_COMPLEX
  1512. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1513. #endif
  1514. #if BUILD_COMPLEX16
  1515. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1516. #endif
  1517. #ifdef EXPRECISION
  1518. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1519. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1520. #endif
  1521. #endif
  1522. #ifdef NANO
  1523. #ifdef DEBUG
  1524. fprintf(stderr, "NANO\n");
  1525. #endif
  1526. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1527. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1528. #endif
  1529. #if (BUILD_DOUBLE==1)
  1530. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1531. #endif
  1532. #if (BUILD_COMPLEX==1)
  1533. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1534. #endif
  1535. #if (BUILD_COMPLEX16==1)
  1536. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1537. #endif
  1538. #ifdef EXPRECISION
  1539. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1540. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1541. #endif
  1542. #endif
  1543. #if BUILD_COMPLEX==1
  1544. #ifdef CGEMM3M_DEFAULT_P
  1545. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1546. #else
  1547. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1548. #endif
  1549. #endif
  1550. #if BUILD_COMPLEX16==1
  1551. #ifdef ZGEMM3M_DEFAULT_P
  1552. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1553. #else
  1554. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1555. #endif
  1556. #endif
  1557. #ifdef EXPRECISION
  1558. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1559. #endif
  1560. #if BUILD_SINGLE == 1
  1561. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1562. #endif
  1563. #if BUILD_DOUBLE== 1
  1564. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1565. #endif
  1566. #if BUILD_COMPLEX==1
  1567. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1568. #endif
  1569. #if BUILD_COMPLEX16==1
  1570. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1571. #endif
  1572. #if BUILD_COMPLEX==1
  1573. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1574. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1575. #else
  1576. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1577. #endif
  1578. #endif
  1579. #if BUILD_COMPLEX16==1
  1580. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1581. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1582. #else
  1583. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1584. #endif
  1585. #endif
  1586. #ifdef QUAD_PRECISION
  1587. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1588. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1589. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1590. #endif
  1591. #ifdef DEBUG
  1592. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1593. #endif
  1594. #if BUILD_BFLOAT16==1
  1595. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1596. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1597. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1598. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1599. #endif
  1600. #if BUILD_SINGLE==1
  1601. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1602. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1603. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1604. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1605. #endif
  1606. #if BUILD_DOUBLE==1
  1607. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1608. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1609. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1610. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1611. #endif
  1612. #ifdef EXPRECISION
  1613. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1614. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1615. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1616. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1617. #endif
  1618. #if BUILD_COMPLEX ==1
  1619. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1620. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1621. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1622. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1623. #endif
  1624. #if BUILD_COMPLEX16 ==1
  1625. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1626. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1627. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1628. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1629. #endif
  1630. #if BUILD_COMPLEX == 1
  1631. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1632. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1633. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1634. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1635. #endif
  1636. #if BUILD_COMPLEX16 == 1
  1637. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1638. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1639. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1640. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1641. #endif
  1642. #ifdef EXPRECISION
  1643. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1644. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1645. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1646. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1647. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1648. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1649. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1650. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1651. #endif
  1652. }
  1653. #endif //POWER
  1654. #endif //ZARCH
  1655. #endif //(ARCH_LOONGARCH64)
  1656. #endif //(ARCH_MIPS64)
  1657. #endif //(ARCH_ARM64)