You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 52 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  /* Drop any definition of DEBUG that may be in effect at this point, so it is undefined for the code that follows. */
  44. #undef DEBUG
  /* Forward declaration of the file-local (static) init_parameter routine: takes no arguments, returns nothing. */
  /* NOTE(review): the definition is not visible in this portion of the file; presumably it appears further down - confirm. */
  45. static void init_parameter(void);
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. sbgemm_kernelTS, sbgemm_betaTS,
  66. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  67. sbgemm_incopyTS, sbgemm_itcopyTS,
  68. #else
  69. sbgemm_oncopyTS, sbgemm_otcopyTS,
  70. #endif
  71. sbgemm_oncopyTS, sbgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #endif
  104. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  105. 0, 0, 0,
  106. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  107. #ifdef SGEMM_DEFAULT_UNROLL_MN
  108. SGEMM_DEFAULT_UNROLL_MN,
  109. #else
  110. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  111. #endif
  112. #endif
  113. #ifdef HAVE_EXCLUSIVE_CACHE
  114. 1,
  115. #else
  116. 0,
  117. #endif
  118. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  119. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  120. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  121. snrm2_kTS, sasum_kTS,
  122. #endif
  123. #if BUILD_SINGLE == 1
  124. ssum_kTS,
  125. #endif
  126. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  127. scopy_kTS, sdot_kTS,
  128. // dsdot_kTS,
  129. srot_kTS, saxpy_kTS,
  130. #endif
  131. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  132. sscal_kTS,
  133. #endif
  134. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  135. sswap_kTS,
  136. sgemv_nTS, sgemv_tTS,
  137. #endif
  138. #if BUILD_SINGLE == 1
  139. sger_kTS,
  140. ssymv_LTS, ssymv_UTS,
  141. #endif
  142. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  143. #ifdef ARCH_X86_64
  144. sgemm_directTS,
  145. sgemm_direct_performantTS,
  146. #endif
  147. sgemm_kernelTS, sgemm_betaTS,
  148. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  149. sgemm_incopyTS, sgemm_itcopyTS,
  150. #else
  151. sgemm_oncopyTS, sgemm_otcopyTS,
  152. #endif
  153. sgemm_oncopyTS, sgemm_otcopyTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. #ifdef SMALL_MATRIX_OPT
  157. sgemm_small_matrix_permitTS,
  158. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  159. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  160. #endif
  161. #endif
  162. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  163. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  164. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  165. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  166. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  167. #else
  168. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  169. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  170. #endif
  171. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  172. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  173. #endif
  174. #if BUILD_SINGLE == 1
  175. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  176. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  177. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  178. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  179. #else
  180. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  181. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  182. #endif
  183. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  184. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  185. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  186. ssymm_iutcopyTS, ssymm_iltcopyTS,
  187. #else
  188. ssymm_outcopyTS, ssymm_oltcopyTS,
  189. #endif
  190. ssymm_outcopyTS, ssymm_oltcopyTS,
  191. #endif
  192. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  193. #ifndef NO_LAPACK
  194. sneg_tcopyTS, slaswp_ncopyTS,
  195. #else
  196. NULL,NULL,
  197. #endif
  198. #endif
  199. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  200. 0, 0, 0,
  201. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  202. #ifdef DGEMM_DEFAULT_UNROLL_MN
  203. DGEMM_DEFAULT_UNROLL_MN,
  204. #else
  205. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  206. #endif
  207. #endif
  208. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  209. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  210. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  211. dnrm2_kTS, dasum_kTS,
  212. #endif
  213. #if (BUILD_DOUBLE==1)
  214. dsum_kTS,
  215. #endif
  216. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  217. dcopy_kTS, ddot_kTS,
  218. #endif
  219. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  220. dsdot_kTS,
  221. #endif
  222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  223. drot_kTS,
  224. daxpy_kTS,
  225. dscal_kTS,
  226. dswap_kTS,
  227. dgemv_nTS, dgemv_tTS,
  228. #endif
  229. #if (BUILD_DOUBLE==1)
  230. dger_kTS,
  231. dsymv_LTS, dsymv_UTS,
  232. #endif
  233. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  234. dgemm_kernelTS, dgemm_betaTS,
  235. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  236. dgemm_incopyTS, dgemm_itcopyTS,
  237. #else
  238. dgemm_oncopyTS, dgemm_otcopyTS,
  239. #endif
  240. dgemm_oncopyTS, dgemm_otcopyTS,
  241. #endif
  242. #if (BUILD_DOUBLE==1)
  243. #ifdef SMALL_MATRIX_OPT
  244. dgemm_small_matrix_permitTS,
  245. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  246. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  247. #endif
  248. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  249. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  250. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  251. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  252. #else
  253. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  254. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  255. #endif
  256. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  257. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  258. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  259. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  260. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  261. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  262. #else
  263. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  264. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  265. #endif
  266. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  267. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  268. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  269. dsymm_iutcopyTS, dsymm_iltcopyTS,
  270. #else
  271. dsymm_outcopyTS, dsymm_oltcopyTS,
  272. #endif
  273. dsymm_outcopyTS, dsymm_oltcopyTS,
  274. #ifndef NO_LAPACK
  275. dneg_tcopyTS, dlaswp_ncopyTS,
  276. #else
  277. NULL, NULL,
  278. #endif
  279. #endif
  280. #ifdef EXPRECISION
  281. 0, 0, 0,
  282. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  283. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  284. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  285. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  286. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  287. qgemv_nTS, qgemv_tTS, qger_kTS,
  288. qsymv_LTS, qsymv_UTS,
  289. qgemm_kernelTS, qgemm_betaTS,
  290. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  291. qgemm_incopyTS, qgemm_itcopyTS,
  292. #else
  293. qgemm_oncopyTS, qgemm_otcopyTS,
  294. #endif
  295. qgemm_oncopyTS, qgemm_otcopyTS,
  296. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  297. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  298. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  299. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  300. #else
  301. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  302. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  303. #endif
  304. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  305. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  306. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  307. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  308. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  309. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  310. #else
  311. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  312. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  313. #endif
  314. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  315. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  316. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  317. qsymm_iutcopyTS, qsymm_iltcopyTS,
  318. #else
  319. qsymm_outcopyTS, qsymm_oltcopyTS,
  320. #endif
  321. qsymm_outcopyTS, qsymm_oltcopyTS,
  322. #ifndef NO_LAPACK
  323. qneg_tcopyTS, qlaswp_ncopyTS,
  324. #else
  325. NULL, NULL,
  326. #endif
  327. #endif
  328. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  329. 0, 0, 0,
  330. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  331. #ifdef CGEMM_DEFAULT_UNROLL_MN
  332. CGEMM_DEFAULT_UNROLL_MN,
  333. #else
  334. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  335. #endif
  336. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  337. #endif
  338. #if (BUILD_COMPLEX)
  339. cnrm2_kTS, casum_kTS, csum_kTS,
  340. #endif
  341. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  342. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  343. #endif
  344. #if (BUILD_COMPLEX)
  345. csrot_kTS,
  346. #endif
  347. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  348. caxpy_kTS,
  349. caxpyc_kTS,
  350. cscal_kTS,
  351. cswap_kTS,
  352. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  353. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  354. #endif
  355. #if (BUILD_COMPLEX)
  356. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  357. csymv_LTS, csymv_UTS,
  358. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  359. #endif
  360. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  361. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  362. cgemm_betaTS,
  363. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  364. cgemm_incopyTS, cgemm_itcopyTS,
  365. #else
  366. cgemm_oncopyTS, cgemm_otcopyTS,
  367. #endif
  368. cgemm_oncopyTS, cgemm_otcopyTS,
  369. #ifdef SMALL_MATRIX_OPT
  370. cgemm_small_matrix_permitTS,
  371. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  372. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  373. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  374. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  375. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  376. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  377. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  378. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  379. #endif
  380. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  381. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  382. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  383. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  384. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  385. #else
  386. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  387. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  388. #endif
  389. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  390. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  391. #endif
  392. #if (BUILD_COMPLEX)
  393. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  394. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  395. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  396. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  397. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  398. #else
  399. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  400. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  401. #endif
  402. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  403. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  404. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  405. csymm_iutcopyTS, csymm_iltcopyTS,
  406. #else
  407. csymm_outcopyTS, csymm_oltcopyTS,
  408. #endif
  409. csymm_outcopyTS, csymm_oltcopyTS,
  410. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  411. chemm_iutcopyTS, chemm_iltcopyTS,
  412. #else
  413. chemm_outcopyTS, chemm_oltcopyTS,
  414. #endif
  415. chemm_outcopyTS, chemm_oltcopyTS,
  416. 0, 0, 0,
  417. #if (USE_GEMM3M)
  418. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  419. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  420. #else
  421. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  422. #endif
  423. cgemm3m_kernelTS,
  424. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  425. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  426. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  427. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  428. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  429. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  430. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  431. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  432. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  433. csymm3m_oucopybTS, csymm3m_olcopybTS,
  434. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  435. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  436. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  437. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  438. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  439. chemm3m_oucopybTS, chemm3m_olcopybTS,
  440. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  441. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  442. #else
  443. 0, 0, 0,
  444. NULL,
  445. NULL, NULL,
  446. NULL, NULL,
  447. NULL, NULL,
  448. NULL, NULL,
  449. NULL, NULL,
  450. NULL, NULL,
  451. NULL, NULL,
  452. NULL, NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. #endif
  464. #endif
  465. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  466. #ifndef NO_LAPACK
  467. cneg_tcopyTS,
  468. claswp_ncopyTS,
  469. #else
  470. NULL, NULL,
  471. #endif
  472. #endif
  473. #if BUILD_COMPLEX16 == 1
  474. 0, 0, 0,
  475. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  476. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  477. ZGEMM_DEFAULT_UNROLL_MN,
  478. #else
  479. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  480. #endif
  481. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  482. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  483. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  484. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  485. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  486. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  487. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  488. zsymv_LTS, zsymv_UTS,
  489. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  490. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  491. zgemm_betaTS,
  492. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  493. zgemm_incopyTS, zgemm_itcopyTS,
  494. #else
  495. zgemm_oncopyTS, zgemm_otcopyTS,
  496. #endif
  497. zgemm_oncopyTS, zgemm_otcopyTS,
  498. #ifdef SMALL_MATRIX_OPT
  499. zgemm_small_matrix_permitTS,
  500. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  501. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  502. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  503. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  504. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  505. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  506. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  507. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  508. #endif
  509. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  510. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  511. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  512. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  513. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  514. #else
  515. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  516. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  517. #endif
  518. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  519. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  520. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  521. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  522. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  523. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  524. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  525. #else
  526. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  527. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  528. #endif
  529. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  530. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  531. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  532. zsymm_iutcopyTS, zsymm_iltcopyTS,
  533. #else
  534. zsymm_outcopyTS, zsymm_oltcopyTS,
  535. #endif
  536. zsymm_outcopyTS, zsymm_oltcopyTS,
  537. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  538. zhemm_iutcopyTS, zhemm_iltcopyTS,
  539. #else
  540. zhemm_outcopyTS, zhemm_oltcopyTS,
  541. #endif
  542. zhemm_outcopyTS, zhemm_oltcopyTS,
  543. 0, 0, 0,
  544. #if (USE_GEMM3M)
  545. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  546. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  547. #else
  548. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  549. #endif
  550. zgemm3m_kernelTS,
  551. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  552. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  553. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  554. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  555. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  556. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  557. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  558. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  559. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  560. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  561. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  562. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  563. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  564. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  565. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  566. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  567. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  568. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  569. #else
  570. 0, 0, 0,
  571. NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. #endif
  591. #ifndef NO_LAPACK
  592. zneg_tcopyTS, zlaswp_ncopyTS,
  593. #else
  594. NULL, NULL,
  595. #endif
  596. #endif
  597. #ifdef EXPRECISION
  598. 0, 0, 0,
  599. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  600. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  601. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  602. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  603. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  604. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  605. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  606. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  607. xsymv_LTS, xsymv_UTS,
  608. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  609. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  610. xgemm_betaTS,
  611. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  612. xgemm_incopyTS, xgemm_itcopyTS,
  613. #else
  614. xgemm_oncopyTS, xgemm_otcopyTS,
  615. #endif
  616. xgemm_oncopyTS, xgemm_otcopyTS,
  617. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  618. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  619. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  620. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  621. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  622. #else
  623. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  624. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  625. #endif
  626. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  627. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  628. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  629. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  630. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  631. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  632. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  633. #else
  634. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  635. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  636. #endif
  637. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  638. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  639. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  640. xsymm_iutcopyTS, xsymm_iltcopyTS,
  641. #else
  642. xsymm_outcopyTS, xsymm_oltcopyTS,
  643. #endif
  644. xsymm_outcopyTS, xsymm_oltcopyTS,
  645. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  646. xhemm_iutcopyTS, xhemm_iltcopyTS,
  647. #else
  648. xhemm_outcopyTS, xhemm_oltcopyTS,
  649. #endif
  650. xhemm_outcopyTS, xhemm_oltcopyTS,
  651. 0, 0, 0,
  652. #if (USE_GEMM3M)
  653. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  654. xgemm3m_kernelTS,
  655. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  656. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  657. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  658. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  659. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  660. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  661. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  662. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  663. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  664. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  665. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  666. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  667. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  668. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  669. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  670. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  671. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  672. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  673. #else
  674. 0, 0, 0,
  675. NULL,
  676. NULL, NULL,
  677. NULL, NULL,
  678. NULL, NULL,
  679. NULL, NULL,
  680. NULL, NULL,
  681. NULL, NULL,
  682. NULL, NULL,
  683. NULL, NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. #endif
  695. #ifndef NO_LAPACK
  696. xneg_tcopyTS, xlaswp_ncopyTS,
  697. #else
  698. NULL, NULL,
  699. #endif
  700. #endif
  701. init_parameter,
  702. SNUMOPT, DNUMOPT, QNUMOPT,
  703. #if BUILD_SINGLE == 1
  704. saxpby_kTS,
  705. #endif
  706. #if BUILD_DOUBLE == 1
  707. daxpby_kTS,
  708. #endif
  709. #if BUILD_COMPLEX == 1
  710. caxpby_kTS,
  711. #endif
  712. #if BUILD_COMPLEX16== 1
  713. zaxpby_kTS,
  714. #endif
  715. #if BUILD_SINGLE == 1
  716. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  717. #endif
  718. #if BUILD_DOUBLE== 1
  719. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  720. #endif
  721. #if BUILD_COMPLEX == 1
  722. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  723. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  724. #endif
  725. #if BUILD_COMPLEX16 == 1
  726. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  727. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  728. #endif
  729. #if BUILD_SINGLE == 1
  730. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  731. #endif
  732. #if BUILD_DOUBLE== 1
  733. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  734. #endif
  735. #if BUILD_COMPLEX== 1
  736. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  737. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  738. #endif
  739. #if BUILD_COMPLEX16==1
  740. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  741. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  742. #endif
  743. #if BUILD_SINGLE == 1
  744. sgeadd_kTS,
  745. #endif
  746. #if BUILD_DOUBLE==1
  747. dgeadd_kTS,
  748. #endif
  749. #if BUILD_COMPLEX==1
  750. cgeadd_kTS,
  751. #endif
  752. #if BUILD_COMPLEX16==1
  753. zgeadd_kTS
  754. #endif
  755. };
  756. #if (ARCH_ARM64)
  757. static void init_parameter(void) {
  758. #if (BUILD_BFLOAT16)
  759. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  760. #endif
  761. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  762. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  763. #endif
  764. #if BUILD_DOUBLE == 1
  765. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  766. #endif
  767. #if BUILD_COMPLEX==1
  768. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  769. #endif
  770. #if BUILD_COMPLEX16==1
  771. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  772. #endif
  773. #if (BUILD_BFLOAT16)
  774. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  775. #endif
  776. #if BUILD_SINGLE == 1
  777. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  778. #endif
  779. #if BUILD_DOUBLE== 1
  780. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  781. #endif
  782. #if BUILD_COMPLEX== 1
  783. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  784. #endif
  785. #if BUILD_COMPLEX16==1
  786. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  787. #endif
  788. #if (BUILD_BFLOAT16)
  789. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  790. #endif
  791. #if BUILD_SINGLE == 1
  792. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  793. #endif
  794. #if BUILD_DOUBLE==1
  795. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  796. #endif
  797. #if BUILD_COMPLEX==1
  798. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  799. #endif
  800. #if BUILD_COMPLEX16==1
  801. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  802. #endif
  803. #ifdef EXPRECISION
  804. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  805. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  806. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  807. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  808. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  809. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  810. #endif
  811. #if (USE_GEMM3M)
  812. #ifdef CGEMM3M_DEFAULT_P
  813. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  814. #else
  815. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  816. #endif
  817. #ifdef ZGEMM3M_DEFAULT_P
  818. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  819. #else
  820. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  821. #endif
  822. #ifdef CGEMM3M_DEFAULT_Q
  823. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  824. #else
  825. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  826. #endif
  827. #ifdef ZGEMM3M_DEFAULT_Q
  828. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  829. #else
  830. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  831. #endif
  832. #ifdef CGEMM3M_DEFAULT_R
  833. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  834. #else
  835. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  836. #endif
  837. #ifdef ZGEMM3M_DEFAULT_R
  838. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  839. #else
  840. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  841. #endif
  842. #ifdef EXPRECISION
  843. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  844. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  845. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  846. #endif
  847. #endif
  848. }
  849. #else // (ARCH_ARM64)
  850. #if defined(ARCH_MIPS64)
  851. static void init_parameter(void) {
  852. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  853. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  854. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  855. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  856. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  857. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  858. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  859. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  860. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  861. TABLE_NAME.dgemm_r = 640;
  862. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  863. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  864. #ifdef EXPRECISION
  865. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  866. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  867. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  868. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  869. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  870. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  871. #endif
  872. #if defined(USE_GEMM3M)
  873. #ifdef CGEMM3M_DEFAULT_P
  874. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  875. #else
  876. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  877. #endif
  878. #ifdef ZGEMM3M_DEFAULT_P
  879. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  880. #else
  881. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  882. #endif
  883. #ifdef CGEMM3M_DEFAULT_Q
  884. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  885. #else
  886. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  887. #endif
  888. #ifdef ZGEMM3M_DEFAULT_Q
  889. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  890. #else
  891. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  892. #endif
  893. #ifdef CGEMM3M_DEFAULT_R
  894. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  895. #else
  896. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  897. #endif
  898. #ifdef ZGEMM3M_DEFAULT_R
  899. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  900. #else
  901. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  902. #endif
  903. #ifdef EXPRECISION
  904. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  905. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  906. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  907. #endif
  908. #endif
  909. }
  910. #else // (ARCH_MIPS64)
  911. #if (ARCH_POWER)
  912. static void init_parameter(void) {
  913. #ifdef BUILD_BFLOAT16
  914. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  915. #endif
  916. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  917. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  918. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  919. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  920. #ifdef BUILD_BFLOAT16
  921. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  922. #endif
  923. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  924. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  925. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  926. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  927. #ifdef BUILD_BFLOAT16
  928. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  929. #endif
  930. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  931. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  932. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  933. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  934. }
  935. #else //POWER
  936. #if (ARCH_ZARCH)
  937. static void init_parameter(void) {
  938. #ifdef BUILD_BFLOAT16
  939. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  940. #endif
  941. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  942. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  943. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  944. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  945. #ifdef BUILD_BFLOAT16
  946. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  947. #endif
  948. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  949. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  950. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  951. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  952. #ifdef BUILD_BFLOAT16
  953. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  954. #endif
  955. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  956. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  957. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  958. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  959. }
  960. #else //ZARCH
  961. #ifdef ARCH_X86
  /*
   * get_l2_size_old: derive the L2 cache size from the CPUID leaf 2
   * descriptor bytes.
   *
   * Executes cpuid(2, ...), splits EAX/EBX/ECX/EDX into 15 one-byte
   * descriptors and returns the size associated with the first descriptor
   * found in the table below.
   *
   * Returns: the cache size in KB (the fallback message and value, "256k" /
   *          256, fix the unit).  If no descriptor matches, a warning is
   *          written to stderr and 256 is returned.
   *
   * NOTE(review): BITMASK(reg, shift, mask) is defined outside this block; it
   * is assumed to yield (reg >> shift) & mask -- confirm against its definition.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /* The low byte of EAX is deliberately skipped: only bits 8..31 of EAX are
       collected, followed by all four bytes of EBX, ECX and EDX. */
    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* Descriptor 0x48 is a 3 MB cache: 3 * 1024 KB (was mistyped as 3184). */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* No known descriptor was reported: warn and fall back to 256 KB. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1032. #endif
  /*
   * get_l2_size: report the L2 cache size in KB.
   *
   * Executes cpuid(0x80000006, ...) and takes BITMASK(ecx, 16, 0xffff) as the
   * size.  A positive value is returned as is.  Otherwise:
   *   - with ARCH_X86 defined, the result of get_l2_size_old() is returned;
   *   - without it, a warning is written to stderr and 256 is returned.
   *
   * NOTE(review): BITMASK is defined outside this block; it is assumed to
   * yield (reg >> shift) & mask, i.e. bits 16..31 of ECX here -- confirm.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int l2_kb;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2_kb = BITMASK(ecx, 16, 0xffff);

    /* Common success path for both build flavours. */
    if (l2_kb > 0) return l2_kb;

  #ifdef ARCH_X86
    /* Nothing usable from the extended leaf: try the descriptor-based lookup. */
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * get_l3_size: executes cpuid(0x80000006, ...) and returns
   * BITMASK(edx, 18, 0x3fff) multiplied by 512.
   *
   * NOTE(review): presumably the EDX field counts 512 KB units, making the
   * result a size in KB -- confirm against the CPUID documentation.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_field = BITMASK(edx, 18, 0x3fff);

    return l3_field * 512;
  }
  1053. static void init_parameter(void) {
  1054. int l2 = get_l2_size();
  1055. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1056. /* where the GEMM unrolling parameters do not depend on l2 */
  1057. #ifdef BUILD_BFLOAT16
  1058. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1059. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1060. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1061. #endif
  1062. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1063. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1064. #endif
  1065. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1066. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1067. #endif
  1068. #if BUILD_COMPLEX == 1
  1069. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1070. #endif
  1071. #if BUILD_COMPLEX16==1
  1072. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1073. #endif
  1074. #if BUILD_COMPLEX == 1
  1075. #ifdef CGEMM3M_DEFAULT_Q
  1076. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1077. #else
  1078. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1079. #endif
  1080. #endif
  1081. #if BUILD_COMPLEX16 == 1
  1082. #ifdef ZGEMM3M_DEFAULT_Q
  1083. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1084. #else
  1085. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1086. #endif
  1087. #endif
  1088. #ifdef EXPRECISION
  1089. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1090. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1091. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1092. #endif
  1093. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1094. #ifdef DEBUG
  1095. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1096. #endif
  1097. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1098. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1099. #endif
  1100. #if BUILD_DOUBLE == 1
  1101. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1102. #endif
  1103. #if BUILD_COMPLEX==1
  1104. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1105. #endif
  1106. #if BUILD_COMPLEX16==1
  1107. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1108. #endif
  1109. #ifdef EXPRECISION
  1110. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1111. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1112. #endif
  1113. #endif
  1114. #ifdef CORE_NORTHWOOD
  1115. #ifdef DEBUG
  1116. fprintf(stderr, "Northwood\n");
  1117. #endif
  1118. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1119. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1120. #endif
  1121. #if BUILD_DOUBLE == 1
  1122. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1123. #endif
  1124. #if BUILD_COMPLEX==1
  1125. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1126. #endif
  1127. #if BUILD_COMPLEX16==1
  1128. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1129. #endif
  1130. #ifdef EXPRECISION
  1131. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1132. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1133. #endif
  1134. #endif
  1135. #ifdef ATOM
  1136. #ifdef DEBUG
  1137. fprintf(stderr, "Atom\n");
  1138. #endif
  1139. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1140. TABLE_NAME.sgemm_p = 256;
  1141. #endif
  1142. #if BUILD_DOUBLE ==1
  1143. TABLE_NAME.dgemm_p = 128;
  1144. #endif
  1145. #if BUILD_COMPLEX==1
  1146. TABLE_NAME.cgemm_p = 128;
  1147. #endif
  1148. #if BUILD_COMPLEX16==1
  1149. TABLE_NAME.zgemm_p = 64;
  1150. #endif
  1151. #ifdef EXPRECISION
  1152. TABLE_NAME.qgemm_p = 64;
  1153. TABLE_NAME.xgemm_p = 32;
  1154. #endif
  1155. #endif
  1156. #ifdef CORE_PRESCOTT
  1157. #ifdef DEBUG
  1158. fprintf(stderr, "Prescott\n");
  1159. #endif
  1160. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1161. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1162. #endif
  1163. #if BUILD_DOUBLE ==1
  1164. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1165. #endif
  1166. #if BUILD_COMPLEX==1
  1167. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1168. #endif
  1169. #if BUILD_COMPLEX16 == 1
  1170. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1171. #endif
  1172. #ifdef EXPRECISION
  1173. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1174. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1175. #endif
  1176. #endif
  1177. #ifdef CORE2
  1178. #ifdef DEBUG
  1179. fprintf(stderr, "Core2\n");
  1180. #endif
  1181. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1182. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1183. #endif
  1184. #if BUILD_DOUBLE==1
  1185. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1186. #endif
  1187. #if BUILD_COMPLEX==1
  1188. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1189. #endif
  1190. #if BUILD_COMPLEX16==1
  1191. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1192. #endif
  1193. #ifdef EXPRECISION
  1194. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1195. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1196. #endif
  1197. #endif
  1198. #ifdef PENRYN
  1199. #ifdef DEBUG
  1200. fprintf(stderr, "Penryn\n");
  1201. #endif
  1202. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1203. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1204. #endif
  1205. #if BUILD_DOUBLE == 1
  1206. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1207. #endif
  1208. #if BUILD_COMPLEX==1
  1209. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1210. #endif
  1211. #if BUILD_COMPLEX16==1
  1212. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1213. #endif
  1214. #ifdef EXPRECISION
  1215. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1216. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1217. #endif
  1218. #endif
  1219. #ifdef DUNNINGTON
  1220. #ifdef DEBUG
  1221. fprintf(stderr, "Dunnington\n");
  1222. #endif
  1223. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1224. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1225. #endif
  1226. #if BUILD_DOUBLE ==1
  1227. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1228. #endif
  1229. #if BUILD_COMPLEX==1
  1230. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1231. #endif
  1232. #if BUILD_COMPLEX16==1
  1233. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1234. #endif
  1235. #ifdef EXPRECISION
  1236. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1237. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1238. #endif
  1239. #endif
  1240. #ifdef NEHALEM
  1241. #ifdef DEBUG
  1242. fprintf(stderr, "Nehalem\n");
  1243. #endif
  1244. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1245. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1246. #endif
  1247. #if BUILD_DOUBLE
  1248. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1249. #endif
  1250. #if BUILD_COMPLEX
  1251. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1252. #endif
  1253. #if BUILD_COMPLEX16
  1254. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1255. #endif
  1256. #ifdef EXPRECISION
  1257. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1258. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1259. #endif
  1260. #endif
  1261. #ifdef SANDYBRIDGE
  1262. #ifdef DEBUG
  1263. fprintf(stderr, "Sandybridge\n");
  1264. #endif
  1265. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1266. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1267. #endif
  1268. #if BUILD_DOUBLE
  1269. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1270. #endif
  1271. #if BUILD_COMPLEX
  1272. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1273. #endif
  1274. #if BUILD_COMPLEX16
  1275. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1276. #endif
  1277. #ifdef EXPRECISION
  1278. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1279. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1280. #endif
  1281. #endif
  1282. #ifdef HASWELL
  1283. #ifdef DEBUG
  1284. fprintf(stderr, "Haswell\n");
  1285. #endif
  1286. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1287. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1288. #endif
  1289. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1290. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1291. #endif
  1292. #if BUILD_COMPLEX
  1293. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1294. #endif
  1295. #if BUILD_COMPLEX16
  1296. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1297. #endif
  1298. #ifdef EXPRECISION
  1299. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1300. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1301. #endif
  1302. #endif
  1303. #if defined(SKYLAKEX) || defined(COOPERLAKE)
  1304. #ifdef DEBUG
  1305. fprintf(stderr, "SkylakeX\n");
  1306. #endif
  1307. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1308. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1309. #endif
  1310. #if BUILD_DOUBLE
  1311. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1312. #endif
  1313. #if BUILD_COMPLEX
  1314. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1315. #endif
  1316. #if BUILD_COMPLEX16
  1317. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1318. #endif
  1319. #ifdef EXPRECISION
  1320. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1321. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1322. #endif
  1323. #endif
  1324. #ifdef OPTERON
  1325. #ifdef DEBUG
  1326. fprintf(stderr, "Opteron\n");
  1327. #endif
  1328. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1329. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1330. #endif
  1331. #if BUILD_DOUBLE
  1332. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1333. #endif
  1334. #if BUILD_COMPLEX
  1335. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1336. #endif
  1337. #if BUILD_COMPLEX16
  1338. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1339. #endif
  1340. #ifdef EXPRECISION
  1341. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1342. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1343. #endif
  1344. #endif
  1345. #ifdef BARCELONA
  1346. #ifdef DEBUG
  1347. fprintf(stderr, "Barcelona\n");
  1348. #endif
  1349. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1350. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1351. #endif
  1352. #if BUILD_DOUBLE
  1353. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1354. #endif
  1355. #if BUILD_COMPLEX
  1356. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1357. #endif
  1358. #if BUILD_COMPLEX16
  1359. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1360. #endif
  1361. #ifdef EXPRECISION
  1362. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1363. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1364. #endif
  1365. #endif
  1366. #ifdef BOBCAT
  1367. #ifdef DEBUG
  1368. fprintf(stderr, "Bobcate\n");
  1369. #endif
  1370. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1371. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1372. #endif
  1373. #if BUILD_DOUBLE
  1374. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1375. #endif
  1376. #if BUILD_COMPLEX
  1377. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1378. #endif
  1379. #if BUILD_COMPLEX16
  1380. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1381. #endif
  1382. #ifdef EXPRECISION
  1383. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1384. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1385. #endif
  1386. #endif
  1387. #ifdef BULLDOZER
  1388. #ifdef DEBUG
  1389. fprintf(stderr, "Bulldozer\n");
  1390. #endif
  1391. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1392. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1393. #endif
  1394. #if BUILD_DOUBLE
  1395. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1396. #endif
  1397. #if BUILD_COMPLEX
  1398. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1399. #endif
  1400. #if BUILD_COMPLEX16
  1401. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1402. #endif
  1403. #ifdef EXPRECISION
  1404. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1405. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1406. #endif
  1407. #endif
  1408. #ifdef EXCAVATOR
  1409. #ifdef DEBUG
  1410. fprintf(stderr, "Excavator\n");
  1411. #endif
  1412. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1413. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1414. #endif
  1415. #if BUILD_DOUBLE
  1416. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1417. #endif
  1418. #if BUILD_COMPLEX
  1419. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1420. #endif
  1421. #if BUILD_COMPLEX16
  1422. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1423. #endif
  1424. #ifdef EXPRECISION
  1425. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1426. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1427. #endif
  1428. #endif
  1429. #ifdef PILEDRIVER
  1430. #ifdef DEBUG
  1431. fprintf(stderr, "Piledriver\n");
  1432. #endif
  1433. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1434. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1435. #endif
  1436. #if BUILD_DOUBLE
  1437. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1438. #endif
  1439. #if BUILD_COMPLEX
  1440. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1441. #endif
  1442. #if BUILD_COMPLEX16
  1443. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1444. #endif
  1445. #ifdef EXPRECISION
  1446. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1447. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1448. #endif
  1449. #endif
  1450. #ifdef STEAMROLLER
  1451. #ifdef DEBUG
  1452. fprintf(stderr, "Steamroller\n");
  1453. #endif
  1454. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1455. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1456. #endif
  1457. #if BUILD_DOUBLE
  1458. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1459. #endif
  1460. #if BUILD_COMPLEX
  1461. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1462. #endif
  1463. #if BUILD_COMPLEX16
  1464. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1465. #endif
  1466. #ifdef EXPRECISION
  1467. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1468. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1469. #endif
  1470. #endif
  1471. #ifdef ZEN
  1472. #ifdef DEBUG
  1473. fprintf(stderr, "Zen\n");
  1474. #endif
  1475. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1476. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1477. #endif
  1478. #if BUILD_DOUBLE
  1479. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1480. #endif
  1481. #if BUILD_COMPLEX
  1482. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1483. #endif
  1484. #if BUILD_COMPLEX16
  1485. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1486. #endif
  1487. #ifdef EXPRECISION
  1488. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1489. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1490. #endif
  1491. #endif
  1492. #ifdef NANO
  1493. #ifdef DEBUG
  1494. fprintf(stderr, "NANO\n");
  1495. #endif
  1496. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1497. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1498. #endif
  1499. #if (BUILD_DOUBLE==1)
  1500. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1501. #endif
  1502. #if (BUILD_COMPLEX==1)
  1503. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1504. #endif
  1505. #if (BUILD_COMPLEX16==1)
  1506. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1507. #endif
  1508. #ifdef EXPRECISION
  1509. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1510. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1511. #endif
  1512. #endif
  1513. #if BUILD_COMPLEX==1
  1514. #ifdef CGEMM3M_DEFAULT_P
  1515. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1516. #else
  1517. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1518. #endif
  1519. #endif
  1520. #if BUILD_COMPLEX16==1
  1521. #ifdef ZGEMM3M_DEFAULT_P
  1522. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1523. #else
  1524. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1525. #endif
  1526. #endif
  1527. #ifdef EXPRECISION
  1528. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1529. #endif
  1530. #if BUILD_SINGLE == 1
  1531. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1532. #endif
  1533. #if BUILD_DOUBLE== 1
  1534. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1535. #endif
  1536. #if BUILD_COMPLEX==1
  1537. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1538. #endif
  1539. #if BUILD_COMPLEX16==1
  1540. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1541. #endif
  1542. #if BUILD_COMPLEX==1
  1543. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1544. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1545. #else
  1546. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1547. #endif
  1548. #endif
  1549. #if BUILD_COMPLEX16==1
  1550. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1551. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1552. #else
  1553. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1554. #endif
  1555. #endif
  1556. #ifdef QUAD_PRECISION
  1557. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1558. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1559. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1560. #endif
  1561. #ifdef DEBUG
  1562. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1563. #endif
  1564. #if BUILD_SINGLE==1
  1565. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1566. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1567. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1568. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1569. #endif
  1570. #if BUILD_DOUBLE==1
  1571. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1572. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1573. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1574. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1575. #endif
  1576. #ifdef EXPRECISION
  1577. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1578. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1579. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1580. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1581. #endif
  1582. #if BUILD_COMPLEX ==1
  1583. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1584. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1585. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1586. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1587. #endif
  1588. #if BUILD_COMPLEX16 ==1
  1589. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1590. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1591. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1592. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1593. #endif
  1594. #if BUILD_COMPLEX == 1
  1595. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1596. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1597. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1598. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1599. #endif
  1600. #if BUILD_COMPLEX16 == 1
  1601. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1602. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1603. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1604. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1605. #endif
  1606. #ifdef EXPRECISION
  1607. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1608. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1609. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1610. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1611. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1612. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1613. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1614. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1615. #endif
  1616. }
  1617. #endif //POWER
  1618. #endif //ZARCH
  1619. #endif //(ARCH_MIPS64)
  1620. #endif //(ARCH_ARM64)