You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 54 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  /* Remove any DEBUG macro definition still in effect at this point, so DEBUG is undefined for the code that follows. */
  44. #undef DEBUG
  /* Forward declaration of a file-local (static) routine that takes no arguments and returns nothing; it is declared ahead of the TABLE_NAME initializer below. */
  45. static void init_parameter(void);
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. sbgemm_kernelTS, sbgemm_betaTS,
  66. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  67. sbgemm_incopyTS, sbgemm_itcopyTS,
  68. #else
  69. sbgemm_oncopyTS, sbgemm_otcopyTS,
  70. #endif
  71. sbgemm_oncopyTS, sbgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #ifdef SMALL_MATRIX_OPT
  104. sbgemm_small_matrix_permitTS,
  105. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  106. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  107. #endif
  108. #endif
  109. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  110. 0, 0, 0,
  111. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  112. #ifdef SGEMM_DEFAULT_UNROLL_MN
  113. SGEMM_DEFAULT_UNROLL_MN,
  114. #else
  115. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  116. #endif
  117. #endif
  118. #ifdef HAVE_EXCLUSIVE_CACHE
  119. 1,
  120. #else
  121. 0,
  122. #endif
  123. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  124. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  125. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  126. snrm2_kTS, sasum_kTS,
  127. #endif
  128. #if BUILD_SINGLE == 1
  129. ssum_kTS,
  130. #endif
  131. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  132. scopy_kTS, sdot_kTS,
  133. // dsdot_kTS,
  134. srot_kTS, saxpy_kTS,
  135. #endif
  136. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  137. sscal_kTS,
  138. #endif
  139. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  140. sswap_kTS,
  141. sgemv_nTS, sgemv_tTS,
  142. #endif
  143. #if BUILD_SINGLE == 1
  144. sger_kTS,
  145. ssymv_LTS, ssymv_UTS,
  146. #endif
  147. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  148. #ifdef ARCH_X86_64
  149. sgemm_directTS,
  150. sgemm_direct_performantTS,
  151. #endif
  152. sgemm_kernelTS, sgemm_betaTS,
  153. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  154. sgemm_incopyTS, sgemm_itcopyTS,
  155. #else
  156. sgemm_oncopyTS, sgemm_otcopyTS,
  157. #endif
  158. sgemm_oncopyTS, sgemm_otcopyTS,
  159. #endif
  160. #if BUILD_SINGLE == 1
  161. #ifdef SMALL_MATRIX_OPT
  162. sgemm_small_matrix_permitTS,
  163. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  164. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  165. #endif
  166. #endif
  167. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  168. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  171. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  172. #else
  173. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  174. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  175. #endif
  176. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  177. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  178. #endif
  179. #if BUILD_SINGLE == 1
  180. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  181. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  182. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  183. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  184. #else
  185. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  186. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  187. #endif
  188. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  189. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  190. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  191. ssymm_iutcopyTS, ssymm_iltcopyTS,
  192. #else
  193. ssymm_outcopyTS, ssymm_oltcopyTS,
  194. #endif
  195. ssymm_outcopyTS, ssymm_oltcopyTS,
  196. #endif
  197. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  198. #ifndef NO_LAPACK
  199. sneg_tcopyTS, slaswp_ncopyTS,
  200. #else
  201. NULL,NULL,
  202. #endif
  203. #endif
  204. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  205. 0, 0, 0,
  206. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  207. #ifdef DGEMM_DEFAULT_UNROLL_MN
  208. DGEMM_DEFAULT_UNROLL_MN,
  209. #else
  210. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  211. #endif
  212. #endif
  213. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  214. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  215. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  216. dnrm2_kTS, dasum_kTS,
  217. #endif
  218. #if (BUILD_DOUBLE==1)
  219. dsum_kTS,
  220. #endif
  221. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  222. dcopy_kTS, ddot_kTS,
  223. #endif
  224. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  225. dsdot_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  228. drot_kTS,
  229. daxpy_kTS,
  230. dscal_kTS,
  231. dswap_kTS,
  232. dgemv_nTS, dgemv_tTS,
  233. #endif
  234. #if (BUILD_DOUBLE==1)
  235. dger_kTS,
  236. dsymv_LTS, dsymv_UTS,
  237. #endif
  238. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  239. dgemm_kernelTS, dgemm_betaTS,
  240. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  241. dgemm_incopyTS, dgemm_itcopyTS,
  242. #else
  243. dgemm_oncopyTS, dgemm_otcopyTS,
  244. #endif
  245. dgemm_oncopyTS, dgemm_otcopyTS,
  246. #endif
  247. #if (BUILD_DOUBLE==1)
  248. #ifdef SMALL_MATRIX_OPT
  249. dgemm_small_matrix_permitTS,
  250. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  251. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  252. #endif
  253. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  254. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  255. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  256. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  257. #else
  258. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  259. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  260. #endif
  261. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  262. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  263. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  264. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  265. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  266. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  267. #else
  268. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  269. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  270. #endif
  271. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  272. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  273. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  274. dsymm_iutcopyTS, dsymm_iltcopyTS,
  275. #else
  276. dsymm_outcopyTS, dsymm_oltcopyTS,
  277. #endif
  278. dsymm_outcopyTS, dsymm_oltcopyTS,
  279. #ifndef NO_LAPACK
  280. dneg_tcopyTS, dlaswp_ncopyTS,
  281. #else
  282. NULL, NULL,
  283. #endif
  284. #endif
  285. #ifdef EXPRECISION
  286. 0, 0, 0,
  287. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  288. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  289. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  290. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  291. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  292. qgemv_nTS, qgemv_tTS, qger_kTS,
  293. qsymv_LTS, qsymv_UTS,
  294. qgemm_kernelTS, qgemm_betaTS,
  295. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  296. qgemm_incopyTS, qgemm_itcopyTS,
  297. #else
  298. qgemm_oncopyTS, qgemm_otcopyTS,
  299. #endif
  300. qgemm_oncopyTS, qgemm_otcopyTS,
  301. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  302. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  303. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  304. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  305. #else
  306. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  307. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  308. #endif
  309. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  310. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  311. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  312. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  313. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  314. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  315. #else
  316. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  317. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  318. #endif
  319. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  320. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  321. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  322. qsymm_iutcopyTS, qsymm_iltcopyTS,
  323. #else
  324. qsymm_outcopyTS, qsymm_oltcopyTS,
  325. #endif
  326. qsymm_outcopyTS, qsymm_oltcopyTS,
  327. #ifndef NO_LAPACK
  328. qneg_tcopyTS, qlaswp_ncopyTS,
  329. #else
  330. NULL, NULL,
  331. #endif
  332. #endif
  333. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  334. 0, 0, 0,
  335. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  336. #ifdef CGEMM_DEFAULT_UNROLL_MN
  337. CGEMM_DEFAULT_UNROLL_MN,
  338. #else
  339. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  340. #endif
  341. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  342. #endif
  343. #if (BUILD_COMPLEX)
  344. cnrm2_kTS, casum_kTS, csum_kTS,
  345. #endif
  346. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  347. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  348. #endif
  349. #if (BUILD_COMPLEX)
  350. csrot_kTS,
  351. #endif
  352. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  353. caxpy_kTS,
  354. caxpyc_kTS,
  355. cscal_kTS,
  356. cswap_kTS,
  357. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  358. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  359. #endif
  360. #if (BUILD_COMPLEX)
  361. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  362. csymv_LTS, csymv_UTS,
  363. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  364. #endif
  365. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  366. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  367. cgemm_betaTS,
  368. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  369. cgemm_incopyTS, cgemm_itcopyTS,
  370. #else
  371. cgemm_oncopyTS, cgemm_otcopyTS,
  372. #endif
  373. cgemm_oncopyTS, cgemm_otcopyTS,
  374. #ifdef SMALL_MATRIX_OPT
  375. cgemm_small_matrix_permitTS,
  376. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  377. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  378. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  379. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  380. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  381. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  382. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  383. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  384. #endif
  385. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  386. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  387. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  388. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  389. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  390. #else
  391. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  392. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  393. #endif
  394. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  395. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  396. #endif
  397. #if (BUILD_COMPLEX)
  398. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  399. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  400. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  401. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  402. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  403. #else
  404. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  405. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  406. #endif
  407. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  408. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  409. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  410. csymm_iutcopyTS, csymm_iltcopyTS,
  411. #else
  412. csymm_outcopyTS, csymm_oltcopyTS,
  413. #endif
  414. csymm_outcopyTS, csymm_oltcopyTS,
  415. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  416. chemm_iutcopyTS, chemm_iltcopyTS,
  417. #else
  418. chemm_outcopyTS, chemm_oltcopyTS,
  419. #endif
  420. chemm_outcopyTS, chemm_oltcopyTS,
  421. 0, 0, 0,
  422. #if (USE_GEMM3M)
  423. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  424. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  425. #else
  426. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  427. #endif
  428. cgemm3m_kernelTS,
  429. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  430. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  431. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  432. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  433. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  434. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  435. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  436. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  437. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  438. csymm3m_oucopybTS, csymm3m_olcopybTS,
  439. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  440. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  441. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  442. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  443. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  444. chemm3m_oucopybTS, chemm3m_olcopybTS,
  445. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  446. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  447. #else
  448. 0, 0, 0,
  449. NULL,
  450. NULL, NULL,
  451. NULL, NULL,
  452. NULL, NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. #endif
  469. #endif
  470. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  471. #ifndef NO_LAPACK
  472. cneg_tcopyTS,
  473. claswp_ncopyTS,
  474. #else
  475. NULL, NULL,
  476. #endif
  477. #endif
  478. #if BUILD_COMPLEX16 == 1
  479. 0, 0, 0,
  480. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  481. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  482. ZGEMM_DEFAULT_UNROLL_MN,
  483. #else
  484. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  485. #endif
  486. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  487. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  488. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  489. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  490. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  491. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  492. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  493. zsymv_LTS, zsymv_UTS,
  494. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  495. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  496. zgemm_betaTS,
  497. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  498. zgemm_incopyTS, zgemm_itcopyTS,
  499. #else
  500. zgemm_oncopyTS, zgemm_otcopyTS,
  501. #endif
  502. zgemm_oncopyTS, zgemm_otcopyTS,
  503. #ifdef SMALL_MATRIX_OPT
  504. zgemm_small_matrix_permitTS,
  505. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  506. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  507. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  508. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  509. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  510. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  511. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  512. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  513. #endif
  514. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  515. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  516. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  517. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  518. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  519. #else
  520. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  521. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  522. #endif
  523. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  524. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  525. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  526. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  527. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  528. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  529. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  530. #else
  531. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  532. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  533. #endif
  534. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  535. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  536. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  537. zsymm_iutcopyTS, zsymm_iltcopyTS,
  538. #else
  539. zsymm_outcopyTS, zsymm_oltcopyTS,
  540. #endif
  541. zsymm_outcopyTS, zsymm_oltcopyTS,
  542. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  543. zhemm_iutcopyTS, zhemm_iltcopyTS,
  544. #else
  545. zhemm_outcopyTS, zhemm_oltcopyTS,
  546. #endif
  547. zhemm_outcopyTS, zhemm_oltcopyTS,
  548. 0, 0, 0,
  549. #if (USE_GEMM3M)
  550. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  551. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  552. #else
  553. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  554. #endif
  555. zgemm3m_kernelTS,
  556. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  557. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  558. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  559. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  560. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  561. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  562. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  563. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  564. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  565. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  566. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  567. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  568. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  569. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  570. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  571. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  572. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  573. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  574. #else
  575. 0, 0, 0,
  576. NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. NULL, NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. #endif
  596. #ifndef NO_LAPACK
  597. zneg_tcopyTS, zlaswp_ncopyTS,
  598. #else
  599. NULL, NULL,
  600. #endif
  601. #endif
  602. #ifdef EXPRECISION
  603. 0, 0, 0,
  604. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  605. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  606. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  607. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  608. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  609. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  610. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  611. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  612. xsymv_LTS, xsymv_UTS,
  613. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  614. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  615. xgemm_betaTS,
  616. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  617. xgemm_incopyTS, xgemm_itcopyTS,
  618. #else
  619. xgemm_oncopyTS, xgemm_otcopyTS,
  620. #endif
  621. xgemm_oncopyTS, xgemm_otcopyTS,
  622. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  623. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  624. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  625. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  626. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  627. #else
  628. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  629. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  630. #endif
  631. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  632. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  633. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  634. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  635. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  636. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  637. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  638. #else
  639. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  640. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  641. #endif
  642. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  643. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  644. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  645. xsymm_iutcopyTS, xsymm_iltcopyTS,
  646. #else
  647. xsymm_outcopyTS, xsymm_oltcopyTS,
  648. #endif
  649. xsymm_outcopyTS, xsymm_oltcopyTS,
  650. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  651. xhemm_iutcopyTS, xhemm_iltcopyTS,
  652. #else
  653. xhemm_outcopyTS, xhemm_oltcopyTS,
  654. #endif
  655. xhemm_outcopyTS, xhemm_oltcopyTS,
  656. 0, 0, 0,
  657. #if (USE_GEMM3M)
  658. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  659. xgemm3m_kernelTS,
  660. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  661. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  662. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  663. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  664. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  665. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  666. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  667. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  668. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  669. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  670. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  671. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  672. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  673. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  674. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  675. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  676. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  677. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  678. #else
  679. 0, 0, 0,
  680. NULL,
  681. NULL, NULL,
  682. NULL, NULL,
  683. NULL, NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. NULL, NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. #endif
  700. #ifndef NO_LAPACK
  701. xneg_tcopyTS, xlaswp_ncopyTS,
  702. #else
  703. NULL, NULL,
  704. #endif
  705. #endif
  706. init_parameter,
  707. SNUMOPT, DNUMOPT, QNUMOPT,
  708. #if BUILD_SINGLE == 1
  709. saxpby_kTS,
  710. #endif
  711. #if BUILD_DOUBLE == 1
  712. daxpby_kTS,
  713. #endif
  714. #if BUILD_COMPLEX == 1
  715. caxpby_kTS,
  716. #endif
  717. #if BUILD_COMPLEX16== 1
  718. zaxpby_kTS,
  719. #endif
  720. #if BUILD_SINGLE == 1
  721. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  722. #endif
  723. #if BUILD_DOUBLE== 1
  724. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  725. #endif
  726. #if BUILD_COMPLEX == 1
  727. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  728. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  729. #endif
  730. #if BUILD_COMPLEX16 == 1
  731. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  732. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  733. #endif
  734. #if BUILD_SINGLE == 1
  735. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  736. #endif
  737. #if BUILD_DOUBLE== 1
  738. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  739. #endif
  740. #if BUILD_COMPLEX== 1
  741. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  742. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  743. #endif
  744. #if BUILD_COMPLEX16==1
  745. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  746. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  747. #endif
  748. #if BUILD_SINGLE == 1
  749. sgeadd_kTS,
  750. #endif
  751. #if BUILD_DOUBLE==1
  752. dgeadd_kTS,
  753. #endif
  754. #if BUILD_COMPLEX==1
  755. cgeadd_kTS,
  756. #endif
  757. #if BUILD_COMPLEX16==1
  758. zgeadd_kTS,
  759. #endif
  760. 0, // padding_k
  761. };
  762. #if (ARCH_ARM64)
  763. static void init_parameter(void) {
  764. #if (BUILD_BFLOAT16)
  765. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  766. #endif
  767. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  768. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  769. #endif
  770. #if BUILD_DOUBLE == 1
  771. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  772. #endif
  773. #if BUILD_COMPLEX==1
  774. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  775. #endif
  776. #if BUILD_COMPLEX16==1
  777. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  778. #endif
  779. #if (BUILD_BFLOAT16)
  780. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  781. #endif
  782. #if BUILD_SINGLE == 1
  783. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  784. #endif
  785. #if BUILD_DOUBLE== 1
  786. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  787. #endif
  788. #if BUILD_COMPLEX== 1
  789. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  790. #endif
  791. #if BUILD_COMPLEX16==1
  792. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  793. #endif
  794. #if (BUILD_BFLOAT16)
  795. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  796. #endif
  797. #if BUILD_SINGLE == 1
  798. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  799. #endif
  800. #if BUILD_DOUBLE==1
  801. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  802. #endif
  803. #if BUILD_COMPLEX==1
  804. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  805. #endif
  806. #if BUILD_COMPLEX16==1
  807. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  808. #endif
  809. #ifdef EXPRECISION
  810. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  811. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  812. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  813. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  814. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  815. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  816. #endif
  817. #if (USE_GEMM3M)
  818. #ifdef CGEMM3M_DEFAULT_P
  819. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  820. #else
  821. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  822. #endif
  823. #ifdef ZGEMM3M_DEFAULT_P
  824. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  825. #else
  826. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  827. #endif
  828. #ifdef CGEMM3M_DEFAULT_Q
  829. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  830. #else
  831. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  832. #endif
  833. #ifdef ZGEMM3M_DEFAULT_Q
  834. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  835. #else
  836. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  837. #endif
  838. #ifdef CGEMM3M_DEFAULT_R
  839. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  840. #else
  841. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  842. #endif
  843. #ifdef ZGEMM3M_DEFAULT_R
  844. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  845. #else
  846. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  847. #endif
  848. #ifdef EXPRECISION
  849. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  850. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  851. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  852. #endif
  853. #endif
  854. #if defined(NEOVERSEN2) && BUILD_BFLOAT16 == 1
  855. TABLE_NAME.align_k = 4;
  856. #else
  857. TABLE_NAME.align_k = 1;
  858. #endif
  859. }
  860. #else // (ARCH_ARM64)
  861. #if defined(ARCH_MIPS64)
  862. static void init_parameter(void) {
  863. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  864. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  865. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  866. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  867. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  868. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  869. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  870. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  871. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  872. TABLE_NAME.dgemm_r = 640;
  873. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  874. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  875. #ifdef EXPRECISION
  876. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  877. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  878. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  879. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  880. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  881. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  882. #endif
  883. #if defined(USE_GEMM3M)
  884. #ifdef CGEMM3M_DEFAULT_P
  885. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  886. #else
  887. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  888. #endif
  889. #ifdef ZGEMM3M_DEFAULT_P
  890. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  891. #else
  892. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  893. #endif
  894. #ifdef CGEMM3M_DEFAULT_Q
  895. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  896. #else
  897. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  898. #endif
  899. #ifdef ZGEMM3M_DEFAULT_Q
  900. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  901. #else
  902. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  903. #endif
  904. #ifdef CGEMM3M_DEFAULT_R
  905. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  906. #else
  907. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  908. #endif
  909. #ifdef ZGEMM3M_DEFAULT_R
  910. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  911. #else
  912. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  913. #endif
  914. #ifdef EXPRECISION
  915. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  916. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  917. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  918. #endif
  919. #endif
  920. }
  921. #else // (ARCH_MIPS64)
  922. #if (ARCH_LOONGARCH64)
  923. static void init_parameter(void) {
  924. #ifdef BUILD_BFLOAT16
  925. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  926. #endif
  927. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  928. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  929. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  930. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  931. #ifdef BUILD_BFLOAT16
  932. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  933. #endif
  934. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  935. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  936. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  937. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  938. #ifdef BUILD_BFLOAT16
  939. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  940. #endif
  941. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  942. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  943. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  944. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  945. }
  946. #else // (ARCH_LOONGARCH64)
  947. #if (ARCH_POWER)
  948. static void init_parameter(void) {
  949. #ifdef BUILD_BFLOAT16
  950. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  951. #endif
  952. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  953. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  954. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  955. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  956. #ifdef BUILD_BFLOAT16
  957. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  958. #endif
  959. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  960. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  961. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  962. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  963. #ifdef BUILD_BFLOAT16
  964. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  965. #endif
  966. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  967. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  968. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  969. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  970. }
  971. #else //POWER
  972. #if (ARCH_ZARCH)
  973. static void init_parameter(void) {
  974. #ifdef BUILD_BFLOAT16
  975. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  976. #endif
  977. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  978. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  979. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  980. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  981. #ifdef BUILD_BFLOAT16
  982. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  983. #endif
  984. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  985. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  986. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  987. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  988. #ifdef BUILD_BFLOAT16
  989. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  990. #endif
  991. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  992. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  993. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  994. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  995. }
  996. #else //ZARCH
  997. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup through CPUID leaf 2.
   *
   * The fifteen descriptor bytes returned in EAX (upper three bytes only;
   * the low byte of EAX is not treated as a descriptor), EBX, ECX and EDX
   * are scanned in order, and the size associated with the first recognised
   * descriptor is returned.
   *
   * Returns the L2 size in KB.  If no descriptor is recognised, a warning is
   * printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB L2 = 3 * 1024 KB (was mistyped as 3184) */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* Nothing matched: warn and fall back to a conservative default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1068. #endif
  /*
   * Return the L2 cache size (in KB, as implied by the "assuming 256k"
   * fallback) taken from CPUID leaf 0x80000006, field BITMASK(ecx, 16, 0xffff).
   *
   * When that field is not positive:
   *   - with ARCH_X86 defined, defer to get_l2_size_old();
   *   - otherwise print a warning to stderr and return 256.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int size_kb;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    size_kb = BITMASK(ecx, 16, 0xffff);

    /* Common fast path for both build flavours. */
    if (size_kb > 0) {
      return size_kb;
    }

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Return the L3 cache size derived from CPUID leaf 0x80000006: the field
   * BITMASK(edx, 18, 0x3fff) multiplied by 512 (presumably yielding KB, in
   * line with get_l2_size -- confirm against the CPUID specification).
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    units = BITMASK(edx, 18, 0x3fff);
    return units * 512;
  }
  1089. static void init_parameter(void) {
  1090. int l2 = get_l2_size();
  1091. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1092. /* where the GEMM unrolling parameters do not depend on l2 */
  1093. #ifdef BUILD_BFLOAT16
  1094. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1095. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1096. #endif
  1097. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1098. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1099. #endif
  1100. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1101. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1102. #endif
  1103. #if BUILD_COMPLEX == 1
  1104. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1105. #endif
  1106. #if BUILD_COMPLEX16==1
  1107. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1108. #endif
  1109. #if BUILD_COMPLEX == 1
  1110. #ifdef CGEMM3M_DEFAULT_Q
  1111. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1112. #else
  1113. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1114. #endif
  1115. #endif
  1116. #if BUILD_COMPLEX16 == 1
  1117. #ifdef ZGEMM3M_DEFAULT_Q
  1118. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1119. #else
  1120. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1121. #endif
  1122. #endif
  1123. #ifdef EXPRECISION
  1124. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1125. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1126. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1127. #endif
  1128. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1129. #ifdef DEBUG
  1130. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1131. #endif
  1132. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1133. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1134. #endif
  1135. #if BUILD_DOUBLE == 1
  1136. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1137. #endif
  1138. #if BUILD_COMPLEX==1
  1139. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1140. #endif
  1141. #if BUILD_COMPLEX16==1
  1142. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1143. #endif
  1144. #ifdef EXPRECISION
  1145. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1146. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1147. #endif
  1148. #endif
  1149. #ifdef CORE_NORTHWOOD
  1150. #ifdef DEBUG
  1151. fprintf(stderr, "Northwood\n");
  1152. #endif
  1153. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1154. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1155. #endif
  1156. #if BUILD_DOUBLE == 1
  1157. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1158. #endif
  1159. #if BUILD_COMPLEX==1
  1160. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1161. #endif
  1162. #if BUILD_COMPLEX16==1
  1163. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1164. #endif
  1165. #ifdef EXPRECISION
  1166. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1167. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1168. #endif
  1169. #endif
  1170. #ifdef ATOM
  1171. #ifdef DEBUG
  1172. fprintf(stderr, "Atom\n");
  1173. #endif
  1174. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1175. TABLE_NAME.sgemm_p = 256;
  1176. #endif
  1177. #if BUILD_DOUBLE ==1
  1178. TABLE_NAME.dgemm_p = 128;
  1179. #endif
  1180. #if BUILD_COMPLEX==1
  1181. TABLE_NAME.cgemm_p = 128;
  1182. #endif
  1183. #if BUILD_COMPLEX16==1
  1184. TABLE_NAME.zgemm_p = 64;
  1185. #endif
  1186. #ifdef EXPRECISION
  1187. TABLE_NAME.qgemm_p = 64;
  1188. TABLE_NAME.xgemm_p = 32;
  1189. #endif
  1190. #endif
  1191. #ifdef CORE_PRESCOTT
  1192. #ifdef DEBUG
  1193. fprintf(stderr, "Prescott\n");
  1194. #endif
  1195. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1196. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1197. #endif
  1198. #if BUILD_DOUBLE ==1
  1199. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1200. #endif
  1201. #if BUILD_COMPLEX==1
  1202. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1203. #endif
  1204. #if BUILD_COMPLEX16 == 1
  1205. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1206. #endif
  1207. #ifdef EXPRECISION
  1208. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1209. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1210. #endif
  1211. #endif
  1212. #ifdef CORE2
  1213. #ifdef DEBUG
  1214. fprintf(stderr, "Core2\n");
  1215. #endif
  1216. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1217. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1218. #endif
  1219. #if BUILD_DOUBLE==1
  1220. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1221. #endif
  1222. #if BUILD_COMPLEX==1
  1223. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1224. #endif
  1225. #if BUILD_COMPLEX16==1
  1226. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1227. #endif
  1228. #ifdef EXPRECISION
  1229. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1230. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1231. #endif
  1232. #endif
  1233. #ifdef PENRYN
  1234. #ifdef DEBUG
  1235. fprintf(stderr, "Penryn\n");
  1236. #endif
  1237. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1238. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1239. #endif
  1240. #if BUILD_DOUBLE == 1
  1241. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1242. #endif
  1243. #if BUILD_COMPLEX==1
  1244. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1245. #endif
  1246. #if BUILD_COMPLEX16==1
  1247. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1248. #endif
  1249. #ifdef EXPRECISION
  1250. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1251. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1252. #endif
  1253. #endif
  1254. #ifdef DUNNINGTON
  1255. #ifdef DEBUG
  1256. fprintf(stderr, "Dunnington\n");
  1257. #endif
  1258. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1259. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1260. #endif
  1261. #if BUILD_DOUBLE ==1
  1262. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1263. #endif
  1264. #if BUILD_COMPLEX==1
  1265. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1266. #endif
  1267. #if BUILD_COMPLEX16==1
  1268. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1269. #endif
  1270. #ifdef EXPRECISION
  1271. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1272. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1273. #endif
  1274. #endif
  1275. #ifdef NEHALEM
  1276. #ifdef DEBUG
  1277. fprintf(stderr, "Nehalem\n");
  1278. #endif
  1279. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1280. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1281. #endif
  1282. #if BUILD_DOUBLE
  1283. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1284. #endif
  1285. #if BUILD_COMPLEX
  1286. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1287. #endif
  1288. #if BUILD_COMPLEX16
  1289. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1290. #endif
  1291. #ifdef EXPRECISION
  1292. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1293. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1294. #endif
  1295. #endif
  1296. #ifdef SANDYBRIDGE
  1297. #ifdef DEBUG
  1298. fprintf(stderr, "Sandybridge\n");
  1299. #endif
  1300. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1301. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1302. #endif
  1303. #if BUILD_DOUBLE
  1304. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1305. #endif
  1306. #if BUILD_COMPLEX
  1307. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1308. #endif
  1309. #if BUILD_COMPLEX16
  1310. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1311. #endif
  1312. #ifdef EXPRECISION
  1313. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1314. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1315. #endif
  1316. #endif
  1317. #ifdef HASWELL
  1318. #ifdef DEBUG
  1319. fprintf(stderr, "Haswell\n");
  1320. #endif
  1321. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1322. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1323. #endif
  1324. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1325. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1326. #endif
  1327. #if BUILD_COMPLEX
  1328. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1329. #endif
  1330. #if BUILD_COMPLEX16
  1331. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1332. #endif
  1333. #ifdef EXPRECISION
  1334. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1335. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1336. #endif
  1337. #endif
  1338. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1339. #ifdef DEBUG
  1340. fprintf(stderr, "SkylakeX\n");
  1341. #endif
  1342. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1343. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1344. #endif
  1345. #if BUILD_DOUBLE
  1346. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1347. #endif
  1348. #if BUILD_COMPLEX
  1349. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1350. #endif
  1351. #if BUILD_COMPLEX16
  1352. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1353. #endif
  1354. #ifdef EXPRECISION
  1355. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1356. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1357. #endif
  1358. #endif
  1359. #ifdef OPTERON
  1360. #ifdef DEBUG
  1361. fprintf(stderr, "Opteron\n");
  1362. #endif
  1363. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1364. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1365. #endif
  1366. #if BUILD_DOUBLE
  1367. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1368. #endif
  1369. #if BUILD_COMPLEX
  1370. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1371. #endif
  1372. #if BUILD_COMPLEX16
  1373. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1374. #endif
  1375. #ifdef EXPRECISION
  1376. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1377. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1378. #endif
  1379. #endif
  1380. #ifdef BARCELONA
  1381. #ifdef DEBUG
  1382. fprintf(stderr, "Barcelona\n");
  1383. #endif
  1384. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1385. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1386. #endif
  1387. #if BUILD_DOUBLE
  1388. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1389. #endif
  1390. #if BUILD_COMPLEX
  1391. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1392. #endif
  1393. #if BUILD_COMPLEX16
  1394. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1395. #endif
  1396. #ifdef EXPRECISION
  1397. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1398. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1399. #endif
  1400. #endif
  1401. #ifdef BOBCAT
  1402. #ifdef DEBUG
  1403. fprintf(stderr, "Bobcate\n");
  1404. #endif
  1405. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1406. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1407. #endif
  1408. #if BUILD_DOUBLE
  1409. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1410. #endif
  1411. #if BUILD_COMPLEX
  1412. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1413. #endif
  1414. #if BUILD_COMPLEX16
  1415. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1416. #endif
  1417. #ifdef EXPRECISION
  1418. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1419. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1420. #endif
  1421. #endif
  1422. #ifdef BULLDOZER
  1423. #ifdef DEBUG
  1424. fprintf(stderr, "Bulldozer\n");
  1425. #endif
  1426. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1427. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1428. #endif
  1429. #if BUILD_DOUBLE
  1430. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1431. #endif
  1432. #if BUILD_COMPLEX
  1433. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1434. #endif
  1435. #if BUILD_COMPLEX16
  1436. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1437. #endif
  1438. #ifdef EXPRECISION
  1439. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1440. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1441. #endif
  1442. #endif
  1443. #ifdef EXCAVATOR
  1444. #ifdef DEBUG
  1445. fprintf(stderr, "Excavator\n");
  1446. #endif
  1447. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1448. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1449. #endif
  1450. #if BUILD_DOUBLE
  1451. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1452. #endif
  1453. #if BUILD_COMPLEX
  1454. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1455. #endif
  1456. #if BUILD_COMPLEX16
  1457. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1458. #endif
  1459. #ifdef EXPRECISION
  1460. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1461. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1462. #endif
  1463. #endif
  1464. #ifdef PILEDRIVER
  1465. #ifdef DEBUG
  1466. fprintf(stderr, "Piledriver\n");
  1467. #endif
  1468. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1469. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1470. #endif
  1471. #if BUILD_DOUBLE
  1472. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1473. #endif
  1474. #if BUILD_COMPLEX
  1475. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1476. #endif
  1477. #if BUILD_COMPLEX16
  1478. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1479. #endif
  1480. #ifdef EXPRECISION
  1481. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1482. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1483. #endif
  1484. #endif
  1485. #ifdef STEAMROLLER
  1486. #ifdef DEBUG
  1487. fprintf(stderr, "Steamroller\n");
  1488. #endif
  1489. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1490. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1491. #endif
  1492. #if BUILD_DOUBLE
  1493. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1494. #endif
  1495. #if BUILD_COMPLEX
  1496. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1497. #endif
  1498. #if BUILD_COMPLEX16
  1499. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1500. #endif
  1501. #ifdef EXPRECISION
  1502. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1503. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1504. #endif
  1505. #endif
  1506. #ifdef ZEN
  1507. #ifdef DEBUG
  1508. fprintf(stderr, "Zen\n");
  1509. #endif
  1510. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1511. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1512. #endif
  1513. #if BUILD_DOUBLE
  1514. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1515. #endif
  1516. #if BUILD_COMPLEX
  1517. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1518. #endif
  1519. #if BUILD_COMPLEX16
  1520. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1521. #endif
  1522. #ifdef EXPRECISION
  1523. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1524. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1525. #endif
  1526. #endif
  1527. #ifdef NANO
  1528. #ifdef DEBUG
  1529. fprintf(stderr, "NANO\n");
  1530. #endif
  1531. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1532. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1533. #endif
  1534. #if (BUILD_DOUBLE==1)
  1535. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1536. #endif
  1537. #if (BUILD_COMPLEX==1)
  1538. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1539. #endif
  1540. #if (BUILD_COMPLEX16==1)
  1541. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1542. #endif
  1543. #ifdef EXPRECISION
  1544. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1545. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1546. #endif
  1547. #endif
  1548. #if BUILD_COMPLEX==1
  1549. #ifdef CGEMM3M_DEFAULT_P
  1550. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1551. #else
  1552. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1553. #endif
  1554. #endif
  1555. #if BUILD_COMPLEX16==1
  1556. #ifdef ZGEMM3M_DEFAULT_P
  1557. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1558. #else
  1559. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1560. #endif
  1561. #endif
  1562. #ifdef EXPRECISION
  1563. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1564. #endif
  1565. #if BUILD_SINGLE == 1
  1566. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1567. #endif
  1568. #if BUILD_DOUBLE== 1
  1569. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1570. #endif
  1571. #if BUILD_COMPLEX==1
  1572. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1573. #endif
  1574. #if BUILD_COMPLEX16==1
  1575. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1576. #endif
  1577. #if BUILD_COMPLEX==1
  1578. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1579. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1580. #else
  1581. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1582. #endif
  1583. #endif
  1584. #if BUILD_COMPLEX16==1
  1585. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1586. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1587. #else
  1588. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1589. #endif
  1590. #endif
  1591. #ifdef QUAD_PRECISION
  1592. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1593. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1594. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1595. #endif
  1596. #ifdef DEBUG
  1597. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1598. #endif
  1599. #if BUILD_BFLOAT16==1
  1600. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1601. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1602. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1603. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1604. #endif
  1605. #if BUILD_SINGLE==1
  1606. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1607. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1608. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1609. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1610. #endif
  1611. #if BUILD_DOUBLE==1
  1612. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1613. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1614. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1615. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1616. #endif
  1617. #ifdef EXPRECISION
  1618. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1619. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1620. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1621. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1622. #endif
  1623. #if BUILD_COMPLEX ==1
  1624. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1625. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1626. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1627. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1628. #endif
  1629. #if BUILD_COMPLEX16 ==1
  1630. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1631. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1632. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1633. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1634. #endif
  1635. #if BUILD_COMPLEX == 1
  1636. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1637. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1638. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1639. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1640. #endif
  1641. #if BUILD_COMPLEX16 == 1
  1642. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1643. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1644. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1645. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1646. #endif
  1647. #ifdef EXPRECISION
  1648. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1649. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1650. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1651. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1652. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1653. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1654. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1655. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1656. #endif
  1657. }
  1658. #endif //POWER
  1659. #endif //ZARCH
  1660. #endif //(ARCH_LOONGARCH64)
  1661. #endif //(ARCH_MIPS64)
  1662. #endif //(ARCH_ARM64)