You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 59 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023, 2025 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
  54. #ifdef BGEMM_DEFAULT_UNROLL_MN
  55. BGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. BGEMM_ALIGN_K,
  60. 0, 0, 0,
  61. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  62. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  63. SBGEMM_DEFAULT_UNROLL_MN,
  64. #else
  65. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  66. #endif
  67. SBGEMM_ALIGN_K,
  68. 0, // need_amxtile_permission
  69. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  70. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  71. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  72. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  73. dsdot_kTS,
  74. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  75. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  76. ssymv_LTS, ssymv_UTS,
  77. bgemm_kernelTS, bgemm_betaTS,
  78. bgemm_incopyTS, bgemm_itcopyTS,
  79. bgemm_oncopyTS, bgemm_otcopyTS,
  80. sbgemm_kernelTS, sbgemm_betaTS,
  81. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  82. sbgemm_incopyTS, sbgemm_itcopyTS,
  83. #else
  84. sbgemm_oncopyTS, sbgemm_otcopyTS,
  85. #endif
  86. sbgemm_oncopyTS, sbgemm_otcopyTS,
  87. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  88. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  89. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  90. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  91. #else
  92. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  93. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  94. #endif
  95. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  96. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  97. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  98. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  99. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  100. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  101. #else
  102. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  103. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  104. #endif
  105. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  106. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  107. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  108. ssymm_iutcopyTS, ssymm_iltcopyTS,
  109. #else
  110. ssymm_outcopyTS, ssymm_oltcopyTS,
  111. #endif
  112. ssymm_outcopyTS, ssymm_oltcopyTS,
  113. #ifndef NO_LAPACK
  114. sneg_tcopyTS, slaswp_ncopyTS,
  115. #else
  116. NULL,NULL,
  117. #endif
  118. #ifdef SMALL_MATRIX_OPT
  119. sbgemm_small_matrix_permitTS,
  120. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  121. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  122. #endif
  123. #endif
  124. #ifdef BUILD_HFLOAT16
  125. 0, 0, 0,
  126. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  127. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  128. SHGEMM_DEFAULT_UNROLL_MN,
  129. #else
  130. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  131. #endif
  132. shgemm_kernelTS, shgemm_betaTS,
  133. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  134. shgemm_incopyTS, shgemm_itcopyTS,
  135. #else
  136. shgemm_oncopyTS, shgemm_otcopyTS,
  137. #endif
  138. shgemm_oncopyTS, shgemm_otcopyTS,
  139. #endif
  140. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  141. 0, 0, 0,
  142. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  143. #ifdef SGEMM_DEFAULT_UNROLL_MN
  144. SGEMM_DEFAULT_UNROLL_MN,
  145. #else
  146. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  147. #endif
  148. #endif
  149. #ifdef HAVE_EXCLUSIVE_CACHE
  150. 1,
  151. #else
  152. 0,
  153. #endif
  154. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  155. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  156. #endif
  157. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  158. isamax_kTS,
  159. #endif
  160. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  161. isamin_kTS, ismax_kTS, ismin_kTS,
  162. snrm2_kTS, sasum_kTS,
  163. #endif
  164. #if BUILD_SINGLE == 1
  165. ssum_kTS,
  166. #endif
  167. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  168. scopy_kTS, sdot_kTS,
  169. // dsdot_kTS,
  170. srot_kTS, srotm_kTS, saxpy_kTS,
  171. #endif
  172. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  173. sscal_kTS,
  174. #endif
  175. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  176. sswap_kTS,
  177. sgemv_nTS, sgemv_tTS,
  178. #endif
  179. #if BUILD_SINGLE == 1
  180. sger_kTS,
  181. #endif
  182. #if BUILD_SINGLE == 1
  183. ssymv_LTS, ssymv_UTS,
  184. #endif
  185. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  186. #ifdef ARCH_X86_64
  187. sgemm_directTS,
  188. sgemm_direct_performantTS,
  189. #endif
  190. #ifdef ARCH_ARM64
  191. sgemm_directTS,
  192. #endif
  193. sgemm_kernelTS, sgemm_betaTS,
  194. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  195. sgemm_incopyTS, sgemm_itcopyTS,
  196. #else
  197. sgemm_oncopyTS, sgemm_otcopyTS,
  198. #endif
  199. sgemm_oncopyTS, sgemm_otcopyTS,
  200. #endif
  201. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  202. #ifdef SMALL_MATRIX_OPT
  203. sgemm_small_matrix_permitTS,
  204. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  205. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  206. #endif
  207. #endif
  208. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  209. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  210. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  211. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  212. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  213. #else
  214. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  215. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  216. #endif
  217. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  218. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  219. #endif
  220. #if (BUILD_SINGLE==1)
  221. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  222. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  223. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  224. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  225. #else
  226. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  227. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  228. #endif
  229. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  230. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  231. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  232. ssymm_iutcopyTS, ssymm_iltcopyTS,
  233. #else
  234. ssymm_outcopyTS, ssymm_oltcopyTS,
  235. #endif
  236. ssymm_outcopyTS, ssymm_oltcopyTS,
  237. #ifndef NO_LAPACK
  238. sneg_tcopyTS, slaswp_ncopyTS,
  239. #else
  240. NULL,NULL,
  241. #endif
  242. #endif
  243. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  244. 0, 0, 0,
  245. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  246. #ifdef DGEMM_DEFAULT_UNROLL_MN
  247. DGEMM_DEFAULT_UNROLL_MN,
  248. #else
  249. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  250. #endif
  251. #endif
  252. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  253. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  254. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  255. dnrm2_kTS, dasum_kTS,
  256. #endif
  257. #if (BUILD_DOUBLE==1)
  258. dsum_kTS,
  259. #endif
  260. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  261. dcopy_kTS, ddot_kTS,
  262. #endif
  263. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  264. dsdot_kTS,
  265. #endif
  266. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  267. drot_kTS,
  268. drotm_kTS,
  269. daxpy_kTS,
  270. dscal_kTS,
  271. dswap_kTS,
  272. dgemv_nTS, dgemv_tTS,
  273. #endif
  274. #if (BUILD_DOUBLE==1)
  275. dger_kTS,
  276. dsymv_LTS, dsymv_UTS,
  277. #endif
  278. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  279. dgemm_kernelTS, dgemm_betaTS,
  280. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  281. dgemm_incopyTS, dgemm_itcopyTS,
  282. #else
  283. dgemm_oncopyTS, dgemm_otcopyTS,
  284. #endif
  285. dgemm_oncopyTS, dgemm_otcopyTS,
  286. #endif
  287. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  288. #ifdef SMALL_MATRIX_OPT
  289. dgemm_small_matrix_permitTS,
  290. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  291. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  292. #endif
  293. #endif
  294. #if (BUILD_DOUBLE==1)
  295. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  296. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  297. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  298. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  299. #else
  300. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  301. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  302. #endif
  303. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  304. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  305. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  306. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  307. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  308. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  309. #else
  310. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  311. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  312. #endif
  313. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  314. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  315. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  316. dsymm_iutcopyTS, dsymm_iltcopyTS,
  317. #else
  318. dsymm_outcopyTS, dsymm_oltcopyTS,
  319. #endif
  320. dsymm_outcopyTS, dsymm_oltcopyTS,
  321. #ifndef NO_LAPACK
  322. dneg_tcopyTS, dlaswp_ncopyTS,
  323. #else
  324. NULL, NULL,
  325. #endif
  326. #endif
  327. #ifdef EXPRECISION
  328. 0, 0, 0,
  329. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  330. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  331. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  332. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  333. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  334. qgemv_nTS, qgemv_tTS, qger_kTS,
  335. qsymv_LTS, qsymv_UTS,
  336. qgemm_kernelTS, qgemm_betaTS,
  337. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  338. qgemm_incopyTS, qgemm_itcopyTS,
  339. #else
  340. qgemm_oncopyTS, qgemm_otcopyTS,
  341. #endif
  342. qgemm_oncopyTS, qgemm_otcopyTS,
  343. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  344. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  345. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  346. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  347. #else
  348. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  349. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  350. #endif
  351. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  352. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  353. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  354. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  355. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  356. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  357. #else
  358. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  359. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  360. #endif
  361. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  362. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  363. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  364. qsymm_iutcopyTS, qsymm_iltcopyTS,
  365. #else
  366. qsymm_outcopyTS, qsymm_oltcopyTS,
  367. #endif
  368. qsymm_outcopyTS, qsymm_oltcopyTS,
  369. #ifndef NO_LAPACK
  370. qneg_tcopyTS, qlaswp_ncopyTS,
  371. #else
  372. NULL, NULL,
  373. #endif
  374. #endif
  375. #if (BUILD_COMPLEX)
  376. 0, 0, 0,
  377. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  378. #ifdef CGEMM_DEFAULT_UNROLL_MN
  379. CGEMM_DEFAULT_UNROLL_MN,
  380. #else
  381. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  382. #endif
  383. #if (BUILD_COMPLEX)
  384. camax_kTS, camin_kTS,
  385. #endif
  386. #if (BUILD_COMPLEX)
  387. icamax_kTS,
  388. #endif
  389. #if (BUILD_COMPLEX)
  390. icamin_kTS,
  391. cnrm2_kTS, casum_kTS, csum_kTS,
  392. #endif
  393. #if (BUILD_COMPLEX)
  394. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  395. #endif
  396. #if (BUILD_COMPLEX)
  397. csrot_kTS,
  398. #endif
  399. #if (BUILD_COMPLEX)
  400. caxpy_kTS,
  401. caxpyc_kTS,
  402. cscal_kTS,
  403. cswap_kTS,
  404. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  405. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  406. #endif
  407. #if (BUILD_COMPLEX)
  408. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  409. csymv_LTS, csymv_UTS,
  410. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  411. #endif
  412. #if (BUILD_COMPLEX)
  413. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  414. cgemm_betaTS,
  415. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  416. cgemm_incopyTS, cgemm_itcopyTS,
  417. #else
  418. cgemm_oncopyTS, cgemm_otcopyTS,
  419. #endif
  420. cgemm_oncopyTS, cgemm_otcopyTS,
  421. #ifdef SMALL_MATRIX_OPT
  422. cgemm_small_matrix_permitTS,
  423. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  424. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  425. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  426. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  427. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  428. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  429. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  430. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  431. #endif
  432. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  433. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  434. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  435. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  436. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  437. #else
  438. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  439. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  440. #endif
  441. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  442. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  443. #endif
  444. #endif
  445. #if (BUILD_COMPLEX)
  446. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  447. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  448. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  449. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  450. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  451. #else
  452. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  453. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  454. #endif
  455. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  456. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  457. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  458. csymm_iutcopyTS, csymm_iltcopyTS,
  459. #else
  460. csymm_outcopyTS, csymm_oltcopyTS,
  461. #endif
  462. csymm_outcopyTS, csymm_oltcopyTS,
  463. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  464. chemm_iutcopyTS, chemm_iltcopyTS,
  465. #else
  466. chemm_outcopyTS, chemm_oltcopyTS,
  467. #endif
  468. chemm_outcopyTS, chemm_oltcopyTS,
  469. 0, 0, 0,
  470. #if (USE_GEMM3M)
  471. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  472. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  473. #else
  474. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  475. #endif
  476. cgemm3m_kernelTS,
  477. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  478. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  479. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  480. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  481. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  482. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  483. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  484. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  485. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  486. csymm3m_oucopybTS, csymm3m_olcopybTS,
  487. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  488. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  489. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  490. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  491. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  492. chemm3m_oucopybTS, chemm3m_olcopybTS,
  493. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  494. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  495. #else
  496. 0, 0, 0,
  497. NULL,
  498. NULL, NULL,
  499. NULL, NULL,
  500. NULL, NULL,
  501. NULL, NULL,
  502. NULL, NULL,
  503. NULL, NULL,
  504. NULL, NULL,
  505. NULL, NULL,
  506. NULL, NULL,
  507. NULL, NULL,
  508. NULL, NULL,
  509. NULL, NULL,
  510. NULL, NULL,
  511. NULL, NULL,
  512. NULL, NULL,
  513. NULL, NULL,
  514. NULL, NULL,
  515. NULL, NULL,
  516. #endif
  517. #endif
  518. #if (BUILD_COMPLEX)
  519. #ifndef NO_LAPACK
  520. cneg_tcopyTS,
  521. claswp_ncopyTS,
  522. #else
  523. NULL, NULL,
  524. #endif
  525. #endif
  526. #if BUILD_COMPLEX16 == 1
  527. 0, 0, 0,
  528. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  529. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  530. ZGEMM_DEFAULT_UNROLL_MN,
  531. #else
  532. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  533. #endif
  534. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  535. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  536. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  537. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  538. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  539. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  540. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  541. zsymv_LTS, zsymv_UTS,
  542. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  543. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  544. zgemm_betaTS,
  545. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  546. zgemm_incopyTS, zgemm_itcopyTS,
  547. #else
  548. zgemm_oncopyTS, zgemm_otcopyTS,
  549. #endif
  550. zgemm_oncopyTS, zgemm_otcopyTS,
  551. #ifdef SMALL_MATRIX_OPT
  552. zgemm_small_matrix_permitTS,
  553. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  554. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  555. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  556. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  557. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  558. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  559. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  560. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  561. #endif
  562. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  563. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  564. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  565. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  566. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  567. #else
  568. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  569. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  570. #endif
  571. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  572. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  573. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  574. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  575. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  576. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  577. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  578. #else
  579. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  580. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  581. #endif
  582. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  583. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  584. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  585. zsymm_iutcopyTS, zsymm_iltcopyTS,
  586. #else
  587. zsymm_outcopyTS, zsymm_oltcopyTS,
  588. #endif
  589. zsymm_outcopyTS, zsymm_oltcopyTS,
  590. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  591. zhemm_iutcopyTS, zhemm_iltcopyTS,
  592. #else
  593. zhemm_outcopyTS, zhemm_oltcopyTS,
  594. #endif
  595. zhemm_outcopyTS, zhemm_oltcopyTS,
  596. 0, 0, 0,
  597. #if (USE_GEMM3M)
  598. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  599. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  600. #else
  601. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  602. #endif
  603. zgemm3m_kernelTS,
  604. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  605. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  606. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  607. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  608. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  609. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  610. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  611. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  612. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  613. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  614. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  615. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  616. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  617. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  618. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  619. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  620. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  621. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  622. #else
  623. 0, 0, 0,
  624. NULL,
  625. NULL, NULL,
  626. NULL, NULL,
  627. NULL, NULL,
  628. NULL, NULL,
  629. NULL, NULL,
  630. NULL, NULL,
  631. NULL, NULL,
  632. NULL, NULL,
  633. NULL, NULL,
  634. NULL, NULL,
  635. NULL, NULL,
  636. NULL, NULL,
  637. NULL, NULL,
  638. NULL, NULL,
  639. NULL, NULL,
  640. NULL, NULL,
  641. NULL, NULL,
  642. NULL, NULL,
  643. #endif
  644. #ifndef NO_LAPACK
  645. zneg_tcopyTS, zlaswp_ncopyTS,
  646. #else
  647. NULL, NULL,
  648. #endif
  649. #endif
  650. #ifdef EXPRECISION
  651. 0, 0, 0,
  652. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  653. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  654. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  655. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  656. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  657. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  658. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  659. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  660. xsymv_LTS, xsymv_UTS,
  661. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  662. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  663. xgemm_betaTS,
  664. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  665. xgemm_incopyTS, xgemm_itcopyTS,
  666. #else
  667. xgemm_oncopyTS, xgemm_otcopyTS,
  668. #endif
  669. xgemm_oncopyTS, xgemm_otcopyTS,
  670. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  671. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  672. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  673. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  674. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  675. #else
  676. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  677. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  678. #endif
  679. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  680. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  681. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  682. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  683. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  684. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  685. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  686. #else
  687. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  688. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  689. #endif
  690. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  691. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  692. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  693. xsymm_iutcopyTS, xsymm_iltcopyTS,
  694. #else
  695. xsymm_outcopyTS, xsymm_oltcopyTS,
  696. #endif
  697. xsymm_outcopyTS, xsymm_oltcopyTS,
  698. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  699. xhemm_iutcopyTS, xhemm_iltcopyTS,
  700. #else
  701. xhemm_outcopyTS, xhemm_oltcopyTS,
  702. #endif
  703. xhemm_outcopyTS, xhemm_oltcopyTS,
  704. 0, 0, 0,
  705. #if (USE_GEMM3M)
  706. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  707. xgemm3m_kernelTS,
  708. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  709. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  710. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  711. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  712. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  713. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  714. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  715. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  716. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  717. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  718. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  719. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  720. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  721. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  722. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  723. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  724. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  725. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  726. #else
  727. 0, 0, 0,
  728. NULL,
  729. NULL, NULL,
  730. NULL, NULL,
  731. NULL, NULL,
  732. NULL, NULL,
  733. NULL, NULL,
  734. NULL, NULL,
  735. NULL, NULL,
  736. NULL, NULL,
  737. NULL, NULL,
  738. NULL, NULL,
  739. NULL, NULL,
  740. NULL, NULL,
  741. NULL, NULL,
  742. NULL, NULL,
  743. NULL, NULL,
  744. NULL, NULL,
  745. NULL, NULL,
  746. NULL, NULL,
  747. #endif
  748. #ifndef NO_LAPACK
  749. xneg_tcopyTS, xlaswp_ncopyTS,
  750. #else
  751. NULL, NULL,
  752. #endif
  753. #endif
  754. init_parameter,
  755. SNUMOPT, DNUMOPT, QNUMOPT,
  756. #if BUILD_SINGLE == 1
  757. saxpby_kTS,
  758. #endif
  759. #if BUILD_DOUBLE == 1
  760. daxpby_kTS,
  761. #endif
  762. #if BUILD_COMPLEX == 1
  763. caxpby_kTS,
  764. #endif
  765. #if BUILD_COMPLEX16== 1
  766. zaxpby_kTS,
  767. #endif
  768. #if BUILD_SINGLE == 1
  769. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  770. #endif
  771. #if BUILD_DOUBLE== 1
  772. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  773. #endif
  774. #if BUILD_COMPLEX == 1
  775. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  776. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  777. #endif
  778. #if BUILD_COMPLEX16 == 1
  779. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  780. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  781. #endif
  782. #if BUILD_SINGLE == 1
  783. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  784. #endif
  785. #if BUILD_DOUBLE== 1
  786. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  787. #endif
  788. #if BUILD_COMPLEX== 1
  789. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  790. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  791. #endif
  792. #if BUILD_COMPLEX16==1
  793. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  794. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  795. #endif
  796. #if BUILD_SINGLE == 1
  797. sgeadd_kTS,
  798. #endif
  799. #if BUILD_DOUBLE==1
  800. dgeadd_kTS,
  801. #endif
  802. #if BUILD_COMPLEX==1
  803. cgeadd_kTS,
  804. #endif
  805. #if BUILD_COMPLEX16==1
  806. zgeadd_kTS,
  807. #endif
  808. };
  809. #if (ARCH_ARM64)
  /*
   * ARM64 variant: populate the GEMM blocking parameters (P, Q, R) of
   * TABLE_NAME from the compile-time *_DEFAULT_* values.
   *
   * The assignments are grouped per precision.  The GEMM3M section must stay
   * after the plain GEMM section because its fallbacks read back the sgemm /
   * dgemm / qgemm fields written above.
   */
  static void init_parameter(void) {

    /* bfloat16 */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* single precision real (also filled in when only complex is built) */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* double precision real (also filled in when only complex16 is built) */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* single precision complex */
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* double precision complex */
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* extended precision, real (q) and complex (x) */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /* cgemm3m: use the dedicated default when one is defined, otherwise
       copy the value currently stored for sgemm. */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* zgemm3m: same scheme, falling back to the dgemm values. */
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* xgemm3m always mirrors the qgemm values. */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  902. #else // (ARCH_ARM64)
  903. #if defined(ARCH_MIPS64)
  904. static void init_parameter(void) {
  905. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  906. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  907. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  908. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  909. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  910. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  911. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  912. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  913. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  914. TABLE_NAME.dgemm_r = 640;
  915. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  916. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  917. #ifdef EXPRECISION
  918. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  919. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  920. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  921. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  922. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  923. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  924. #endif
  925. #if defined(USE_GEMM3M)
  926. #ifdef CGEMM3M_DEFAULT_P
  927. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  928. #else
  929. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  930. #endif
  931. #ifdef ZGEMM3M_DEFAULT_P
  932. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  933. #else
  934. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  935. #endif
  936. #ifdef CGEMM3M_DEFAULT_Q
  937. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  938. #else
  939. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  940. #endif
  941. #ifdef ZGEMM3M_DEFAULT_Q
  942. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  943. #else
  944. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  945. #endif
  946. #ifdef CGEMM3M_DEFAULT_R
  947. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  948. #else
  949. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  950. #endif
  951. #ifdef ZGEMM3M_DEFAULT_R
  952. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  953. #else
  954. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  955. #endif
  956. #ifdef EXPRECISION
  957. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  958. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  959. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  960. #endif
  961. #endif
  962. }
  963. #else // (ARCH_MIPS64)
  964. #if (ARCH_LOONGARCH64)
  /*
   * Read configuration word 0x14 with the LoongArch `cpucfg` instruction and
   * turn it into a size in MB:
   *
   *   (bits[15:0] + 1) * 2^bits[23:16] * 2^bits[30:24] / 1024 / 1024
   *
   * NOTE(review): the three fields are presumably (ways - 1), log2(sets) and
   * log2(line size) of the L3 cache -- confirm against the CPUCFG description
   * in the LoongArch reference manual.
   *
   * The arithmetic is carried out in double precision (pow) and truncated to
   * int on return.
   */
  static int get_L3_size() {
    int cfg_word = 0;
    int cfg_index = 0x14;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(cfg_word)
      : [id]"r"(cfg_index)
      : "memory"
    );

    {
      const int low_field = cfg_word & 0xffff;         /* bits 15:0  */
      const int exp_mid   = (cfg_word >> 16) & 0xff;   /* bits 23:16 */
      const int exp_high  = (cfg_word >> 24) & 0x7f;   /* bits 30:24 */

      return (low_field + 1) * pow(2, exp_mid) * pow(2, exp_high) / 1024 / 1024; // MB
    }
  }
  975. static void init_parameter(void) {
  976. #ifdef BUILD_BFLOAT16
  977. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  978. #endif
  979. #ifdef BUILD_BFLOAT16
  980. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  981. #endif
  982. #if defined(LA464)
  983. int L3_size = get_L3_size();
  984. #ifdef SMP
  985. if(blas_num_threads == 1){
  986. #endif
  987. //single thread
  988. if (L3_size == 32){ // 3C5000 and 3D5000
  989. TABLE_NAME.sgemm_p = 256;
  990. TABLE_NAME.sgemm_q = 384;
  991. TABLE_NAME.sgemm_r = 8192;
  992. TABLE_NAME.dgemm_p = 112;
  993. TABLE_NAME.dgemm_q = 289;
  994. TABLE_NAME.dgemm_r = 4096;
  995. TABLE_NAME.cgemm_p = 128;
  996. TABLE_NAME.cgemm_q = 256;
  997. TABLE_NAME.cgemm_r = 4096;
  998. TABLE_NAME.zgemm_p = 128;
  999. TABLE_NAME.zgemm_q = 128;
  1000. TABLE_NAME.zgemm_r = 2048;
  1001. } else { // 3A5000 and 3C5000L
  1002. TABLE_NAME.sgemm_p = 256;
  1003. TABLE_NAME.sgemm_q = 384;
  1004. TABLE_NAME.sgemm_r = 4096;
  1005. TABLE_NAME.dgemm_p = 112;
  1006. TABLE_NAME.dgemm_q = 300;
  1007. TABLE_NAME.dgemm_r = 3024;
  1008. TABLE_NAME.cgemm_p = 128;
  1009. TABLE_NAME.cgemm_q = 256;
  1010. TABLE_NAME.cgemm_r = 2048;
  1011. TABLE_NAME.zgemm_p = 128;
  1012. TABLE_NAME.zgemm_q = 128;
  1013. TABLE_NAME.zgemm_r = 1024;
  1014. }
  1015. #ifdef SMP
  1016. }else{
  1017. //multi thread
  1018. if (L3_size == 32){ // 3C5000 and 3D5000
  1019. TABLE_NAME.sgemm_p = 256;
  1020. TABLE_NAME.sgemm_q = 384;
  1021. TABLE_NAME.sgemm_r = 1024;
  1022. TABLE_NAME.dgemm_p = 112;
  1023. TABLE_NAME.dgemm_q = 289;
  1024. TABLE_NAME.dgemm_r = 342;
  1025. TABLE_NAME.cgemm_p = 128;
  1026. TABLE_NAME.cgemm_q = 256;
  1027. TABLE_NAME.cgemm_r = 512;
  1028. TABLE_NAME.zgemm_p = 128;
  1029. TABLE_NAME.zgemm_q = 128;
  1030. TABLE_NAME.zgemm_r = 512;
  1031. } else { // 3A5000 and 3C5000L
  1032. TABLE_NAME.sgemm_p = 256;
  1033. TABLE_NAME.sgemm_q = 384;
  1034. TABLE_NAME.sgemm_r = 2048;
  1035. TABLE_NAME.dgemm_p = 112;
  1036. TABLE_NAME.dgemm_q = 300;
  1037. TABLE_NAME.dgemm_r = 738;
  1038. TABLE_NAME.cgemm_p = 128;
  1039. TABLE_NAME.cgemm_q = 256;
  1040. TABLE_NAME.cgemm_r = 1024;
  1041. TABLE_NAME.zgemm_p = 128;
  1042. TABLE_NAME.zgemm_q = 128;
  1043. TABLE_NAME.zgemm_r = 1024;
  1044. }
  1045. }
  1046. #endif
  1047. #else
  1048. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1049. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1050. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1051. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1052. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1053. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1054. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1055. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1056. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1057. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1058. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1059. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1060. #endif
  1061. #ifdef BUILD_BFLOAT16
  1062. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1063. #endif
  1064. }
  1065. #else // (ARCH_LOONGARCH64)
  1066. #if (ARCH_POWER)
  1067. static void init_parameter(void) {
  1068. #ifdef BUILD_BFLOAT16
  1069. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1070. #endif
  1071. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1072. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1073. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1074. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1075. #ifdef BUILD_BFLOAT16
  1076. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1077. #endif
  1078. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1079. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1080. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1081. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1082. #ifdef BUILD_BFLOAT16
  1083. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1084. #endif
  1085. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1086. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1087. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1088. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1089. }
  1090. #else //POWER
  1091. #if (ARCH_ZARCH)
  1092. static void init_parameter(void) {
  1093. #ifdef BUILD_BFLOAT16
  1094. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1095. #endif
  1096. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1097. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1098. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1099. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1100. #ifdef BUILD_BFLOAT16
  1101. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1102. #endif
  1103. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1104. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1105. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1106. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1107. #ifdef BUILD_BFLOAT16
  1108. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1109. #endif
  1110. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1111. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1112. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1113. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1114. }
  1115. #else //ZARCH
  1116. #if (ARCH_RISCV64)
  1117. static void init_parameter(void) {
  1118. #ifdef BUILD_BFLOAT16
  1119. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1120. #endif
  1121. #ifdef BUILD_HFLOAT16
  1122. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1123. #endif
  1124. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1125. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1126. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1127. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1128. #ifdef BUILD_BFLOAT16
  1129. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1130. #endif
  1131. #ifdef BUILD_HFLOAT16
  1132. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1133. #endif
  1134. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1135. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1136. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1137. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1138. #ifdef BUILD_BFLOAT16
  1139. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1140. #endif
  1141. #ifdef BUILD_HFLOAT16
  1142. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1143. #endif
  1144. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1145. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1146. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1147. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1148. }
  1149. #else //RISCV64
  1150. #ifdef ARCH_X86
  /*
   * Fallback L2 size detection through the legacy CPUID leaf 2 descriptor
   * bytes.
   *
   * Scans the descriptor bytes returned in EAX (bytes 1..3 -- the low byte of
   * EAX is not a descriptor and is skipped), EBX, ECX and EDX, in that order,
   * and returns the size in KB associated with the first descriptor found in
   * the table below.  If no descriptor matches, a warning is printed to
   * stderr and 256 is returned.
   *
   * NOTE(review): bit 31 of each register is not checked before its bytes are
   * interpreted; confirm against the CPUID leaf 02H description whether
   * registers flagged as reserved should be skipped.
   */
  static int get_l2_size_old(void){
    int regs[4];   /* eax, ebx, ecx, edx as filled in by cpuid(2) */
    int reg, byte;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (reg = 0; reg < 4; reg++){
      /* Start at byte 1 for EAX, byte 0 for the other registers. */
      for (byte = (reg == 0) ? 1 : 0; byte < 4; byte++){
        switch (BITMASK(regs[reg], 8 * byte, 0xff)){
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
        case 0x1a :
          return 96;
        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;
        case 0x3a :
          return 192;
        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;
        case 0x3d :
          return 384;
        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;
        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;
        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;
        case 0x48 :
          /* 3 MB L2 = 3 * 1024 KB (was mistakenly 3184). */
          return 3072;
        case 0x49 :
          return 4096;
        case 0x4e :
          return 6144;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1221. #endif
  /*
   * Determine the L2 cache size used to derive the GEMM blocking parameters.
   *
   * Order of precedence:
   *   1. a non-zero value of the OPENBLAS_L2_SIZE environment variable,
   *   2. bits 31:16 of ECX from CPUID leaf 0x80000006, when non-zero,
   *   3. on 32-bit x86 builds (ARCH_X86): get_l2_size_old();
   *      otherwise: a warning on stderr and a default of 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int size = readenv_atoi("OPENBLAS_L2_SIZE");

    if (size == 0) {
      /* No user override: ask the CPU. */
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      size = BITMASK(ecx, 16, 0xffff);

  #ifdef ARCH_X86
      if (size <= 0) {
        size = get_l2_size_old();
      }
  #else
      if (size <= 0) {
        fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
        size = 256;
      }
  #endif
    }

    return size;
  }
  /*
   * Return the 14-bit field at bits 31:18 of EDX from CPUID leaf 0x80000006,
   * multiplied by 512.
   *
   * NOTE(review): presumably the field counts 512 KB units of L3 cache, making
   * the result a size in KB -- confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    units = BITMASK(edx, 18, 0x3fff);

    return units * 512;
  }
  1245. static void init_parameter(void) {
  1246. int l2 = get_l2_size();
  1247. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1248. /* where the GEMM unrolling parameters do not depend on l2 */
  1249. #ifdef BUILD_BFLOAT16
  1250. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1251. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1252. #endif
  1253. #ifdef BUILD_HFLOAT16
  1254. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1255. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1256. #endif
  1257. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1258. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1259. #endif
  1260. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1261. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1262. #endif
  1263. #if BUILD_COMPLEX == 1
  1264. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1265. #endif
  1266. #if BUILD_COMPLEX16==1
  1267. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1268. #endif
  1269. #if BUILD_COMPLEX == 1
  1270. #ifdef CGEMM3M_DEFAULT_Q
  1271. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1272. #else
  1273. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1274. #endif
  1275. #endif
  1276. #if BUILD_COMPLEX16 == 1
  1277. #ifdef ZGEMM3M_DEFAULT_Q
  1278. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1279. #else
  1280. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1281. #endif
  1282. #endif
  1283. #ifdef EXPRECISION
  1284. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1285. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1286. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1287. #endif
  1288. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1289. #ifdef DEBUG
  1290. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1291. #endif
  1292. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1293. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1294. #endif
  1295. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1296. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1297. #endif
  1298. #if BUILD_COMPLEX==1
  1299. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1300. #endif
  1301. #if BUILD_COMPLEX16==1
  1302. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1303. #endif
  1304. #ifdef EXPRECISION
  1305. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1306. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1307. #endif
  1308. #endif
  1309. #ifdef CORE_NORTHWOOD
  1310. #ifdef DEBUG
  1311. fprintf(stderr, "Northwood\n");
  1312. #endif
  1313. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1314. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1315. #endif
  1316. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1317. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1318. #endif
  1319. #if BUILD_COMPLEX==1
  1320. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1321. #endif
  1322. #if BUILD_COMPLEX16==1
  1323. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1324. #endif
  1325. #ifdef EXPRECISION
  1326. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1327. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1328. #endif
  1329. #endif
  1330. #ifdef ATOM
  1331. #ifdef DEBUG
  1332. fprintf(stderr, "Atom\n");
  1333. #endif
  1334. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1335. TABLE_NAME.sgemm_p = 256;
  1336. #endif
  1337. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1338. TABLE_NAME.dgemm_p = 128;
  1339. #endif
  1340. #if BUILD_COMPLEX==1
  1341. TABLE_NAME.cgemm_p = 128;
  1342. #endif
  1343. #if BUILD_COMPLEX16==1
  1344. TABLE_NAME.zgemm_p = 64;
  1345. #endif
  1346. #ifdef EXPRECISION
  1347. TABLE_NAME.qgemm_p = 64;
  1348. TABLE_NAME.xgemm_p = 32;
  1349. #endif
  1350. #endif
  1351. #ifdef CORE_PRESCOTT
  1352. #ifdef DEBUG
  1353. fprintf(stderr, "Prescott\n");
  1354. #endif
  1355. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1356. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1357. #endif
  1358. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1359. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1360. #endif
  1361. #if BUILD_COMPLEX==1
  1362. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1363. #endif
  1364. #if BUILD_COMPLEX16 == 1
  1365. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1366. #endif
  1367. #ifdef EXPRECISION
  1368. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1369. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1370. #endif
  1371. #endif
  1372. #ifdef CORE2
  1373. #ifdef DEBUG
  1374. fprintf(stderr, "Core2\n");
  1375. #endif
  1376. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1377. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1378. #endif
  1379. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1380. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1381. #endif
  1382. #if BUILD_COMPLEX==1
  1383. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1384. #endif
  1385. #if BUILD_COMPLEX16==1
  1386. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1387. #endif
  1388. #ifdef EXPRECISION
  1389. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1390. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1391. #endif
  1392. #endif
  1393. #ifdef PENRYN
  1394. #ifdef DEBUG
  1395. fprintf(stderr, "Penryn\n");
  1396. #endif
  1397. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1398. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1399. #endif
  1400. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1401. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1402. #endif
  1403. #if BUILD_COMPLEX==1
  1404. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1405. #endif
  1406. #if BUILD_COMPLEX16==1
  1407. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1408. #endif
  1409. #ifdef EXPRECISION
  1410. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1411. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1412. #endif
  1413. #endif
  1414. #ifdef DUNNINGTON
  1415. #ifdef DEBUG
  1416. fprintf(stderr, "Dunnington\n");
  1417. #endif
  1418. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1419. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1420. #endif
  1421. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1422. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1423. #endif
  1424. #if BUILD_COMPLEX==1
  1425. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1426. #endif
  1427. #if BUILD_COMPLEX16==1
  1428. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1429. #endif
  1430. #ifdef EXPRECISION
  1431. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1432. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1433. #endif
  1434. #endif
  1435. #ifdef NEHALEM
  1436. #ifdef DEBUG
  1437. fprintf(stderr, "Nehalem\n");
  1438. #endif
  1439. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1440. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1441. #endif
  1442. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1443. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1444. #endif
  1445. #if BUILD_COMPLEX
  1446. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1447. #endif
  1448. #if BUILD_COMPLEX16
  1449. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1450. #endif
  1451. #ifdef EXPRECISION
  1452. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1453. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1454. #endif
  1455. #endif
  1456. #ifdef SANDYBRIDGE
  1457. #ifdef DEBUG
  1458. fprintf(stderr, "Sandybridge\n");
  1459. #endif
  1460. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1461. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1462. #endif
  1463. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1464. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1465. #endif
  1466. #if BUILD_COMPLEX
  1467. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1468. #endif
  1469. #if BUILD_COMPLEX16
  1470. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1471. #endif
  1472. #ifdef EXPRECISION
  1473. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1474. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1475. #endif
  1476. #endif
  1477. #ifdef HASWELL
  1478. #ifdef DEBUG
  1479. fprintf(stderr, "Haswell\n");
  1480. #endif
  1481. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1482. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1483. #endif
  1484. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1485. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1486. #endif
  1487. #if BUILD_COMPLEX
  1488. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1489. #endif
  1490. #if BUILD_COMPLEX16
  1491. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1492. #endif
  1493. #ifdef EXPRECISION
  1494. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1495. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1496. #endif
  1497. #endif
  1498. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1499. #ifdef DEBUG
  1500. fprintf(stderr, "SkylakeX\n");
  1501. #endif
  1502. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1503. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1504. #endif
  1505. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1506. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1507. #endif
  1508. #if BUILD_COMPLEX
  1509. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1510. #endif
  1511. #if BUILD_COMPLEX16
  1512. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1513. #endif
  1514. #ifdef EXPRECISION
  1515. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1516. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1517. #endif
  1518. #endif
  1519. #ifdef OPTERON
  1520. #ifdef DEBUG
  1521. fprintf(stderr, "Opteron\n");
  1522. #endif
  1523. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1524. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1525. #endif
  1526. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1527. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1528. #endif
  1529. #if BUILD_COMPLEX
  1530. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1531. #endif
  1532. #if BUILD_COMPLEX16
  1533. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1534. #endif
  1535. #ifdef EXPRECISION
  1536. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1537. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1538. #endif
  1539. #endif
  1540. #ifdef BARCELONA
  1541. #ifdef DEBUG
  1542. fprintf(stderr, "Barcelona\n");
  1543. #endif
  1544. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1545. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1546. #endif
  1547. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1548. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1549. #endif
  1550. #if BUILD_COMPLEX
  1551. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1552. #endif
  1553. #if BUILD_COMPLEX16
  1554. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1555. #endif
  1556. #ifdef EXPRECISION
  1557. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1558. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1559. #endif
  1560. #endif
  1561. #ifdef BOBCAT
  1562. #ifdef DEBUG
  1563. fprintf(stderr, "Bobcate\n");
  1564. #endif
  1565. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1566. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1567. #endif
  1568. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1569. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1570. #endif
  1571. #if BUILD_COMPLEX
  1572. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1573. #endif
  1574. #if BUILD_COMPLEX16
  1575. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1576. #endif
  1577. #ifdef EXPRECISION
  1578. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1579. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1580. #endif
  1581. #endif
  1582. #ifdef BULLDOZER
  1583. #ifdef DEBUG
  1584. fprintf(stderr, "Bulldozer\n");
  1585. #endif
  1586. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1587. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1588. #endif
  1589. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1590. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1591. #endif
  1592. #if BUILD_COMPLEX
  1593. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1594. #endif
  1595. #if BUILD_COMPLEX16
  1596. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1597. #endif
  1598. #ifdef EXPRECISION
  1599. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1600. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1601. #endif
  1602. #endif
  1603. #ifdef EXCAVATOR
  1604. #ifdef DEBUG
  1605. fprintf(stderr, "Excavator\n");
  1606. #endif
  1607. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1608. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1609. #endif
  1610. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1611. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1612. #endif
  1613. #if BUILD_COMPLEX
  1614. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1615. #endif
  1616. #if BUILD_COMPLEX16
  1617. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1618. #endif
  1619. #ifdef EXPRECISION
  1620. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1621. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1622. #endif
  1623. #endif
  1624. #ifdef PILEDRIVER
  1625. #ifdef DEBUG
  1626. fprintf(stderr, "Piledriver\n");
  1627. #endif
  1628. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1629. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1630. #endif
  1631. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1632. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1633. #endif
  1634. #if BUILD_COMPLEX
  1635. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1636. #endif
  1637. #if BUILD_COMPLEX16
  1638. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1639. #endif
  1640. #ifdef EXPRECISION
  1641. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1642. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1643. #endif
  1644. #endif
  1645. #ifdef STEAMROLLER
  1646. #ifdef DEBUG
  1647. fprintf(stderr, "Steamroller\n");
  1648. #endif
  1649. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1650. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1651. #endif
  1652. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1653. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1654. #endif
  1655. #if BUILD_COMPLEX
  1656. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1657. #endif
  1658. #if BUILD_COMPLEX16
  1659. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1660. #endif
  1661. #ifdef EXPRECISION
  1662. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1663. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1664. #endif
  1665. #endif
  1666. #ifdef ZEN
  1667. #ifdef DEBUG
  1668. fprintf(stderr, "Zen\n");
  1669. #endif
  1670. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1671. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1672. #endif
  1673. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1674. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1675. #endif
  1676. #if BUILD_COMPLEX
  1677. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1678. #endif
  1679. #if BUILD_COMPLEX16
  1680. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1681. #endif
  1682. #ifdef EXPRECISION
  1683. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1684. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1685. #endif
  1686. #endif
  1687. #ifdef NANO
  1688. #ifdef DEBUG
  1689. fprintf(stderr, "NANO\n");
  1690. #endif
  1691. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1692. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1693. #endif
  1694. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1695. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1696. #endif
  1697. #if (BUILD_COMPLEX==1)
  1698. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1699. #endif
  1700. #if (BUILD_COMPLEX16==1)
  1701. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1702. #endif
  1703. #ifdef EXPRECISION
  1704. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1705. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1706. #endif
  1707. #endif
  1708. #ifdef SAPPHIRERAPIDS
  1709. #if (BUILD_BFLOAT16 == 1)
  1710. TABLE_NAME.need_amxtile_permission = 1;
  1711. #endif
  1712. #endif
  1713. #if BUILD_COMPLEX==1
  1714. #ifdef CGEMM3M_DEFAULT_P
  1715. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1716. #else
  1717. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1718. #endif
  1719. #endif
  1720. #if BUILD_COMPLEX16==1
  1721. #ifdef ZGEMM3M_DEFAULT_P
  1722. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1723. #else
  1724. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1725. #endif
  1726. #endif
  1727. #ifdef EXPRECISION
  1728. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1729. #endif
  1730. #if BUILD_SINGLE == 1
  1731. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1732. #endif
  1733. #if BUILD_DOUBLE== 1
  1734. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1735. #endif
  1736. #if BUILD_COMPLEX==1
  1737. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1738. #endif
  1739. #if BUILD_COMPLEX16==1
  1740. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1741. #endif
  1742. #if BUILD_COMPLEX==1
  1743. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1744. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1745. #else
  1746. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1747. #endif
  1748. #endif
  1749. #if BUILD_COMPLEX16==1
  1750. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1751. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1752. #else
  1753. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1754. #endif
  1755. #endif
  1756. #ifdef QUAD_PRECISION
  1757. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1758. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1759. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1760. #endif
  1761. #ifdef DEBUG
  1762. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1763. #endif
  1764. #if BUILD_BFLOAT16==1
  1765. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1766. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1767. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1768. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1769. #endif
  1770. #if BUILD_HFLOAT16==1
  1771. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1772. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1773. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1774. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1775. #endif
  1776. #if BUILD_SINGLE==1
  1777. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1778. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1779. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1780. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1781. #endif
  1782. #if BUILD_DOUBLE==1
  1783. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1784. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1785. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1786. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1787. #endif
  1788. #ifdef EXPRECISION
  1789. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1790. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1791. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1792. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1793. #endif
  1794. #if BUILD_COMPLEX ==1
  1795. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1796. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1797. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1798. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1799. #endif
  1800. #if BUILD_COMPLEX16 ==1
  1801. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1802. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1803. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1804. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1805. #endif
  1806. #if BUILD_COMPLEX == 1
  1807. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1808. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1809. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1810. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1811. #endif
  1812. #if BUILD_COMPLEX16 == 1
  1813. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1814. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1815. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1816. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1817. #endif
  1818. #ifdef EXPRECISION
  1819. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1820. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1821. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1822. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1823. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1824. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1825. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1826. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1827. #endif
  1828. }
  1829. #endif //RISCV64
  1830. #endif //POWER
  1831. #endif //ZARCH
  1832. #endif //(ARCH_LOONGARCH64)
  1833. #endif //(ARCH_MIPS64)
  1834. #endif //(ARCH_ARM64)