You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 60 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023, 2025 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG /* drop any definition of DEBUG that is in effect at this point */
  46. static void init_parameter(void); /* forward declaration only; the definition is not part of this excerpt */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
  54. #ifdef BGEMM_DEFAULT_UNROLL_MN
  55. BGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. BGEMM_ALIGN_K,
  60. 0, 0, 0,
  61. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  62. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  63. SBGEMM_DEFAULT_UNROLL_MN,
  64. #else
  65. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  66. #endif
  67. SBGEMM_ALIGN_K,
  68. 0, // need_amxtile_permission
  69. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  70. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  71. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  72. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  73. dsdot_kTS,
  74. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  75. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  76. ssymv_LTS, ssymv_UTS,
  77. bgemm_kernelTS, bgemm_betaTS,
  78. bgemm_incopyTS, bgemm_itcopyTS,
  79. bgemm_oncopyTS, bgemm_otcopyTS,
  80. sbgemm_kernelTS, sbgemm_betaTS,
  81. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  82. sbgemm_incopyTS, sbgemm_itcopyTS,
  83. #else
  84. sbgemm_oncopyTS, sbgemm_otcopyTS,
  85. #endif
  86. sbgemm_oncopyTS, sbgemm_otcopyTS,
  87. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  88. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  89. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  90. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  91. #else
  92. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  93. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  94. #endif
  95. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  96. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  97. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  98. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  99. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  100. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  101. #else
  102. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  103. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  104. #endif
  105. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  106. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  107. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  108. ssymm_iutcopyTS, ssymm_iltcopyTS,
  109. #else
  110. ssymm_outcopyTS, ssymm_oltcopyTS,
  111. #endif
  112. ssymm_outcopyTS, ssymm_oltcopyTS,
  113. #ifndef NO_LAPACK
  114. sneg_tcopyTS, slaswp_ncopyTS,
  115. #else
  116. NULL,NULL,
  117. #endif
  118. #ifdef SMALL_MATRIX_OPT
  119. sbgemm_small_matrix_permitTS,
  120. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  121. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  122. #endif
  123. #endif
  124. #ifdef BUILD_HFLOAT16
  125. 0, 0, 0,
  126. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  127. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  128. SHGEMM_DEFAULT_UNROLL_MN,
  129. #else
  130. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  131. #endif
  132. shgemm_kernelTS, shgemm_betaTS,
  133. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  134. shgemm_incopyTS, shgemm_itcopyTS,
  135. #else
  136. shgemm_oncopyTS, shgemm_otcopyTS,
  137. #endif
  138. shgemm_oncopyTS, shgemm_otcopyTS,
  139. #endif
  140. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  141. 0, 0, 0,
  142. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  143. #ifdef SGEMM_DEFAULT_UNROLL_MN
  144. SGEMM_DEFAULT_UNROLL_MN,
  145. #else
  146. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  147. #endif
  148. #endif
  149. #ifdef HAVE_EXCLUSIVE_CACHE
  150. 1,
  151. #else
  152. 0,
  153. #endif
  154. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  155. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  156. #endif
  157. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  158. isamax_kTS,
  159. #endif
  160. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  161. isamin_kTS, ismax_kTS, ismin_kTS,
  162. snrm2_kTS, sasum_kTS,
  163. #endif
  164. #if BUILD_SINGLE == 1
  165. ssum_kTS,
  166. #endif
  167. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  168. scopy_kTS, sdot_kTS,
  169. // dsdot_kTS,
  170. srot_kTS, srotm_kTS, saxpy_kTS,
  171. #endif
  172. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  173. sscal_kTS,
  174. #endif
  175. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  176. sswap_kTS,
  177. sgemv_nTS, sgemv_tTS,
  178. #endif
  179. #if BUILD_SINGLE == 1
  180. sger_kTS,
  181. #endif
  182. #if BUILD_SINGLE == 1
  183. ssymv_LTS, ssymv_UTS,
  184. #endif
  185. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  186. #ifdef ARCH_X86_64
  187. sgemm_directTS,
  188. sgemm_direct_performantTS,
  189. #endif
  190. #ifdef ARCH_ARM64
  191. sgemm_directTS,
  192. #endif
  193. sgemm_kernelTS, sgemm_betaTS,
  194. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  195. sgemm_incopyTS, sgemm_itcopyTS,
  196. #else
  197. sgemm_oncopyTS, sgemm_otcopyTS,
  198. #endif
  199. sgemm_oncopyTS, sgemm_otcopyTS,
  200. #endif
  201. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  202. #ifdef SMALL_MATRIX_OPT
  203. sgemm_small_matrix_permitTS,
  204. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  205. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  206. #endif
  207. #endif
  208. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  209. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  210. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  211. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  212. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  213. #else
  214. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  215. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  216. #endif
  217. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  218. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  219. #endif
  220. #if (BUILD_SINGLE==1)
  221. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  222. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  223. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  224. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  225. #else
  226. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  227. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  228. #endif
  229. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  230. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  231. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  232. ssymm_iutcopyTS, ssymm_iltcopyTS,
  233. #else
  234. ssymm_outcopyTS, ssymm_oltcopyTS,
  235. #endif
  236. ssymm_outcopyTS, ssymm_oltcopyTS,
  237. #ifndef NO_LAPACK
  238. sneg_tcopyTS, slaswp_ncopyTS,
  239. #else
  240. NULL,NULL,
  241. #endif
  242. #endif
  243. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  244. 0, 0, 0,
  245. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  246. #ifdef DGEMM_DEFAULT_UNROLL_MN
  247. DGEMM_DEFAULT_UNROLL_MN,
  248. #else
  249. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  250. #endif
  251. #endif
  252. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  253. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  254. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  255. dnrm2_kTS, dasum_kTS,
  256. #endif
  257. #if (BUILD_DOUBLE==1)
  258. dsum_kTS,
  259. #endif
  260. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  261. dcopy_kTS, ddot_kTS,
  262. #endif
  263. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  264. dsdot_kTS,
  265. #endif
  266. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  267. drot_kTS,
  268. drotm_kTS,
  269. daxpy_kTS,
  270. dscal_kTS,
  271. dswap_kTS,
  272. dgemv_nTS, dgemv_tTS,
  273. #endif
  274. #if (BUILD_DOUBLE==1)
  275. dger_kTS,
  276. dsymv_LTS, dsymv_UTS,
  277. #endif
  278. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  279. dgemm_kernelTS, dgemm_betaTS,
  280. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  281. dgemm_incopyTS, dgemm_itcopyTS,
  282. #else
  283. dgemm_oncopyTS, dgemm_otcopyTS,
  284. #endif
  285. dgemm_oncopyTS, dgemm_otcopyTS,
  286. #endif
  287. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  288. #ifdef SMALL_MATRIX_OPT
  289. dgemm_small_matrix_permitTS,
  290. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  291. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  292. #endif
  293. #endif
  294. #if (BUILD_DOUBLE==1)
  295. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  296. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  297. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  298. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  299. #else
  300. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  301. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  302. #endif
  303. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  304. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  305. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  306. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  307. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  308. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  309. #else
  310. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  311. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  312. #endif
  313. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  314. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  315. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  316. dsymm_iutcopyTS, dsymm_iltcopyTS,
  317. #else
  318. dsymm_outcopyTS, dsymm_oltcopyTS,
  319. #endif
  320. dsymm_outcopyTS, dsymm_oltcopyTS,
  321. #ifndef NO_LAPACK
  322. dneg_tcopyTS, dlaswp_ncopyTS,
  323. #else
  324. NULL, NULL,
  325. #endif
  326. #endif
  327. #ifdef EXPRECISION
  328. 0, 0, 0,
  329. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  330. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  331. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  332. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  333. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  334. qgemv_nTS, qgemv_tTS, qger_kTS,
  335. qsymv_LTS, qsymv_UTS,
  336. qgemm_kernelTS, qgemm_betaTS,
  337. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  338. qgemm_incopyTS, qgemm_itcopyTS,
  339. #else
  340. qgemm_oncopyTS, qgemm_otcopyTS,
  341. #endif
  342. qgemm_oncopyTS, qgemm_otcopyTS,
  343. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  344. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  345. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  346. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  347. #else
  348. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  349. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  350. #endif
  351. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  352. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  353. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  354. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  355. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  356. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  357. #else
  358. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  359. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  360. #endif
  361. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  362. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  363. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  364. qsymm_iutcopyTS, qsymm_iltcopyTS,
  365. #else
  366. qsymm_outcopyTS, qsymm_oltcopyTS,
  367. #endif
  368. qsymm_outcopyTS, qsymm_oltcopyTS,
  369. #ifndef NO_LAPACK
  370. qneg_tcopyTS, qlaswp_ncopyTS,
  371. #else
  372. NULL, NULL,
  373. #endif
  374. #endif
  375. #if (BUILD_COMPLEX)
  376. 0, 0, 0,
  377. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  378. #ifdef CGEMM_DEFAULT_UNROLL_MN
  379. CGEMM_DEFAULT_UNROLL_MN,
  380. #else
  381. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  382. #endif
  383. #if (BUILD_COMPLEX)
  384. camax_kTS, camin_kTS,
  385. #endif
  386. #if (BUILD_COMPLEX)
  387. icamax_kTS,
  388. #endif
  389. #if (BUILD_COMPLEX)
  390. icamin_kTS,
  391. cnrm2_kTS, casum_kTS, csum_kTS,
  392. #endif
  393. #if (BUILD_COMPLEX)
  394. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  395. #endif
  396. #if (BUILD_COMPLEX)
  397. csrot_kTS,
  398. #endif
  399. #if (BUILD_COMPLEX)
  400. caxpy_kTS,
  401. caxpyc_kTS,
  402. cscal_kTS,
  403. cswap_kTS,
  404. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  405. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  406. #endif
  407. #if (BUILD_COMPLEX)
  408. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  409. csymv_LTS, csymv_UTS,
  410. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  411. #endif
  412. #if (BUILD_COMPLEX)
  413. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  414. cgemm_betaTS,
  415. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  416. cgemm_incopyTS, cgemm_itcopyTS,
  417. #else
  418. cgemm_oncopyTS, cgemm_otcopyTS,
  419. #endif
  420. cgemm_oncopyTS, cgemm_otcopyTS,
  421. #ifdef SMALL_MATRIX_OPT
  422. cgemm_small_matrix_permitTS,
  423. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  424. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  425. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  426. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  427. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  428. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  429. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  430. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  431. #endif
  432. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  433. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  434. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  435. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  436. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  437. #else
  438. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  439. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  440. #endif
  441. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  442. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  443. #endif
  444. #endif
  445. #if (BUILD_COMPLEX)
  446. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  447. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  448. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  449. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  450. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  451. #else
  452. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  453. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  454. #endif
  455. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  456. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  457. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  458. csymm_iutcopyTS, csymm_iltcopyTS,
  459. #else
  460. csymm_outcopyTS, csymm_oltcopyTS,
  461. #endif
  462. csymm_outcopyTS, csymm_oltcopyTS,
  463. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  464. chemm_iutcopyTS, chemm_iltcopyTS,
  465. #else
  466. chemm_outcopyTS, chemm_oltcopyTS,
  467. #endif
  468. chemm_outcopyTS, chemm_oltcopyTS,
  469. 0, 0, 0,
  470. #if (USE_GEMM3M)
  471. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  472. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  473. #else
  474. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  475. #endif
  476. cgemm3m_kernelTS,
  477. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  478. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  479. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  480. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  481. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  482. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  483. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  484. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  485. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  486. csymm3m_oucopybTS, csymm3m_olcopybTS,
  487. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  488. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  489. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  490. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  491. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  492. chemm3m_oucopybTS, chemm3m_olcopybTS,
  493. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  494. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  495. #else
  496. 0, 0, 0,
  497. NULL,
  498. NULL, NULL,
  499. NULL, NULL,
  500. NULL, NULL,
  501. NULL, NULL,
  502. NULL, NULL,
  503. NULL, NULL,
  504. NULL, NULL,
  505. NULL, NULL,
  506. NULL, NULL,
  507. NULL, NULL,
  508. NULL, NULL,
  509. NULL, NULL,
  510. NULL, NULL,
  511. NULL, NULL,
  512. NULL, NULL,
  513. NULL, NULL,
  514. NULL, NULL,
  515. NULL, NULL,
  516. #endif
  517. #endif
  518. #if (BUILD_COMPLEX)
  519. #ifndef NO_LAPACK
  520. cneg_tcopyTS,
  521. claswp_ncopyTS,
  522. #else
  523. NULL, NULL,
  524. #endif
  525. #endif
  526. #if BUILD_COMPLEX16 == 1
  527. 0, 0, 0,
  528. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  529. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  530. ZGEMM_DEFAULT_UNROLL_MN,
  531. #else
  532. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  533. #endif
  534. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  535. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  536. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  537. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  538. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  539. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  540. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  541. zsymv_LTS, zsymv_UTS,
  542. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  543. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  544. zgemm_betaTS,
  545. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  546. zgemm_incopyTS, zgemm_itcopyTS,
  547. #else
  548. zgemm_oncopyTS, zgemm_otcopyTS,
  549. #endif
  550. zgemm_oncopyTS, zgemm_otcopyTS,
  551. #ifdef SMALL_MATRIX_OPT
  552. zgemm_small_matrix_permitTS,
  553. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  554. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  555. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  556. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  557. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  558. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  559. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  560. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  561. #endif
  562. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  563. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  564. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  565. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  566. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  567. #else
  568. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  569. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  570. #endif
  571. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  572. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  573. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  574. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  575. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  576. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  577. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  578. #else
  579. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  580. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  581. #endif
  582. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  583. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  584. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  585. zsymm_iutcopyTS, zsymm_iltcopyTS,
  586. #else
  587. zsymm_outcopyTS, zsymm_oltcopyTS,
  588. #endif
  589. zsymm_outcopyTS, zsymm_oltcopyTS,
  590. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  591. zhemm_iutcopyTS, zhemm_iltcopyTS,
  592. #else
  593. zhemm_outcopyTS, zhemm_oltcopyTS,
  594. #endif
  595. zhemm_outcopyTS, zhemm_oltcopyTS,
  596. 0, 0, 0,
  597. #if (USE_GEMM3M)
  598. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  599. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  600. #else
  601. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  602. #endif
  603. zgemm3m_kernelTS,
  604. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  605. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  606. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  607. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  608. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  609. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  610. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  611. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  612. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  613. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  614. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  615. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  616. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  617. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  618. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  619. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  620. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  621. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  622. #else
  623. 0, 0, 0,
  624. NULL,
  625. NULL, NULL,
  626. NULL, NULL,
  627. NULL, NULL,
  628. NULL, NULL,
  629. NULL, NULL,
  630. NULL, NULL,
  631. NULL, NULL,
  632. NULL, NULL,
  633. NULL, NULL,
  634. NULL, NULL,
  635. NULL, NULL,
  636. NULL, NULL,
  637. NULL, NULL,
  638. NULL, NULL,
  639. NULL, NULL,
  640. NULL, NULL,
  641. NULL, NULL,
  642. NULL, NULL,
  643. #endif
  644. #ifndef NO_LAPACK
  645. zneg_tcopyTS, zlaswp_ncopyTS,
  646. #else
  647. NULL, NULL,
  648. #endif
  649. #endif
  650. #ifdef EXPRECISION
  651. 0, 0, 0,
  652. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  653. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  654. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  655. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  656. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  657. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  658. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  659. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  660. xsymv_LTS, xsymv_UTS,
  661. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  662. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  663. xgemm_betaTS,
  664. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  665. xgemm_incopyTS, xgemm_itcopyTS,
  666. #else
  667. xgemm_oncopyTS, xgemm_otcopyTS,
  668. #endif
  669. xgemm_oncopyTS, xgemm_otcopyTS,
  670. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  671. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  672. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  673. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  674. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  675. #else
  676. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  677. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  678. #endif
  679. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  680. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  681. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  682. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  683. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  684. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  685. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  686. #else
  687. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  688. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  689. #endif
  690. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  691. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  692. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  693. xsymm_iutcopyTS, xsymm_iltcopyTS,
  694. #else
  695. xsymm_outcopyTS, xsymm_oltcopyTS,
  696. #endif
  697. xsymm_outcopyTS, xsymm_oltcopyTS,
  698. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  699. xhemm_iutcopyTS, xhemm_iltcopyTS,
  700. #else
  701. xhemm_outcopyTS, xhemm_oltcopyTS,
  702. #endif
  703. xhemm_outcopyTS, xhemm_oltcopyTS,
  704. 0, 0, 0,
  705. #if (USE_GEMM3M)
  706. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  707. xgemm3m_kernelTS,
  708. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  709. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  710. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  711. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  712. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  713. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  714. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  715. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  716. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  717. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  718. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  719. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  720. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  721. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  722. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  723. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  724. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  725. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  726. #else
  727. 0, 0, 0,
  728. NULL,
  729. NULL, NULL,
  730. NULL, NULL,
  731. NULL, NULL,
  732. NULL, NULL,
  733. NULL, NULL,
  734. NULL, NULL,
  735. NULL, NULL,
  736. NULL, NULL,
  737. NULL, NULL,
  738. NULL, NULL,
  739. NULL, NULL,
  740. NULL, NULL,
  741. NULL, NULL,
  742. NULL, NULL,
  743. NULL, NULL,
  744. NULL, NULL,
  745. NULL, NULL,
  746. NULL, NULL,
  747. #endif
  748. #ifndef NO_LAPACK
  749. xneg_tcopyTS, xlaswp_ncopyTS,
  750. #else
  751. NULL, NULL,
  752. #endif
  753. #endif
  754. init_parameter,
  755. SNUMOPT, DNUMOPT, QNUMOPT,
  756. #if BUILD_SINGLE == 1
  757. saxpby_kTS,
  758. #endif
  759. #if BUILD_DOUBLE == 1
  760. daxpby_kTS,
  761. #endif
  762. #if BUILD_COMPLEX == 1
  763. caxpby_kTS,
  764. #endif
  765. #if BUILD_COMPLEX16== 1
  766. zaxpby_kTS,
  767. #endif
  768. #if BUILD_SINGLE == 1
  769. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  770. #endif
  771. #if BUILD_DOUBLE== 1
  772. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  773. #endif
  774. #if BUILD_COMPLEX == 1
  775. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  776. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  777. #endif
  778. #if BUILD_COMPLEX16 == 1
  779. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  780. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  781. #endif
  782. #if BUILD_SINGLE == 1
  783. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  784. #endif
  785. #if BUILD_DOUBLE== 1
  786. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  787. #endif
  788. #if BUILD_COMPLEX== 1
  789. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  790. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  791. #endif
  792. #if BUILD_COMPLEX16==1
  793. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  794. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  795. #endif
  796. #if BUILD_SINGLE == 1
  797. sgeadd_kTS,
  798. #endif
  799. #if BUILD_DOUBLE==1
  800. dgeadd_kTS,
  801. #endif
  802. #if BUILD_COMPLEX==1
  803. cgeadd_kTS,
  804. #endif
  805. #if BUILD_COMPLEX16==1
  806. zgeadd_kTS,
  807. #endif
  808. };
  809. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter.
   *
   * Copies the compile-time *_DEFAULT_P / _Q / _R values into the matching
   * <type>gemm_p / _q / _r fields of TABLE_NAME, one precision at a time.
   * A precision is only written when its BUILD_* switch enables it; the real
   * single/double fields are also written when only the corresponding complex
   * type is built.
   */
  static void init_parameter(void) {

    /* bfloat16 */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;

    TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
    TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
    TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  #endif

    /* real single precision */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* real double precision */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* complex single precision */
  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* complex double precision */
  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* extended precision (real q*, complex x*) */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * GEMM3M blocking. Each value comes from its own *GEMM3M_DEFAULT_* macro
     * when that macro exists; otherwise it is copied from the real-precision
     * field written above, so this section has to stay below those writes.
     */
  #if (USE_GEMM3M)

    /* cgemm3m: falls back to sgemm_* */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* zgemm3m: falls back to dgemm_* */
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* xgemm3m: always mirrors qgemm_* */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  905. #else // (ARCH_ARM64)
  906. #if defined(ARCH_MIPS64)
  907. static void init_parameter(void) {
  908. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  909. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  910. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  911. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  912. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  913. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  914. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  915. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  916. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  917. TABLE_NAME.dgemm_r = 640;
  918. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  919. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  920. #ifdef EXPRECISION
  921. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  922. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  923. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  924. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  925. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  926. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  927. #endif
  928. #if defined(USE_GEMM3M)
  929. #ifdef CGEMM3M_DEFAULT_P
  930. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  931. #else
  932. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  933. #endif
  934. #ifdef ZGEMM3M_DEFAULT_P
  935. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  936. #else
  937. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  938. #endif
  939. #ifdef CGEMM3M_DEFAULT_Q
  940. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  941. #else
  942. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  943. #endif
  944. #ifdef ZGEMM3M_DEFAULT_Q
  945. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  946. #else
  947. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  948. #endif
  949. #ifdef CGEMM3M_DEFAULT_R
  950. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  951. #else
  952. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  953. #endif
  954. #ifdef ZGEMM3M_DEFAULT_R
  955. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  956. #else
  957. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  958. #endif
  959. #ifdef EXPRECISION
  960. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  961. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  962. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  963. #endif
  964. #endif
  965. }
  966. #else // (ARCH_MIPS64)
  967. #if (ARCH_LOONGARCH64)
  968. static int get_L3_size() {
  969. int ret = 0, id = 0x14;
  970. __asm__ volatile (
  971. "cpucfg %[ret], %[id]"
  972. : [ret]"=r"(ret)
  973. : [id]"r"(id)
  974. : "memory"
  975. );
  976. return ((ret & 0xffff) + 1) * pow(2, ((ret >> 16) & 0xff)) * pow(2, ((ret >> 24) & 0x7f)) / 1024 / 1024; // MB
  977. }
  978. static void init_parameter(void) {
  979. #ifdef BUILD_BFLOAT16
  980. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  981. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  982. #endif
  983. #ifdef BUILD_BFLOAT16
  984. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  985. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  986. #endif
  987. #if defined(LA464)
  988. int L3_size = get_L3_size();
  989. #ifdef SMP
  990. if(blas_num_threads == 1){
  991. #endif
  992. //single thread
  993. if (L3_size == 32){ // 3C5000 and 3D5000
  994. TABLE_NAME.sgemm_p = 256;
  995. TABLE_NAME.sgemm_q = 384;
  996. TABLE_NAME.sgemm_r = 8192;
  997. TABLE_NAME.dgemm_p = 112;
  998. TABLE_NAME.dgemm_q = 289;
  999. TABLE_NAME.dgemm_r = 4096;
  1000. TABLE_NAME.cgemm_p = 128;
  1001. TABLE_NAME.cgemm_q = 256;
  1002. TABLE_NAME.cgemm_r = 4096;
  1003. TABLE_NAME.zgemm_p = 128;
  1004. TABLE_NAME.zgemm_q = 128;
  1005. TABLE_NAME.zgemm_r = 2048;
  1006. } else { // 3A5000 and 3C5000L
  1007. TABLE_NAME.sgemm_p = 256;
  1008. TABLE_NAME.sgemm_q = 384;
  1009. TABLE_NAME.sgemm_r = 4096;
  1010. TABLE_NAME.dgemm_p = 112;
  1011. TABLE_NAME.dgemm_q = 300;
  1012. TABLE_NAME.dgemm_r = 3024;
  1013. TABLE_NAME.cgemm_p = 128;
  1014. TABLE_NAME.cgemm_q = 256;
  1015. TABLE_NAME.cgemm_r = 2048;
  1016. TABLE_NAME.zgemm_p = 128;
  1017. TABLE_NAME.zgemm_q = 128;
  1018. TABLE_NAME.zgemm_r = 1024;
  1019. }
  1020. #ifdef SMP
  1021. }else{
  1022. //multi thread
  1023. if (L3_size == 32){ // 3C5000 and 3D5000
  1024. TABLE_NAME.sgemm_p = 256;
  1025. TABLE_NAME.sgemm_q = 384;
  1026. TABLE_NAME.sgemm_r = 1024;
  1027. TABLE_NAME.dgemm_p = 112;
  1028. TABLE_NAME.dgemm_q = 289;
  1029. TABLE_NAME.dgemm_r = 342;
  1030. TABLE_NAME.cgemm_p = 128;
  1031. TABLE_NAME.cgemm_q = 256;
  1032. TABLE_NAME.cgemm_r = 512;
  1033. TABLE_NAME.zgemm_p = 128;
  1034. TABLE_NAME.zgemm_q = 128;
  1035. TABLE_NAME.zgemm_r = 512;
  1036. } else { // 3A5000 and 3C5000L
  1037. TABLE_NAME.sgemm_p = 256;
  1038. TABLE_NAME.sgemm_q = 384;
  1039. TABLE_NAME.sgemm_r = 2048;
  1040. TABLE_NAME.dgemm_p = 112;
  1041. TABLE_NAME.dgemm_q = 300;
  1042. TABLE_NAME.dgemm_r = 738;
  1043. TABLE_NAME.cgemm_p = 128;
  1044. TABLE_NAME.cgemm_q = 256;
  1045. TABLE_NAME.cgemm_r = 1024;
  1046. TABLE_NAME.zgemm_p = 128;
  1047. TABLE_NAME.zgemm_q = 128;
  1048. TABLE_NAME.zgemm_r = 1024;
  1049. }
  1050. }
  1051. #endif
  1052. #else
  1053. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1054. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1055. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1056. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1057. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1058. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1059. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1060. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1061. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1062. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1063. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1064. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1065. #endif
  1066. #ifdef BUILD_BFLOAT16
  1067. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1068. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1069. #endif
  1070. }
  1071. #else // (ARCH_LOONGARCH64)
  1072. #if (ARCH_POWER)
  1073. static void init_parameter(void) {
  1074. #ifdef BUILD_BFLOAT16
  1075. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1076. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1077. #endif
  1078. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1079. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1080. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1081. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1082. #ifdef BUILD_BFLOAT16
  1083. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1084. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1085. #endif
  1086. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1087. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1088. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1089. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1090. #ifdef BUILD_BFLOAT16
  1091. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1092. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1093. #endif
  1094. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1095. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1096. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1097. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1098. }
  1099. #else //POWER
  1100. #if (ARCH_ZARCH)
  1101. static void init_parameter(void) {
  1102. #ifdef BUILD_BFLOAT16
  1103. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1104. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1105. #endif
  1106. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1107. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1108. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1109. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1110. #ifdef BUILD_BFLOAT16
  1111. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1112. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1113. #endif
  1114. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1115. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1116. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1117. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1118. #ifdef BUILD_BFLOAT16
  1119. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1120. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1121. #endif
  1122. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1123. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1124. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1125. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1126. }
  1127. #else //ZARCH
  1128. #if (ARCH_RISCV64)
  1129. static void init_parameter(void) {
  1130. #ifdef BUILD_BFLOAT16
  1131. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1132. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1133. #endif
  1134. #ifdef BUILD_HFLOAT16
  1135. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1136. #endif
  1137. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1138. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1139. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1140. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1141. #ifdef BUILD_BFLOAT16
  1142. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1143. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1144. #endif
  1145. #ifdef BUILD_HFLOAT16
  1146. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1147. #endif
  1148. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1149. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1150. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1151. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1152. #ifdef BUILD_BFLOAT16
  1153. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1154. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1155. #endif
  1156. #ifdef BUILD_HFLOAT16
  1157. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1158. #endif
  1159. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1160. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1161. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1162. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1163. }
  1164. #else //RISCV64
  1165. #ifdef ARCH_X86
  /*
   * Translate one CPUID leaf-2 descriptor byte into an L2 cache size in KB.
   * Returns 0 when the byte is not one of the L2 descriptors listed below.
   *
   * This table is from http://www.sandpile.org/ia32/cpuid.htm
   */
  static int l2_kb_from_cpuid2_descriptor(int descriptor) {
    switch (descriptor) {
    case 0x1a:
      return 96;

    case 0x39: case 0x3b: case 0x41: case 0x79: case 0x81:
      return 128;

    case 0x3a:
      return 192;

    case 0x21: case 0x3c: case 0x42: case 0x7a: case 0x7e: case 0x82:
      return 256;

    case 0x3d:
      return 384;

    case 0x3e: case 0x43: case 0x7b: case 0x7f: case 0x83: case 0x86:
      return 512;

    case 0x44: case 0x78: case 0x7c: case 0x84: case 0x87:
      return 1024;

    case 0x45: case 0x7d: case 0x85:
      return 2048;

    case 0x48:
      /* 3 MB = 3 * 1024 KB; this entry previously returned 3184. */
      return 3072;

    case 0x49:
      return 4096;

    case 0x4e:
      return 6144;

    default:
      return 0;
    }
  }

  /*
   * Fallback L2 detection via CPUID leaf 2 (descriptor bytes).
   *
   * Scans the 15 descriptor bytes in eax[31:8], ebx, ecx and edx in that
   * order and returns the size (KB) of the first one that names an L2 cache.
   * The low byte of eax is deliberately not examined. If nothing matches, a
   * warning is printed to stderr and 256 is returned.
   *
   * NOTE(review): the "register invalid" flag (bit 31 of each register) is
   * not checked before its bytes are used - confirm whether that matters for
   * the CPUs this fallback targets.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){
      int kb = l2_kb_from_cpuid2_descriptor(info[i]);
      if (kb != 0) return kb;
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1236. #endif
  /*
   * Return the L2 cache size used for tuning.
   *
   * A non-zero OPENBLAS_L2_SIZE environment value (as parsed by
   * readenv_atoi) is returned as-is. Otherwise bits 31:16 of ecx from
   * CPUID 0x80000006 are used. When that field is not positive:
   *   - ARCH_X86 builds fall back to get_l2_size_old();
   *   - other builds print a warning to stderr and return 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int size = readenv_atoi("OPENBLAS_L2_SIZE");

    if (size == 0) {
      /* No override from the environment: ask the CPU. */
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      size = BITMASK(ecx, 16, 0xffff);

      if (size <= 0) {
  #ifdef ARCH_X86
        size = get_l2_size_old();
  #else
        fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
        size = 256;
  #endif
      }
    }

    return size;
  }
  /*
   * Return the L3 size derived from CPUID 0x80000006: the 14-bit field in
   * edx[31:18] multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result KB - confirm against the CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l3_field = BITMASK(edx, 18, 0x3fff);

    return l3_field * 512;
  }
  1260. static void init_parameter(void) {
  1261. int l2 = get_l2_size();
  1262. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1263. /* where the GEMM unrolling parameters do not depend on l2 */
  1264. #ifdef BUILD_BFLOAT16
  1265. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1266. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1267. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1268. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1269. #endif
  1270. #ifdef BUILD_HFLOAT16
  1271. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1272. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1273. #endif
  1274. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1275. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1276. #endif
  1277. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1278. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1279. #endif
  1280. #if BUILD_COMPLEX == 1
  1281. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1282. #endif
  1283. #if BUILD_COMPLEX16==1
  1284. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1285. #endif
  1286. #if BUILD_COMPLEX == 1
  1287. #ifdef CGEMM3M_DEFAULT_Q
  1288. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1289. #else
  1290. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1291. #endif
  1292. #endif
  1293. #if BUILD_COMPLEX16 == 1
  1294. #ifdef ZGEMM3M_DEFAULT_Q
  1295. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1296. #else
  1297. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1298. #endif
  1299. #endif
  1300. #ifdef EXPRECISION
  1301. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1302. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1303. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1304. #endif
  1305. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1306. #ifdef DEBUG
  1307. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1308. #endif
  1309. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1310. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1311. #endif
  1312. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1313. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1314. #endif
  1315. #if BUILD_COMPLEX==1
  1316. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1317. #endif
  1318. #if BUILD_COMPLEX16==1
  1319. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1320. #endif
  1321. #ifdef EXPRECISION
  1322. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1323. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1324. #endif
  1325. #endif
  1326. #ifdef CORE_NORTHWOOD
  1327. #ifdef DEBUG
  1328. fprintf(stderr, "Northwood\n");
  1329. #endif
  1330. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1331. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1332. #endif
  1333. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1334. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1335. #endif
  1336. #if BUILD_COMPLEX==1
  1337. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1338. #endif
  1339. #if BUILD_COMPLEX16==1
  1340. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1341. #endif
  1342. #ifdef EXPRECISION
  1343. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1344. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1345. #endif
  1346. #endif
  1347. #ifdef ATOM
  1348. #ifdef DEBUG
  1349. fprintf(stderr, "Atom\n");
  1350. #endif
  1351. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1352. TABLE_NAME.sgemm_p = 256;
  1353. #endif
  1354. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1355. TABLE_NAME.dgemm_p = 128;
  1356. #endif
  1357. #if BUILD_COMPLEX==1
  1358. TABLE_NAME.cgemm_p = 128;
  1359. #endif
  1360. #if BUILD_COMPLEX16==1
  1361. TABLE_NAME.zgemm_p = 64;
  1362. #endif
  1363. #ifdef EXPRECISION
  1364. TABLE_NAME.qgemm_p = 64;
  1365. TABLE_NAME.xgemm_p = 32;
  1366. #endif
  1367. #endif
  1368. #ifdef CORE_PRESCOTT
  1369. #ifdef DEBUG
  1370. fprintf(stderr, "Prescott\n");
  1371. #endif
  1372. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1373. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1374. #endif
  1375. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1376. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1377. #endif
  1378. #if BUILD_COMPLEX==1
  1379. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1380. #endif
  1381. #if BUILD_COMPLEX16 == 1
  1382. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1383. #endif
  1384. #ifdef EXPRECISION
  1385. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1386. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1387. #endif
  1388. #endif
  1389. #ifdef CORE2
  1390. #ifdef DEBUG
  1391. fprintf(stderr, "Core2\n");
  1392. #endif
  1393. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1394. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1395. #endif
  1396. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1397. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1398. #endif
  1399. #if BUILD_COMPLEX==1
  1400. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1401. #endif
  1402. #if BUILD_COMPLEX16==1
  1403. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1404. #endif
  1405. #ifdef EXPRECISION
  1406. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1407. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1408. #endif
  1409. #endif
  1410. #ifdef PENRYN
  1411. #ifdef DEBUG
  1412. fprintf(stderr, "Penryn\n");
  1413. #endif
  1414. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1415. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1416. #endif
  1417. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1418. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1419. #endif
  1420. #if BUILD_COMPLEX==1
  1421. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1422. #endif
  1423. #if BUILD_COMPLEX16==1
  1424. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1425. #endif
  1426. #ifdef EXPRECISION
  1427. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1428. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1429. #endif
  1430. #endif
  1431. #ifdef DUNNINGTON
  1432. #ifdef DEBUG
  1433. fprintf(stderr, "Dunnington\n");
  1434. #endif
  1435. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1436. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1437. #endif
  1438. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1439. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1440. #endif
  1441. #if BUILD_COMPLEX==1
  1442. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1443. #endif
  1444. #if BUILD_COMPLEX16==1
  1445. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1446. #endif
  1447. #ifdef EXPRECISION
  1448. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1449. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1450. #endif
  1451. #endif
  1452. #ifdef NEHALEM
  1453. #ifdef DEBUG
  1454. fprintf(stderr, "Nehalem\n");
  1455. #endif
  1456. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1457. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1458. #endif
  1459. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1460. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1461. #endif
  1462. #if BUILD_COMPLEX
  1463. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1464. #endif
  1465. #if BUILD_COMPLEX16
  1466. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1467. #endif
  1468. #ifdef EXPRECISION
  1469. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1470. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1471. #endif
  1472. #endif
  1473. #ifdef SANDYBRIDGE
  1474. #ifdef DEBUG
  1475. fprintf(stderr, "Sandybridge\n");
  1476. #endif
  1477. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1478. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1479. #endif
  1480. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1481. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1482. #endif
  1483. #if BUILD_COMPLEX
  1484. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1485. #endif
  1486. #if BUILD_COMPLEX16
  1487. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1488. #endif
  1489. #ifdef EXPRECISION
  1490. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1491. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1492. #endif
  1493. #endif
  1494. #ifdef HASWELL
  1495. #ifdef DEBUG
  1496. fprintf(stderr, "Haswell\n");
  1497. #endif
  1498. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1499. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1500. #endif
  1501. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1502. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1503. #endif
  1504. #if BUILD_COMPLEX
  1505. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1506. #endif
  1507. #if BUILD_COMPLEX16
  1508. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1509. #endif
  1510. #ifdef EXPRECISION
  1511. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1512. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1513. #endif
  1514. #endif
  1515. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1516. #ifdef DEBUG
  1517. fprintf(stderr, "SkylakeX\n");
  1518. #endif
  1519. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1520. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1521. #endif
  1522. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1523. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1524. #endif
  1525. #if BUILD_COMPLEX
  1526. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1527. #endif
  1528. #if BUILD_COMPLEX16
  1529. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1530. #endif
  1531. #ifdef EXPRECISION
  1532. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1533. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1534. #endif
  1535. #endif
  1536. #ifdef OPTERON
  1537. #ifdef DEBUG
  1538. fprintf(stderr, "Opteron\n");
  1539. #endif
  1540. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1541. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1542. #endif
  1543. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1544. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1545. #endif
  1546. #if BUILD_COMPLEX
  1547. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1548. #endif
  1549. #if BUILD_COMPLEX16
  1550. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1551. #endif
  1552. #ifdef EXPRECISION
  1553. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1554. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1555. #endif
  1556. #endif
  1557. #ifdef BARCELONA
  1558. #ifdef DEBUG
  1559. fprintf(stderr, "Barcelona\n");
  1560. #endif
  1561. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1562. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1563. #endif
  1564. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1565. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1566. #endif
  1567. #if BUILD_COMPLEX
  1568. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1569. #endif
  1570. #if BUILD_COMPLEX16
  1571. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1572. #endif
  1573. #ifdef EXPRECISION
  1574. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1575. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1576. #endif
  1577. #endif
  1578. #ifdef BOBCAT
  1579. #ifdef DEBUG
  1580. fprintf(stderr, "Bobcate\n");
  1581. #endif
  1582. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1583. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1584. #endif
  1585. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1586. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1587. #endif
  1588. #if BUILD_COMPLEX
  1589. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1590. #endif
  1591. #if BUILD_COMPLEX16
  1592. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1593. #endif
  1594. #ifdef EXPRECISION
  1595. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1596. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1597. #endif
  1598. #endif
  1599. #ifdef BULLDOZER
  1600. #ifdef DEBUG
  1601. fprintf(stderr, "Bulldozer\n");
  1602. #endif
  1603. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1604. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1605. #endif
  1606. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1607. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1608. #endif
  1609. #if BUILD_COMPLEX
  1610. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1611. #endif
  1612. #if BUILD_COMPLEX16
  1613. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1614. #endif
  1615. #ifdef EXPRECISION
  1616. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1617. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1618. #endif
  1619. #endif
  1620. #ifdef EXCAVATOR
  1621. #ifdef DEBUG
  1622. fprintf(stderr, "Excavator\n");
  1623. #endif
  1624. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1625. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1626. #endif
  1627. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1628. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1629. #endif
  1630. #if BUILD_COMPLEX
  1631. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1632. #endif
  1633. #if BUILD_COMPLEX16
  1634. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1635. #endif
  1636. #ifdef EXPRECISION
  1637. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1638. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1639. #endif
  1640. #endif
  1641. #ifdef PILEDRIVER
  1642. #ifdef DEBUG
  1643. fprintf(stderr, "Piledriver\n");
  1644. #endif
  1645. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1646. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1647. #endif
  1648. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1649. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1650. #endif
  1651. #if BUILD_COMPLEX
  1652. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1653. #endif
  1654. #if BUILD_COMPLEX16
  1655. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1656. #endif
  1657. #ifdef EXPRECISION
  1658. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1659. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1660. #endif
  1661. #endif
  1662. #ifdef STEAMROLLER
  1663. #ifdef DEBUG
  1664. fprintf(stderr, "Steamroller\n");
  1665. #endif
  1666. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1667. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1668. #endif
  1669. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1670. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1671. #endif
  1672. #if BUILD_COMPLEX
  1673. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1674. #endif
  1675. #if BUILD_COMPLEX16
  1676. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1677. #endif
  1678. #ifdef EXPRECISION
  1679. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1680. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1681. #endif
  1682. #endif
  1683. #ifdef ZEN
  1684. #ifdef DEBUG
  1685. fprintf(stderr, "Zen\n");
  1686. #endif
  1687. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1688. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1689. #endif
  1690. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1691. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1692. #endif
  1693. #if BUILD_COMPLEX
  1694. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1695. #endif
  1696. #if BUILD_COMPLEX16
  1697. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1698. #endif
  1699. #ifdef EXPRECISION
  1700. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1701. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1702. #endif
  1703. #endif
  1704. #ifdef NANO
  1705. #ifdef DEBUG
  1706. fprintf(stderr, "NANO\n");
  1707. #endif
  1708. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1709. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1710. #endif
  1711. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1712. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1713. #endif
  1714. #if (BUILD_COMPLEX==1)
  1715. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1716. #endif
  1717. #if (BUILD_COMPLEX16==1)
  1718. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1719. #endif
  1720. #ifdef EXPRECISION
  1721. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1722. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1723. #endif
  1724. #endif
  1725. #ifdef SAPPHIRERAPIDS
  1726. #if (BUILD_BFLOAT16 == 1)
  1727. TABLE_NAME.need_amxtile_permission = 1;
  1728. #endif
  1729. #endif
  1730. #if BUILD_COMPLEX==1
  1731. #ifdef CGEMM3M_DEFAULT_P
  1732. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1733. #else
  1734. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1735. #endif
  1736. #endif
  1737. #if BUILD_COMPLEX16==1
  1738. #ifdef ZGEMM3M_DEFAULT_P
  1739. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1740. #else
  1741. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1742. #endif
  1743. #endif
  1744. #ifdef EXPRECISION
  1745. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1746. #endif
  1747. #if BUILD_SINGLE == 1
  1748. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1749. #endif
  1750. #if BUILD_DOUBLE== 1
  1751. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1752. #endif
  1753. #if BUILD_COMPLEX==1
  1754. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1755. #endif
  1756. #if BUILD_COMPLEX16==1
  1757. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1758. #endif
  1759. #if BUILD_COMPLEX==1
  1760. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1761. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1762. #else
  1763. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1764. #endif
  1765. #endif
  1766. #if BUILD_COMPLEX16==1
  1767. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1768. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1769. #else
  1770. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1771. #endif
  1772. #endif
  1773. #ifdef QUAD_PRECISION
  1774. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1775. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1776. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1777. #endif
  1778. #ifdef DEBUG
  1779. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1780. #endif
  1781. #if BUILD_BFLOAT16==1
  1782. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1783. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1784. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1785. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1786. TABLE_NAME.bgemm_r = (((BUFFER_SIZE -
  1787. ((TABLE_NAME.bgemm_p * TABLE_NAME.bgemm_q * 4 + TABLE_NAME.offsetA
  1788. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1789. ) / (TABLE_NAME.bgemm_q * 4) - 15) & ~15);
  1790. #endif
  1791. #if BUILD_HFLOAT16==1
  1792. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1793. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1794. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1795. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1796. #endif
  1797. #if BUILD_SINGLE==1
  1798. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1799. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1800. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1801. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1802. #endif
  1803. #if BUILD_DOUBLE==1
  1804. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1805. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1806. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1807. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1808. #endif
  1809. #ifdef EXPRECISION
  1810. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1811. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1812. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1813. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1814. #endif
  1815. #if BUILD_COMPLEX ==1
  1816. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1817. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1818. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1819. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1820. #endif
  1821. #if BUILD_COMPLEX16 ==1
  1822. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1823. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1824. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1825. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1826. #endif
  1827. #if BUILD_COMPLEX == 1
  1828. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1829. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1830. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1831. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1832. #endif
  1833. #if BUILD_COMPLEX16 == 1
  1834. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1835. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1836. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1837. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1838. #endif
  1839. #ifdef EXPRECISION
  1840. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1841. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1842. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1843. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1844. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1845. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1846. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1847. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1848. #endif
  1849. }
  1850. #endif //RISCV64
  1851. #endif //POWER
  1852. #endif //ZARCH
  1853. #endif //(ARCH_LOONGARCH64)
  1854. #endif //(ARCH_MIPS64)
  1855. #endif //(ARCH_ARM64)