You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 58 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  113. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  114. 0, 0, 0,
  115. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SGEMM_DEFAULT_UNROLL_MN
  117. SGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. #endif
  122. #ifdef HAVE_EXCLUSIVE_CACHE
  123. 1,
  124. #else
  125. 0,
  126. #endif
  127. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  128. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  131. isamax_kTS,
  132. #endif
  133. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  134. isamin_kTS, ismax_kTS, ismin_kTS,
  135. snrm2_kTS, sasum_kTS,
  136. #endif
  137. #if BUILD_SINGLE == 1
  138. ssum_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. scopy_kTS, sdot_kTS,
  142. // dsdot_kTS,
  143. srot_kTS, saxpy_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  146. sscal_kTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. sswap_kTS,
  150. sgemv_nTS, sgemv_tTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. sger_kTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. ssymv_LTS, ssymv_UTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  159. #ifdef ARCH_X86_64
  160. sgemm_directTS,
  161. sgemm_direct_performantTS,
  162. #endif
  163. #ifdef ARCH_ARM64
  164. #ifdef HAVE_SME
  165. sgemm_directTS,
  166. #endif
  167. #endif
  168. sgemm_kernelTS, sgemm_betaTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. sgemm_incopyTS, sgemm_itcopyTS,
  171. #else
  172. sgemm_oncopyTS, sgemm_otcopyTS,
  173. #endif
  174. sgemm_oncopyTS, sgemm_otcopyTS,
  175. #endif
  176. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  177. #ifdef SMALL_MATRIX_OPT
  178. sgemm_small_matrix_permitTS,
  179. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  180. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  181. #endif
  182. #endif
  183. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  184. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  185. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  186. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  187. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  188. #else
  189. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  190. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  191. #endif
  192. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  193. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  194. #endif
  195. #if (BUILD_SINGLE==1)
  196. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  197. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  198. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  199. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  200. #else
  201. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  202. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  203. #endif
  204. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  205. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  206. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  207. ssymm_iutcopyTS, ssymm_iltcopyTS,
  208. #else
  209. ssymm_outcopyTS, ssymm_oltcopyTS,
  210. #endif
  211. ssymm_outcopyTS, ssymm_oltcopyTS,
  212. #ifndef NO_LAPACK
  213. sneg_tcopyTS, slaswp_ncopyTS,
  214. #else
  215. NULL,NULL,
  216. #endif
  217. #endif
  218. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  219. 0, 0, 0,
  220. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  221. #ifdef DGEMM_DEFAULT_UNROLL_MN
  222. DGEMM_DEFAULT_UNROLL_MN,
  223. #else
  224. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  225. #endif
  226. #endif
  227. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  228. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  229. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  230. dnrm2_kTS, dasum_kTS,
  231. #endif
  232. #if (BUILD_DOUBLE==1)
  233. dsum_kTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  236. dcopy_kTS, ddot_kTS,
  237. #endif
  238. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  239. dsdot_kTS,
  240. #endif
  241. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  242. drot_kTS,
  243. daxpy_kTS,
  244. dscal_kTS,
  245. dswap_kTS,
  246. dgemv_nTS, dgemv_tTS,
  247. #endif
  248. #if (BUILD_DOUBLE==1)
  249. dger_kTS,
  250. dsymv_LTS, dsymv_UTS,
  251. #endif
  252. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  253. dgemm_kernelTS, dgemm_betaTS,
  254. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  255. dgemm_incopyTS, dgemm_itcopyTS,
  256. #else
  257. dgemm_oncopyTS, dgemm_otcopyTS,
  258. #endif
  259. dgemm_oncopyTS, dgemm_otcopyTS,
  260. #endif
  261. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  262. #ifdef SMALL_MATRIX_OPT
  263. dgemm_small_matrix_permitTS,
  264. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  265. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  266. #endif
  267. #endif
  268. #if (BUILD_DOUBLE==1)
  269. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  270. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  271. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  272. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  273. #else
  274. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  275. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  276. #endif
  277. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  278. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  279. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  280. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  281. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  282. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  283. #else
  284. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  285. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  286. #endif
  287. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  288. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  289. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  290. dsymm_iutcopyTS, dsymm_iltcopyTS,
  291. #else
  292. dsymm_outcopyTS, dsymm_oltcopyTS,
  293. #endif
  294. dsymm_outcopyTS, dsymm_oltcopyTS,
  295. #ifndef NO_LAPACK
  296. dneg_tcopyTS, dlaswp_ncopyTS,
  297. #else
  298. NULL, NULL,
  299. #endif
  300. #endif
  301. #ifdef EXPRECISION
  302. 0, 0, 0,
  303. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  304. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  305. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  306. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  307. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  308. qgemv_nTS, qgemv_tTS, qger_kTS,
  309. qsymv_LTS, qsymv_UTS,
  310. qgemm_kernelTS, qgemm_betaTS,
  311. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  312. qgemm_incopyTS, qgemm_itcopyTS,
  313. #else
  314. qgemm_oncopyTS, qgemm_otcopyTS,
  315. #endif
  316. qgemm_oncopyTS, qgemm_otcopyTS,
  317. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  318. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  319. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  320. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  321. #else
  322. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  323. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  324. #endif
  325. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  326. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  327. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  328. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  329. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  330. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  331. #else
  332. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  333. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  334. #endif
  335. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  336. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  337. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  338. qsymm_iutcopyTS, qsymm_iltcopyTS,
  339. #else
  340. qsymm_outcopyTS, qsymm_oltcopyTS,
  341. #endif
  342. qsymm_outcopyTS, qsymm_oltcopyTS,
  343. #ifndef NO_LAPACK
  344. qneg_tcopyTS, qlaswp_ncopyTS,
  345. #else
  346. NULL, NULL,
  347. #endif
  348. #endif
  349. #if (BUILD_COMPLEX)
  350. 0, 0, 0,
  351. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  352. #ifdef CGEMM_DEFAULT_UNROLL_MN
  353. CGEMM_DEFAULT_UNROLL_MN,
  354. #else
  355. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  356. #endif
  357. #if (BUILD_COMPLEX)
  358. camax_kTS, camin_kTS,
  359. #endif
  360. #if (BUILD_COMPLEX)
  361. icamax_kTS,
  362. #endif
  363. #if (BUILD_COMPLEX)
  364. icamin_kTS,
  365. cnrm2_kTS, casum_kTS, csum_kTS,
  366. #endif
  367. #if (BUILD_COMPLEX)
  368. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  369. #endif
  370. #if (BUILD_COMPLEX)
  371. csrot_kTS,
  372. #endif
  373. #if (BUILD_COMPLEX)
  374. caxpy_kTS,
  375. caxpyc_kTS,
  376. cscal_kTS,
  377. cswap_kTS,
  378. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  379. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  380. #endif
  381. #if (BUILD_COMPLEX)
  382. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  383. csymv_LTS, csymv_UTS,
  384. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  385. #endif
  386. #if (BUILD_COMPLEX)
  387. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  388. cgemm_betaTS,
  389. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  390. cgemm_incopyTS, cgemm_itcopyTS,
  391. #else
  392. cgemm_oncopyTS, cgemm_otcopyTS,
  393. #endif
  394. cgemm_oncopyTS, cgemm_otcopyTS,
  395. #ifdef SMALL_MATRIX_OPT
  396. cgemm_small_matrix_permitTS,
  397. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  398. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  399. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  400. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  401. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  402. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  403. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  404. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  405. #endif
  406. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  407. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  408. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  409. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  410. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  411. #else
  412. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  413. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  414. #endif
  415. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  416. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  417. #endif
  418. #endif
  419. #if (BUILD_COMPLEX)
  420. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  421. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  422. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  423. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  424. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  425. #else
  426. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  427. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  428. #endif
  429. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  430. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  431. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  432. csymm_iutcopyTS, csymm_iltcopyTS,
  433. #else
  434. csymm_outcopyTS, csymm_oltcopyTS,
  435. #endif
  436. csymm_outcopyTS, csymm_oltcopyTS,
  437. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  438. chemm_iutcopyTS, chemm_iltcopyTS,
  439. #else
  440. chemm_outcopyTS, chemm_oltcopyTS,
  441. #endif
  442. chemm_outcopyTS, chemm_oltcopyTS,
  443. 0, 0, 0,
  444. #if (USE_GEMM3M)
  445. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  446. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  447. #else
  448. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  449. #endif
  450. cgemm3m_kernelTS,
  451. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  452. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  453. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  454. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  455. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  456. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  457. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  458. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  459. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  460. csymm3m_oucopybTS, csymm3m_olcopybTS,
  461. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  462. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  463. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  464. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  465. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  466. chemm3m_oucopybTS, chemm3m_olcopybTS,
  467. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  468. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  469. #else
  470. 0, 0, 0,
  471. NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. NULL, NULL,
  485. NULL, NULL,
  486. NULL, NULL,
  487. NULL, NULL,
  488. NULL, NULL,
  489. NULL, NULL,
  490. #endif
  491. #endif
  492. #if (BUILD_COMPLEX)
  493. #ifndef NO_LAPACK
  494. cneg_tcopyTS,
  495. claswp_ncopyTS,
  496. #else
  497. NULL, NULL,
  498. #endif
  499. #endif
  500. #if BUILD_COMPLEX16 == 1
  501. 0, 0, 0,
  502. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  503. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  504. ZGEMM_DEFAULT_UNROLL_MN,
  505. #else
  506. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  507. #endif
  508. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  509. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  510. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  511. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  512. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  513. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  514. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  515. zsymv_LTS, zsymv_UTS,
  516. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  517. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  518. zgemm_betaTS,
  519. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  520. zgemm_incopyTS, zgemm_itcopyTS,
  521. #else
  522. zgemm_oncopyTS, zgemm_otcopyTS,
  523. #endif
  524. zgemm_oncopyTS, zgemm_otcopyTS,
  525. #ifdef SMALL_MATRIX_OPT
  526. zgemm_small_matrix_permitTS,
  527. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  528. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  529. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  530. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  531. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  532. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  533. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  534. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  535. #endif
  536. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  537. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  538. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  539. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  540. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  541. #else
  542. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  543. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  544. #endif
  545. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  546. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  547. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  548. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  549. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  550. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  551. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  552. #else
  553. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  554. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  555. #endif
  556. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  557. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  558. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  559. zsymm_iutcopyTS, zsymm_iltcopyTS,
  560. #else
  561. zsymm_outcopyTS, zsymm_oltcopyTS,
  562. #endif
  563. zsymm_outcopyTS, zsymm_oltcopyTS,
  564. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  565. zhemm_iutcopyTS, zhemm_iltcopyTS,
  566. #else
  567. zhemm_outcopyTS, zhemm_oltcopyTS,
  568. #endif
  569. zhemm_outcopyTS, zhemm_oltcopyTS,
  570. 0, 0, 0,
  571. #if (USE_GEMM3M)
  572. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  573. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  574. #else
  575. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  576. #endif
  577. zgemm3m_kernelTS,
  578. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  579. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  580. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  581. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  582. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  583. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  584. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  585. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  586. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  587. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  588. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  589. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  590. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  591. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  592. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  593. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  594. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  595. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  596. #else
  597. 0, 0, 0,
  598. NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. NULL, NULL,
  612. NULL, NULL,
  613. NULL, NULL,
  614. NULL, NULL,
  615. NULL, NULL,
  616. NULL, NULL,
  617. #endif
  618. #ifndef NO_LAPACK
  619. zneg_tcopyTS, zlaswp_ncopyTS,
  620. #else
  621. NULL, NULL,
  622. #endif
  623. #endif
  624. #ifdef EXPRECISION
  625. 0, 0, 0,
  626. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  627. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  628. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  629. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  630. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  631. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  632. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  633. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  634. xsymv_LTS, xsymv_UTS,
  635. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  636. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  637. xgemm_betaTS,
  638. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  639. xgemm_incopyTS, xgemm_itcopyTS,
  640. #else
  641. xgemm_oncopyTS, xgemm_otcopyTS,
  642. #endif
  643. xgemm_oncopyTS, xgemm_otcopyTS,
  644. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  645. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  646. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  647. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  648. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  649. #else
  650. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  651. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  652. #endif
  653. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  654. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  655. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  656. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  657. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  658. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  659. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  660. #else
  661. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  662. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  663. #endif
  664. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  665. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  666. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  667. xsymm_iutcopyTS, xsymm_iltcopyTS,
  668. #else
  669. xsymm_outcopyTS, xsymm_oltcopyTS,
  670. #endif
  671. xsymm_outcopyTS, xsymm_oltcopyTS,
  672. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  673. xhemm_iutcopyTS, xhemm_iltcopyTS,
  674. #else
  675. xhemm_outcopyTS, xhemm_oltcopyTS,
  676. #endif
  677. xhemm_outcopyTS, xhemm_oltcopyTS,
  678. 0, 0, 0,
  679. #if (USE_GEMM3M)
  680. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  681. xgemm3m_kernelTS,
  682. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  683. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  684. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  685. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  686. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  687. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  688. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  689. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  690. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  691. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  692. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  693. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  694. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  695. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  696. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  697. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  698. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  699. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  700. #else
  701. 0, 0, 0,
  702. NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. NULL, NULL,
  716. NULL, NULL,
  717. NULL, NULL,
  718. NULL, NULL,
  719. NULL, NULL,
  720. NULL, NULL,
  721. #endif
  722. #ifndef NO_LAPACK
  723. xneg_tcopyTS, xlaswp_ncopyTS,
  724. #else
  725. NULL, NULL,
  726. #endif
  727. #endif
  728. init_parameter,
  729. SNUMOPT, DNUMOPT, QNUMOPT,
  730. #if BUILD_SINGLE == 1
  731. saxpby_kTS,
  732. #endif
  733. #if BUILD_DOUBLE == 1
  734. daxpby_kTS,
  735. #endif
  736. #if BUILD_COMPLEX == 1
  737. caxpby_kTS,
  738. #endif
  739. #if BUILD_COMPLEX16== 1
  740. zaxpby_kTS,
  741. #endif
  742. #if BUILD_SINGLE == 1
  743. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  744. #endif
  745. #if BUILD_DOUBLE== 1
  746. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  747. #endif
  748. #if BUILD_COMPLEX == 1
  749. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  750. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  751. #endif
  752. #if BUILD_COMPLEX16 == 1
  753. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  754. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  755. #endif
  756. #if BUILD_SINGLE == 1
  757. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  758. #endif
  759. #if BUILD_DOUBLE== 1
  760. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  761. #endif
  762. #if BUILD_COMPLEX== 1
  763. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  764. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  765. #endif
  766. #if BUILD_COMPLEX16==1
  767. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  768. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  769. #endif
  770. #if BUILD_SINGLE == 1
  771. sgeadd_kTS,
  772. #endif
  773. #if BUILD_DOUBLE==1
  774. dgeadd_kTS,
  775. #endif
  776. #if BUILD_COMPLEX==1
  777. cgeadd_kTS,
  778. #endif
  779. #if BUILD_COMPLEX16==1
  780. zgeadd_kTS,
  781. #endif
  782. };
  783. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter().
   *
   * Stores the compile-time default GEMM blocking factors (the *_DEFAULT_P,
   * *_DEFAULT_Q and *_DEFAULT_R macros) into TABLE_NAME.  Each precision is
   * only touched when the matching BUILD_* switch enables it.  When
   * USE_GEMM3M is on, the 3M blocking factors come from dedicated
   * *GEMM3M_DEFAULT_* macros if those exist; otherwise they are copied from
   * the real-precision values stored just above, so the base values must be
   * assigned before the GEMM3M section runs.
   */
  static void init_parameter(void) {

    /* ---- bfloat16 ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* ---- single precision (also needed by the complex build) ---- */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* ---- double precision (also needed by the complex16 build) ---- */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* ---- single complex ---- */
  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* ---- double complex ---- */
  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision, real and complex ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /* ---- cgemm3m: fall back to the sgemm values stored above ---- */
  #ifndef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #else
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #endif
  #ifndef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #else
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #endif
  #ifndef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #else
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #endif

    /* ---- zgemm3m: fall back to the dgemm values stored above ---- */
  #ifndef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #else
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #endif
  #ifndef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #else
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #endif
  #ifndef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #else
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #endif

    /* ---- xgemm3m always mirrors the qgemm values ---- */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  876. #else // (ARCH_ARM64)
  877. #if defined(ARCH_MIPS64)
  878. static void init_parameter(void) {
  879. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  880. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  881. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  882. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  883. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  884. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  885. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  886. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  887. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  888. TABLE_NAME.dgemm_r = 640;
  889. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  890. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  891. #ifdef EXPRECISION
  892. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  893. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  894. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  895. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  896. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  897. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  898. #endif
  899. #if defined(USE_GEMM3M)
  900. #ifdef CGEMM3M_DEFAULT_P
  901. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  902. #else
  903. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  904. #endif
  905. #ifdef ZGEMM3M_DEFAULT_P
  906. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  907. #else
  908. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  909. #endif
  910. #ifdef CGEMM3M_DEFAULT_Q
  911. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  912. #else
  913. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  914. #endif
  915. #ifdef ZGEMM3M_DEFAULT_Q
  916. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  917. #else
  918. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  919. #endif
  920. #ifdef CGEMM3M_DEFAULT_R
  921. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  922. #else
  923. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  924. #endif
  925. #ifdef ZGEMM3M_DEFAULT_R
  926. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  927. #else
  928. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  929. #endif
  930. #ifdef EXPRECISION
  931. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  932. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  933. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  934. #endif
  935. #endif
  936. }
  937. #else // (ARCH_MIPS64)
  938. #if (ARCH_LOONGARCH64)
  /*
   * Return the L3 cache size in MB as reported by CPUCFG word 0x14.
   *
   * The word is decoded as three fields:
   *   bits  0..15  -> a count stored minus one
   *   bits 16..23  -> a power-of-two exponent
   *   bits 24..30  -> a second power-of-two exponent
   * (presumably ways-1, log2(sets) and log2(line size) -- confirm against
   * the LoongArch reference manual).  The size in bytes is
   * count * 2^(exp1 + exp2); it is converted to MB by dividing by 2^20.
   *
   * Everything is a power-of-two scaling, so this is done with integer
   * shifts rather than floating-point pow(): the result is the same for
   * every value that fits in an int, no libm call is needed, and an
   * implausibly large encoding saturates instead of hitting an
   * out-of-range double-to-int conversion.
   */
  static int get_L3_size(void) {
    int ret = 0, id = 0x14;
    unsigned int count, shift;
    unsigned long long mb;
    const int max_mb = (int)(~0u >> 1);   /* largest value an int can hold */

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(ret)
      : [id]"r"(id)
      : "memory"
    );

    count = (unsigned int)(ret & 0xffff) + 1u;               /* 1 .. 65536 */
    shift = (unsigned int)((ret >> 16) & 0xff)
          + (unsigned int)((ret >> 24) & 0x7f);              /* 0 .. 382   */

    /* bytes = count << shift; MB = bytes >> 20 (truncating). */
    if (shift < 20)
      return (int)(count >> (20 - shift));

    /* count < 2^17, so anything shifted by 31 or more cannot fit in an int;
       this also keeps the 64-bit shift below well inside its valid range. */
    if (shift - 20 >= 31)
      return max_mb;

    mb = (unsigned long long)count << (shift - 20);
    if (mb > (unsigned long long)max_mb)
      return max_mb;

    return (int)mb; // MB
  }
  949. static void init_parameter(void) {
  950. #ifdef BUILD_BFLOAT16
  951. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  952. #endif
  953. #ifdef BUILD_BFLOAT16
  954. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  955. #endif
  956. #if defined(LA464)
  957. int L3_size = get_L3_size();
  958. #ifdef SMP
  959. if(blas_num_threads == 1){
  960. #endif
  961. //single thread
  962. if (L3_size == 32){ // 3C5000 and 3D5000
  963. TABLE_NAME.sgemm_p = 256;
  964. TABLE_NAME.sgemm_q = 384;
  965. TABLE_NAME.sgemm_r = 8192;
  966. TABLE_NAME.dgemm_p = 112;
  967. TABLE_NAME.dgemm_q = 289;
  968. TABLE_NAME.dgemm_r = 4096;
  969. TABLE_NAME.cgemm_p = 128;
  970. TABLE_NAME.cgemm_q = 256;
  971. TABLE_NAME.cgemm_r = 4096;
  972. TABLE_NAME.zgemm_p = 128;
  973. TABLE_NAME.zgemm_q = 128;
  974. TABLE_NAME.zgemm_r = 2048;
  975. } else { // 3A5000 and 3C5000L
  976. TABLE_NAME.sgemm_p = 256;
  977. TABLE_NAME.sgemm_q = 384;
  978. TABLE_NAME.sgemm_r = 4096;
  979. TABLE_NAME.dgemm_p = 112;
  980. TABLE_NAME.dgemm_q = 300;
  981. TABLE_NAME.dgemm_r = 3024;
  982. TABLE_NAME.cgemm_p = 128;
  983. TABLE_NAME.cgemm_q = 256;
  984. TABLE_NAME.cgemm_r = 2048;
  985. TABLE_NAME.zgemm_p = 128;
  986. TABLE_NAME.zgemm_q = 128;
  987. TABLE_NAME.zgemm_r = 1024;
  988. }
  989. #ifdef SMP
  990. }else{
  991. //multi thread
  992. if (L3_size == 32){ // 3C5000 and 3D5000
  993. TABLE_NAME.sgemm_p = 256;
  994. TABLE_NAME.sgemm_q = 384;
  995. TABLE_NAME.sgemm_r = 1024;
  996. TABLE_NAME.dgemm_p = 112;
  997. TABLE_NAME.dgemm_q = 289;
  998. TABLE_NAME.dgemm_r = 342;
  999. TABLE_NAME.cgemm_p = 128;
  1000. TABLE_NAME.cgemm_q = 256;
  1001. TABLE_NAME.cgemm_r = 512;
  1002. TABLE_NAME.zgemm_p = 128;
  1003. TABLE_NAME.zgemm_q = 128;
  1004. TABLE_NAME.zgemm_r = 512;
  1005. } else { // 3A5000 and 3C5000L
  1006. TABLE_NAME.sgemm_p = 256;
  1007. TABLE_NAME.sgemm_q = 384;
  1008. TABLE_NAME.sgemm_r = 2048;
  1009. TABLE_NAME.dgemm_p = 112;
  1010. TABLE_NAME.dgemm_q = 300;
  1011. TABLE_NAME.dgemm_r = 738;
  1012. TABLE_NAME.cgemm_p = 128;
  1013. TABLE_NAME.cgemm_q = 256;
  1014. TABLE_NAME.cgemm_r = 1024;
  1015. TABLE_NAME.zgemm_p = 128;
  1016. TABLE_NAME.zgemm_q = 128;
  1017. TABLE_NAME.zgemm_r = 1024;
  1018. }
  1019. }
  1020. #endif
  1021. #else
  1022. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1023. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1024. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1025. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1026. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1027. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1028. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1029. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1030. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1031. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1032. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1033. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1034. #endif
  1035. #ifdef BUILD_BFLOAT16
  1036. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1037. #endif
  1038. }
  1039. #else // (ARCH_LOONGARCH64)
  1040. #if (ARCH_POWER)
  1041. static void init_parameter(void) {
  1042. #ifdef BUILD_BFLOAT16
  1043. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1044. #endif
  1045. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1046. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1047. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1048. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1049. #ifdef BUILD_BFLOAT16
  1050. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1051. #endif
  1052. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1053. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1054. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1055. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1056. #ifdef BUILD_BFLOAT16
  1057. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1058. #endif
  1059. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1060. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1061. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1062. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1063. }
  1064. #else //POWER
  1065. #if (ARCH_ZARCH)
  1066. static void init_parameter(void) {
  1067. #ifdef BUILD_BFLOAT16
  1068. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1069. #endif
  1070. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1071. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1072. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1073. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1074. #ifdef BUILD_BFLOAT16
  1075. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1076. #endif
  1077. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1078. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1079. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1080. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1081. #ifdef BUILD_BFLOAT16
  1082. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1083. #endif
  1084. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1085. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1086. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1087. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1088. }
  1089. #else //ZARCH
  1090. #if (ARCH_RISCV64)
  1091. static void init_parameter(void) {
  1092. #ifdef BUILD_BFLOAT16
  1093. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1094. #endif
  1095. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1096. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1097. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1098. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1099. #ifdef BUILD_BFLOAT16
  1100. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1101. #endif
  1102. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1103. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1104. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1105. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1106. #ifdef BUILD_BFLOAT16
  1107. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1108. #endif
  1109. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1110. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1111. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1112. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1113. }
  1114. #else //RISCV64
  1115. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size detection through CPUID leaf 2 (cache descriptor
   * bytes).  Returns the size in KB.
   *
   * The four result registers are scanned byte by byte in the order
   * eax, ebx, ecx, edx (low byte first) and the first descriptor that
   * names a known cache size wins.  The low byte of eax is skipped, as it
   * is not a descriptor.  A register whose bit 31 is set carries no valid
   * descriptors and is skipped as a whole.
   *
   * If nothing matches, a warning is printed to stderr and 256 is
   * returned.
   */
  static int get_l2_size_old(void){
    int regs[4];          /* eax, ebx, ecx, edx */
    int r, shift;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (r = 0; r < 4; r++){
      /* Bit 31 set: this register does not contain descriptors. */
      if ((unsigned int)regs[r] & 0x80000000u)
        continue;

      /* eax (r == 0) starts at its second byte; the others at byte 0. */
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8){
        switch (BITMASK(regs[r], shift, 0xff)){
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;
          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;
          case 0x3a :
            return 192;
          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;
          case 0x3d :
            return 384;
          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;
          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;
          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;
          case 0x48 :
            return 3072;   /* 3 MB; was mistyped as 3184 */
          case 0x49 :
            return 4096;
          case 0x4e :
            return 6144;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1186. #endif
  /*
   * Return the L2 cache size in KB.
   *
   * Resolution order:
   *   1. A positive OPENBLAS_L2_SIZE environment override, returned as-is.
   *      Zero means "not set"; a negative value is ignored too, because
   *      the callers derive blocking factors from it (e.g. l2 >> 7) and
   *      a negative size would yield negative blocking factors.
   *   2. CPUID leaf 0x80000006: bits 16..31 of ecx.
   *   3. If that field is zero: on 32-bit x86 (ARCH_X86) the CPUID leaf 2
   *      descriptor scan in get_l2_size_old(); everywhere else a warning
   *      on stderr and a default of 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx, l2;

    l2 = readenv_atoi("OPENBLAS_L2_SIZE");
    if (l2 > 0)
      return l2;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2 = BITMASK(ecx, 16, 0xffff);
    if (l2 > 0)
      return l2;

  #ifndef ARCH_X86
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #else
    return get_l2_size_old();
  #endif
  }
  /*
   * Return the L3 cache size derived from CPUID leaf 0x80000006.
   *
   * The value is the field extracted by BITMASK(edx, 18, 0x3fff)
   * multiplied by 512 (presumably the field counts 512 KB units, making
   * the result KB -- confirm against the vendor CPUID documentation).
   */
  static __inline__ int get_l3_size(void){
    int regs[4];   /* eax, ebx, ecx, edx */
    int units;

    cpuid(0x80000006, &regs[0], &regs[1], &regs[2], &regs[3]);

    units = BITMASK(regs[3], 18, 0x3fff);
    return units * 512;
  }
  1210. static void init_parameter(void) {
  1211. int l2 = get_l2_size();
  1212. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1213. /* where the GEMM unrolling parameters do not depend on l2 */
  1214. #ifdef BUILD_BFLOAT16
  1215. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1216. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1217. #endif
  1218. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1219. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1220. #endif
  1221. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1222. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1223. #endif
  1224. #if BUILD_COMPLEX == 1
  1225. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1226. #endif
  1227. #if BUILD_COMPLEX16==1
  1228. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1229. #endif
  1230. #if BUILD_COMPLEX == 1
  1231. #ifdef CGEMM3M_DEFAULT_Q
  1232. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1233. #else
  1234. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1235. #endif
  1236. #endif
  1237. #if BUILD_COMPLEX16 == 1
  1238. #ifdef ZGEMM3M_DEFAULT_Q
  1239. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1240. #else
  1241. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1242. #endif
  1243. #endif
  1244. #ifdef EXPRECISION
  1245. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1246. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1247. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1248. #endif
  1249. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1250. #ifdef DEBUG
  1251. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1252. #endif
  1253. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1254. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1255. #endif
  1256. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1257. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1258. #endif
  1259. #if BUILD_COMPLEX==1
  1260. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1261. #endif
  1262. #if BUILD_COMPLEX16==1
  1263. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1264. #endif
  1265. #ifdef EXPRECISION
  1266. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1267. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1268. #endif
  1269. #endif
  1270. #ifdef CORE_NORTHWOOD
  1271. #ifdef DEBUG
  1272. fprintf(stderr, "Northwood\n");
  1273. #endif
  1274. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1275. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1276. #endif
  1277. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1278. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1279. #endif
  1280. #if BUILD_COMPLEX==1
  1281. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1282. #endif
  1283. #if BUILD_COMPLEX16==1
  1284. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1285. #endif
  1286. #ifdef EXPRECISION
  1287. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1288. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1289. #endif
  1290. #endif
  1291. #ifdef ATOM
  1292. #ifdef DEBUG
  1293. fprintf(stderr, "Atom\n");
  1294. #endif
  1295. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1296. TABLE_NAME.sgemm_p = 256;
  1297. #endif
  1298. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1299. TABLE_NAME.dgemm_p = 128;
  1300. #endif
  1301. #if BUILD_COMPLEX==1
  1302. TABLE_NAME.cgemm_p = 128;
  1303. #endif
  1304. #if BUILD_COMPLEX16==1
  1305. TABLE_NAME.zgemm_p = 64;
  1306. #endif
  1307. #ifdef EXPRECISION
  1308. TABLE_NAME.qgemm_p = 64;
  1309. TABLE_NAME.xgemm_p = 32;
  1310. #endif
  1311. #endif
  1312. #ifdef CORE_PRESCOTT
  1313. #ifdef DEBUG
  1314. fprintf(stderr, "Prescott\n");
  1315. #endif
  1316. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1317. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1318. #endif
  1319. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1320. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1321. #endif
  1322. #if BUILD_COMPLEX==1
  1323. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1324. #endif
  1325. #if BUILD_COMPLEX16 == 1
  1326. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1327. #endif
  1328. #ifdef EXPRECISION
  1329. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1330. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1331. #endif
  1332. #endif
  1333. #ifdef CORE2
  1334. #ifdef DEBUG
  1335. fprintf(stderr, "Core2\n");
  1336. #endif
  1337. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1338. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1339. #endif
  1340. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1341. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1342. #endif
  1343. #if BUILD_COMPLEX==1
  1344. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1345. #endif
  1346. #if BUILD_COMPLEX16==1
  1347. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1348. #endif
  1349. #ifdef EXPRECISION
  1350. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1351. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1352. #endif
  1353. #endif
  1354. #ifdef PENRYN
  1355. #ifdef DEBUG
  1356. fprintf(stderr, "Penryn\n");
  1357. #endif
  1358. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1359. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1360. #endif
  1361. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1362. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1363. #endif
  1364. #if BUILD_COMPLEX==1
  1365. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1366. #endif
  1367. #if BUILD_COMPLEX16==1
  1368. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1369. #endif
  1370. #ifdef EXPRECISION
  1371. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1372. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1373. #endif
  1374. #endif
  1375. #ifdef DUNNINGTON
  1376. #ifdef DEBUG
  1377. fprintf(stderr, "Dunnington\n");
  1378. #endif
  1379. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1380. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1381. #endif
  1382. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1383. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1384. #endif
  1385. #if BUILD_COMPLEX==1
  1386. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1387. #endif
  1388. #if BUILD_COMPLEX16==1
  1389. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1390. #endif
  1391. #ifdef EXPRECISION
  1392. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1393. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1394. #endif
  1395. #endif
  1396. #ifdef NEHALEM
  1397. #ifdef DEBUG
  1398. fprintf(stderr, "Nehalem\n");
  1399. #endif
  1400. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1401. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1402. #endif
  1403. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1404. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1405. #endif
  1406. #if BUILD_COMPLEX
  1407. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1408. #endif
  1409. #if BUILD_COMPLEX16
  1410. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1411. #endif
  1412. #ifdef EXPRECISION
  1413. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1414. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1415. #endif
  1416. #endif
  1417. #ifdef SANDYBRIDGE
  1418. #ifdef DEBUG
  1419. fprintf(stderr, "Sandybridge\n");
  1420. #endif
  1421. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1422. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1423. #endif
  1424. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1425. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1426. #endif
  1427. #if BUILD_COMPLEX
  1428. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1429. #endif
  1430. #if BUILD_COMPLEX16
  1431. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1432. #endif
  1433. #ifdef EXPRECISION
  1434. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1435. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1436. #endif
  1437. #endif
  1438. #ifdef HASWELL
  1439. #ifdef DEBUG
  1440. fprintf(stderr, "Haswell\n");
  1441. #endif
  1442. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1443. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1444. #endif
  1445. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1446. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1447. #endif
  1448. #if BUILD_COMPLEX
  1449. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1450. #endif
  1451. #if BUILD_COMPLEX16
  1452. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1453. #endif
  1454. #ifdef EXPRECISION
  1455. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1456. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1457. #endif
  1458. #endif
  1459. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1460. #ifdef DEBUG
  1461. fprintf(stderr, "SkylakeX\n");
  1462. #endif
  1463. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1464. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1465. #endif
  1466. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1467. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1468. #endif
  1469. #if BUILD_COMPLEX
  1470. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1471. #endif
  1472. #if BUILD_COMPLEX16
  1473. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1474. #endif
  1475. #ifdef EXPRECISION
  1476. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1477. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1478. #endif
  1479. #endif
  1480. #ifdef OPTERON
  1481. #ifdef DEBUG
  1482. fprintf(stderr, "Opteron\n");
  1483. #endif
  1484. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1485. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1486. #endif
  1487. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1488. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1489. #endif
  1490. #if BUILD_COMPLEX
  1491. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1492. #endif
  1493. #if BUILD_COMPLEX16
  1494. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1495. #endif
  1496. #ifdef EXPRECISION
  1497. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1498. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1499. #endif
  1500. #endif
  1501. #ifdef BARCELONA
  1502. #ifdef DEBUG
  1503. fprintf(stderr, "Barcelona\n");
  1504. #endif
  1505. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1506. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1507. #endif
  1508. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1509. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1510. #endif
  1511. #if BUILD_COMPLEX
  1512. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1513. #endif
  1514. #if BUILD_COMPLEX16
  1515. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1516. #endif
  1517. #ifdef EXPRECISION
  1518. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1519. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1520. #endif
  1521. #endif
  1522. #ifdef BOBCAT
  1523. #ifdef DEBUG
  1524. fprintf(stderr, "Bobcate\n");
  1525. #endif
  1526. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1527. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1528. #endif
  1529. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1530. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1531. #endif
  1532. #if BUILD_COMPLEX
  1533. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1534. #endif
  1535. #if BUILD_COMPLEX16
  1536. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1537. #endif
  1538. #ifdef EXPRECISION
  1539. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1540. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1541. #endif
  1542. #endif
  1543. #ifdef BULLDOZER
  1544. #ifdef DEBUG
  1545. fprintf(stderr, "Bulldozer\n");
  1546. #endif
  1547. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1548. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1549. #endif
  1550. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1551. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1552. #endif
  1553. #if BUILD_COMPLEX
  1554. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1555. #endif
  1556. #if BUILD_COMPLEX16
  1557. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1558. #endif
  1559. #ifdef EXPRECISION
  1560. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1561. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1562. #endif
  1563. #endif
  1564. #ifdef EXCAVATOR
  1565. #ifdef DEBUG
  1566. fprintf(stderr, "Excavator\n");
  1567. #endif
  1568. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1569. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1570. #endif
  1571. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1572. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1573. #endif
  1574. #if BUILD_COMPLEX
  1575. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1576. #endif
  1577. #if BUILD_COMPLEX16
  1578. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1579. #endif
  1580. #ifdef EXPRECISION
  1581. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1582. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1583. #endif
  1584. #endif
  1585. #ifdef PILEDRIVER
  1586. #ifdef DEBUG
  1587. fprintf(stderr, "Piledriver\n");
  1588. #endif
  1589. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1590. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1591. #endif
  1592. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1593. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1594. #endif
  1595. #if BUILD_COMPLEX
  1596. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1597. #endif
  1598. #if BUILD_COMPLEX16
  1599. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1600. #endif
  1601. #ifdef EXPRECISION
  1602. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1603. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1604. #endif
  1605. #endif
  1606. #ifdef STEAMROLLER
  1607. #ifdef DEBUG
  1608. fprintf(stderr, "Steamroller\n");
  1609. #endif
  1610. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1611. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1612. #endif
  1613. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1614. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1615. #endif
  1616. #if BUILD_COMPLEX
  1617. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1618. #endif
  1619. #if BUILD_COMPLEX16
  1620. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1621. #endif
  1622. #ifdef EXPRECISION
  1623. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1624. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1625. #endif
  1626. #endif
  1627. #ifdef ZEN
  1628. #ifdef DEBUG
  1629. fprintf(stderr, "Zen\n");
  1630. #endif
  1631. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1632. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1633. #endif
  1634. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1635. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1636. #endif
  1637. #if BUILD_COMPLEX
  1638. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1639. #endif
  1640. #if BUILD_COMPLEX16
  1641. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1642. #endif
  1643. #ifdef EXPRECISION
  1644. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1645. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1646. #endif
  1647. #endif
  1648. #ifdef NANO
  1649. #ifdef DEBUG
  1650. fprintf(stderr, "NANO\n");
  1651. #endif
  1652. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1653. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1654. #endif
  1655. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1656. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1657. #endif
  1658. #if (BUILD_COMPLEX==1)
  1659. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1660. #endif
  1661. #if (BUILD_COMPLEX16==1)
  1662. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1663. #endif
  1664. #ifdef EXPRECISION
  1665. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1666. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1667. #endif
  1668. #endif
  1669. #ifdef SAPPHIRERAPIDS
  1670. #if (BUILD_BFLOAT16 == 1)
  1671. TABLE_NAME.need_amxtile_permission = 1;
  1672. #endif
  1673. #endif
  1674. #if BUILD_COMPLEX==1
  1675. #ifdef CGEMM3M_DEFAULT_P
  1676. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1677. #else
  1678. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1679. #endif
  1680. #endif
  1681. #if BUILD_COMPLEX16==1
  1682. #ifdef ZGEMM3M_DEFAULT_P
  1683. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1684. #else
  1685. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1686. #endif
  1687. #endif
  1688. #ifdef EXPRECISION
  1689. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1690. #endif
  1691. #if BUILD_SINGLE == 1
  1692. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1693. #endif
  1694. #if BUILD_DOUBLE== 1
  1695. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1696. #endif
  1697. #if BUILD_COMPLEX==1
  1698. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1699. #endif
  1700. #if BUILD_COMPLEX16==1
  1701. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1702. #endif
  1703. #if BUILD_COMPLEX==1
  1704. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1705. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1706. #else
  1707. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1708. #endif
  1709. #endif
  1710. #if BUILD_COMPLEX16==1
  1711. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1712. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1713. #else
  1714. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1715. #endif
  1716. #endif
  1717. #ifdef QUAD_PRECISION
  1718. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1719. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1720. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1721. #endif
  1722. #ifdef DEBUG
  1723. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1724. #endif
  1725. #if BUILD_BFLOAT16==1
  1726. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1727. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1728. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1729. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1730. #endif
  1731. #if BUILD_SINGLE==1
  1732. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1733. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1734. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1735. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1736. #endif
  1737. #if BUILD_DOUBLE==1
  1738. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1739. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1740. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1741. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1742. #endif
  1743. #ifdef EXPRECISION
  1744. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1745. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1746. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1747. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1748. #endif
  1749. #if BUILD_COMPLEX ==1
  1750. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1751. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1752. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1753. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1754. #endif
  1755. #if BUILD_COMPLEX16 ==1
  1756. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1757. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1758. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1759. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1760. #endif
  1761. #if BUILD_COMPLEX == 1
  1762. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1763. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1764. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1765. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1766. #endif
  1767. #if BUILD_COMPLEX16 == 1
  1768. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1769. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1770. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1771. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1772. #endif
  1773. #ifdef EXPRECISION
  1774. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1775. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1776. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1777. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1778. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1779. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1780. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1781. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1782. #endif
  1783. }
  1784. #endif //RISCV64
  1785. #endif //POWER
  1786. #endif //ZARCH
  1787. #endif //(ARCH_LOONGARCH64)
  1788. #endif //(ARCH_MIPS64)
  1789. #endif //(ARCH_ARM64)