You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 53 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG /* Force DEBUG off from here on, overriding any definition left by the headers included above or by build flags. */
  45. static void init_parameter(void); /* Forward declaration of a file-local (static) function; its definition is not visible in this part of the file. */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. sbgemm_kernelTS, sbgemm_betaTS,
  66. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  67. sbgemm_incopyTS, sbgemm_itcopyTS,
  68. #else
  69. sbgemm_oncopyTS, sbgemm_otcopyTS,
  70. #endif
  71. sbgemm_oncopyTS, sbgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #ifdef SMALL_MATRIX_OPT
  104. sbgemm_small_matrix_permitTS,
  105. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  106. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  107. #endif
  108. #endif
  109. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  110. 0, 0, 0,
  111. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  112. #ifdef SGEMM_DEFAULT_UNROLL_MN
  113. SGEMM_DEFAULT_UNROLL_MN,
  114. #else
  115. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  116. #endif
  117. #endif
  118. #ifdef HAVE_EXCLUSIVE_CACHE
  119. 1,
  120. #else
  121. 0,
  122. #endif
  123. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  124. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  125. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  126. snrm2_kTS, sasum_kTS,
  127. #endif
  128. #if BUILD_SINGLE == 1
  129. ssum_kTS,
  130. #endif
  131. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  132. scopy_kTS, sdot_kTS,
  133. // dsdot_kTS,
  134. srot_kTS, saxpy_kTS,
  135. #endif
  136. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  137. sscal_kTS,
  138. #endif
  139. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  140. sswap_kTS,
  141. sgemv_nTS, sgemv_tTS,
  142. #endif
  143. #if BUILD_SINGLE == 1
  144. sger_kTS,
  145. ssymv_LTS, ssymv_UTS,
  146. #endif
  147. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  148. #ifdef ARCH_X86_64
  149. sgemm_directTS,
  150. sgemm_direct_performantTS,
  151. #endif
  152. sgemm_kernelTS, sgemm_betaTS,
  153. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  154. sgemm_incopyTS, sgemm_itcopyTS,
  155. #else
  156. sgemm_oncopyTS, sgemm_otcopyTS,
  157. #endif
  158. sgemm_oncopyTS, sgemm_otcopyTS,
  159. #endif
  160. #if BUILD_SINGLE == 1
  161. #ifdef SMALL_MATRIX_OPT
  162. sgemm_small_matrix_permitTS,
  163. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  164. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  165. #endif
  166. #endif
  167. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  168. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  171. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  172. #else
  173. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  174. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  175. #endif
  176. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  177. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  178. #endif
  179. #if BUILD_SINGLE == 1
  180. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  181. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  182. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  183. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  184. #else
  185. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  186. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  187. #endif
  188. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  189. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  190. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  191. ssymm_iutcopyTS, ssymm_iltcopyTS,
  192. #else
  193. ssymm_outcopyTS, ssymm_oltcopyTS,
  194. #endif
  195. ssymm_outcopyTS, ssymm_oltcopyTS,
  196. #endif
  197. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  198. #ifndef NO_LAPACK
  199. sneg_tcopyTS, slaswp_ncopyTS,
  200. #else
  201. NULL,NULL,
  202. #endif
  203. #endif
  204. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  205. 0, 0, 0,
  206. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  207. #ifdef DGEMM_DEFAULT_UNROLL_MN
  208. DGEMM_DEFAULT_UNROLL_MN,
  209. #else
  210. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  211. #endif
  212. #endif
  213. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  214. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  215. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  216. dnrm2_kTS, dasum_kTS,
  217. #endif
  218. #if (BUILD_DOUBLE==1)
  219. dsum_kTS,
  220. #endif
  221. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  222. dcopy_kTS, ddot_kTS,
  223. #endif
  224. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  225. dsdot_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  228. drot_kTS,
  229. daxpy_kTS,
  230. dscal_kTS,
  231. dswap_kTS,
  232. dgemv_nTS, dgemv_tTS,
  233. #endif
  234. #if (BUILD_DOUBLE==1)
  235. dger_kTS,
  236. dsymv_LTS, dsymv_UTS,
  237. #endif
  238. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  239. dgemm_kernelTS, dgemm_betaTS,
  240. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  241. dgemm_incopyTS, dgemm_itcopyTS,
  242. #else
  243. dgemm_oncopyTS, dgemm_otcopyTS,
  244. #endif
  245. dgemm_oncopyTS, dgemm_otcopyTS,
  246. #endif
  247. #if (BUILD_DOUBLE==1)
  248. #ifdef SMALL_MATRIX_OPT
  249. dgemm_small_matrix_permitTS,
  250. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  251. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  252. #endif
  253. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  254. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  255. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  256. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  257. #else
  258. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  259. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  260. #endif
  261. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  262. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  263. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  264. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  265. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  266. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  267. #else
  268. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  269. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  270. #endif
  271. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  272. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  273. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  274. dsymm_iutcopyTS, dsymm_iltcopyTS,
  275. #else
  276. dsymm_outcopyTS, dsymm_oltcopyTS,
  277. #endif
  278. dsymm_outcopyTS, dsymm_oltcopyTS,
  279. #ifndef NO_LAPACK
  280. dneg_tcopyTS, dlaswp_ncopyTS,
  281. #else
  282. NULL, NULL,
  283. #endif
  284. #endif
  285. #ifdef EXPRECISION
  286. 0, 0, 0,
  287. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  288. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  289. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  290. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  291. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  292. qgemv_nTS, qgemv_tTS, qger_kTS,
  293. qsymv_LTS, qsymv_UTS,
  294. qgemm_kernelTS, qgemm_betaTS,
  295. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  296. qgemm_incopyTS, qgemm_itcopyTS,
  297. #else
  298. qgemm_oncopyTS, qgemm_otcopyTS,
  299. #endif
  300. qgemm_oncopyTS, qgemm_otcopyTS,
  301. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  302. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  303. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  304. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  305. #else
  306. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  307. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  308. #endif
  309. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  310. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  311. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  312. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  313. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  314. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  315. #else
  316. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  317. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  318. #endif
  319. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  320. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  321. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  322. qsymm_iutcopyTS, qsymm_iltcopyTS,
  323. #else
  324. qsymm_outcopyTS, qsymm_oltcopyTS,
  325. #endif
  326. qsymm_outcopyTS, qsymm_oltcopyTS,
  327. #ifndef NO_LAPACK
  328. qneg_tcopyTS, qlaswp_ncopyTS,
  329. #else
  330. NULL, NULL,
  331. #endif
  332. #endif
  333. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  334. 0, 0, 0,
  335. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  336. #ifdef CGEMM_DEFAULT_UNROLL_MN
  337. CGEMM_DEFAULT_UNROLL_MN,
  338. #else
  339. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  340. #endif
  341. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  342. #endif
  343. #if (BUILD_COMPLEX)
  344. cnrm2_kTS, casum_kTS, csum_kTS,
  345. #endif
  346. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  347. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  348. #endif
  349. #if (BUILD_COMPLEX)
  350. csrot_kTS,
  351. #endif
  352. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  353. caxpy_kTS,
  354. caxpyc_kTS,
  355. cscal_kTS,
  356. cswap_kTS,
  357. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  358. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  359. #endif
  360. #if (BUILD_COMPLEX)
  361. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  362. csymv_LTS, csymv_UTS,
  363. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  364. #endif
  365. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  366. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  367. cgemm_betaTS,
  368. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  369. cgemm_incopyTS, cgemm_itcopyTS,
  370. #else
  371. cgemm_oncopyTS, cgemm_otcopyTS,
  372. #endif
  373. cgemm_oncopyTS, cgemm_otcopyTS,
  374. #ifdef SMALL_MATRIX_OPT
  375. cgemm_small_matrix_permitTS,
  376. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  377. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  378. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  379. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  380. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  381. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  382. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  383. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  384. #endif
  385. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  386. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  387. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  388. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  389. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  390. #else
  391. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  392. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  393. #endif
  394. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  395. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  396. #endif
  397. #if (BUILD_COMPLEX)
  398. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  399. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  400. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  401. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  402. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  403. #else
  404. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  405. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  406. #endif
  407. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  408. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  409. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  410. csymm_iutcopyTS, csymm_iltcopyTS,
  411. #else
  412. csymm_outcopyTS, csymm_oltcopyTS,
  413. #endif
  414. csymm_outcopyTS, csymm_oltcopyTS,
  415. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  416. chemm_iutcopyTS, chemm_iltcopyTS,
  417. #else
  418. chemm_outcopyTS, chemm_oltcopyTS,
  419. #endif
  420. chemm_outcopyTS, chemm_oltcopyTS,
  421. 0, 0, 0,
  422. #if (USE_GEMM3M)
  423. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  424. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  425. #else
  426. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  427. #endif
  428. cgemm3m_kernelTS,
  429. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  430. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  431. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  432. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  433. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  434. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  435. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  436. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  437. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  438. csymm3m_oucopybTS, csymm3m_olcopybTS,
  439. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  440. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  441. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  442. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  443. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  444. chemm3m_oucopybTS, chemm3m_olcopybTS,
  445. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  446. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  447. #else
  448. 0, 0, 0,
  449. NULL,
  450. NULL, NULL,
  451. NULL, NULL,
  452. NULL, NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. #endif
  469. #endif
  470. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  471. #ifndef NO_LAPACK
  472. cneg_tcopyTS,
  473. claswp_ncopyTS,
  474. #else
  475. NULL, NULL,
  476. #endif
  477. #endif
  478. #if BUILD_COMPLEX16 == 1
  479. 0, 0, 0,
  480. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  481. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  482. ZGEMM_DEFAULT_UNROLL_MN,
  483. #else
  484. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  485. #endif
  486. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  487. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  488. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  489. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  490. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  491. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  492. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  493. zsymv_LTS, zsymv_UTS,
  494. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  495. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  496. zgemm_betaTS,
  497. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  498. zgemm_incopyTS, zgemm_itcopyTS,
  499. #else
  500. zgemm_oncopyTS, zgemm_otcopyTS,
  501. #endif
  502. zgemm_oncopyTS, zgemm_otcopyTS,
  503. #ifdef SMALL_MATRIX_OPT
  504. zgemm_small_matrix_permitTS,
  505. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  506. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  507. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  508. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  509. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  510. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  511. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  512. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  513. #endif
  514. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  515. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  516. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  517. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  518. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  519. #else
  520. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  521. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  522. #endif
  523. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  524. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  525. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  526. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  527. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  528. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  529. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  530. #else
  531. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  532. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  533. #endif
  534. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  535. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  536. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  537. zsymm_iutcopyTS, zsymm_iltcopyTS,
  538. #else
  539. zsymm_outcopyTS, zsymm_oltcopyTS,
  540. #endif
  541. zsymm_outcopyTS, zsymm_oltcopyTS,
  542. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  543. zhemm_iutcopyTS, zhemm_iltcopyTS,
  544. #else
  545. zhemm_outcopyTS, zhemm_oltcopyTS,
  546. #endif
  547. zhemm_outcopyTS, zhemm_oltcopyTS,
  548. 0, 0, 0,
  549. #if (USE_GEMM3M)
  550. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  551. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  552. #else
  553. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  554. #endif
  555. zgemm3m_kernelTS,
  556. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  557. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  558. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  559. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  560. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  561. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  562. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  563. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  564. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  565. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  566. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  567. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  568. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  569. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  570. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  571. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  572. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  573. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  574. #else
  575. 0, 0, 0,
  576. NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. NULL, NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. #endif
  596. #ifndef NO_LAPACK
  597. zneg_tcopyTS, zlaswp_ncopyTS,
  598. #else
  599. NULL, NULL,
  600. #endif
  601. #endif
  602. #ifdef EXPRECISION
  603. 0, 0, 0,
  604. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  605. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  606. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  607. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  608. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  609. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  610. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  611. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  612. xsymv_LTS, xsymv_UTS,
  613. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  614. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  615. xgemm_betaTS,
  616. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  617. xgemm_incopyTS, xgemm_itcopyTS,
  618. #else
  619. xgemm_oncopyTS, xgemm_otcopyTS,
  620. #endif
  621. xgemm_oncopyTS, xgemm_otcopyTS,
  622. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  623. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  624. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  625. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  626. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  627. #else
  628. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  629. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  630. #endif
  631. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  632. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  633. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  634. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  635. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  636. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  637. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  638. #else
  639. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  640. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  641. #endif
  642. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  643. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  644. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  645. xsymm_iutcopyTS, xsymm_iltcopyTS,
  646. #else
  647. xsymm_outcopyTS, xsymm_oltcopyTS,
  648. #endif
  649. xsymm_outcopyTS, xsymm_oltcopyTS,
  650. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  651. xhemm_iutcopyTS, xhemm_iltcopyTS,
  652. #else
  653. xhemm_outcopyTS, xhemm_oltcopyTS,
  654. #endif
  655. xhemm_outcopyTS, xhemm_oltcopyTS,
  656. 0, 0, 0,
  657. #if (USE_GEMM3M)
  658. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  659. xgemm3m_kernelTS,
  660. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  661. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  662. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  663. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  664. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  665. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  666. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  667. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  668. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  669. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  670. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  671. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  672. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  673. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  674. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  675. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  676. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  677. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  678. #else
  679. 0, 0, 0,
  680. NULL,
  681. NULL, NULL,
  682. NULL, NULL,
  683. NULL, NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. NULL, NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. #endif
  700. #ifndef NO_LAPACK
  701. xneg_tcopyTS, xlaswp_ncopyTS,
  702. #else
  703. NULL, NULL,
  704. #endif
  705. #endif
  706. init_parameter,
  707. SNUMOPT, DNUMOPT, QNUMOPT,
  708. #if BUILD_SINGLE == 1
  709. saxpby_kTS,
  710. #endif
  711. #if BUILD_DOUBLE == 1
  712. daxpby_kTS,
  713. #endif
  714. #if BUILD_COMPLEX == 1
  715. caxpby_kTS,
  716. #endif
  717. #if BUILD_COMPLEX16== 1
  718. zaxpby_kTS,
  719. #endif
  720. #if BUILD_SINGLE == 1
  721. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  722. #endif
  723. #if BUILD_DOUBLE== 1
  724. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  725. #endif
  726. #if BUILD_COMPLEX == 1
  727. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  728. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  729. #endif
  730. #if BUILD_COMPLEX16 == 1
  731. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  732. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  733. #endif
  734. #if BUILD_SINGLE == 1
  735. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  736. #endif
  737. #if BUILD_DOUBLE== 1
  738. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  739. #endif
  740. #if BUILD_COMPLEX== 1
  741. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  742. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  743. #endif
  744. #if BUILD_COMPLEX16==1
  745. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  746. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  747. #endif
  748. #if BUILD_SINGLE == 1
  749. sgeadd_kTS,
  750. #endif
  751. #if BUILD_DOUBLE==1
  752. dgeadd_kTS,
  753. #endif
  754. #if BUILD_COMPLEX==1
  755. cgeadd_kTS,
  756. #endif
  757. #if BUILD_COMPLEX16==1
  758. zgeadd_kTS
  759. #endif
  760. };
  761. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter().
   *
   * Copies the compile-time GEMM blocking defaults (the *_DEFAULT_P/Q/R
   * macros) into the P, Q and R fields of TABLE_NAME for every precision
   * that is enabled in this build.  Takes no arguments, returns nothing.
   *
   * The real single/double entries are written with the same guard for
   * P, Q and R.  They are also written when only the matching complex
   * type is built, because the GEMM3M fallbacks at the end of this
   * function copy TABLE_NAME.sgemm_* / TABLE_NAME.dgemm_* and would
   * otherwise pick up fields that were never assigned here.
   */
  static void init_parameter(void) {

  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* Single precision real: same guard for P, Q and R. */
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* Double precision real: also needed by the zgemm3m fallback. */
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /*
     * GEMM3M blocking: use the dedicated default when one is defined,
     * otherwise reuse the value already stored for the matching real
     * precision (single for cgemm3m, double for zgemm3m).
     */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif

  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif

  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif

  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif

  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

  #ifdef EXPRECISION
    /* xgemm3m always reuses the qgemm values stored above. */
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  854. #else // (ARCH_ARM64)
  855. #if defined(ARCH_MIPS64)
  856. static void init_parameter(void) {
  857. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  858. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  859. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  860. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  861. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  862. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  863. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  864. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  865. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  866. TABLE_NAME.dgemm_r = 640;
  867. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  868. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  869. #ifdef EXPRECISION
  870. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  871. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  872. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  873. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  874. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  875. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  876. #endif
  877. #if defined(USE_GEMM3M)
  878. #ifdef CGEMM3M_DEFAULT_P
  879. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  880. #else
  881. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  882. #endif
  883. #ifdef ZGEMM3M_DEFAULT_P
  884. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  885. #else
  886. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  887. #endif
  888. #ifdef CGEMM3M_DEFAULT_Q
  889. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  890. #else
  891. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  892. #endif
  893. #ifdef ZGEMM3M_DEFAULT_Q
  894. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  895. #else
  896. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  897. #endif
  898. #ifdef CGEMM3M_DEFAULT_R
  899. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  900. #else
  901. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  902. #endif
  903. #ifdef ZGEMM3M_DEFAULT_R
  904. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  905. #else
  906. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  907. #endif
  908. #ifdef EXPRECISION
  909. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  910. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  911. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  912. #endif
  913. #endif
  914. }
  915. #else // (ARCH_MIPS64)
  916. #if (ARCH_POWER)
  917. static void init_parameter(void) {
  918. #ifdef BUILD_BFLOAT16
  919. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  920. #endif
  921. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  922. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  923. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  924. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  925. #ifdef BUILD_BFLOAT16
  926. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  927. #endif
  928. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  929. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  930. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  931. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  932. #ifdef BUILD_BFLOAT16
  933. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  934. #endif
  935. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  936. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  937. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  938. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  939. }
  940. #else //POWER
  941. #if (ARCH_ZARCH)
  942. static void init_parameter(void) {
  943. #ifdef BUILD_BFLOAT16
  944. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  945. #endif
  946. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  947. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  948. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  949. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  950. #ifdef BUILD_BFLOAT16
  951. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  952. #endif
  953. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  954. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  955. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  956. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  957. #ifdef BUILD_BFLOAT16
  958. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  959. #endif
  960. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  961. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  962. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  963. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  964. }
  965. #else //ZARCH
  966. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup (compiled only when ARCH_X86 is defined).
   *
   * Executes CPUID leaf 2, splits the four result registers into 15
   * descriptor bytes (the lowest byte of eax is not used) and returns the
   * size associated with the first byte that matches a known L2 cache
   * descriptor.
   *
   * Returns the L2 size in kilobytes.  When no descriptor matches, a
   * warning is written to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /* eax contributes three bytes, the other registers four each. */
    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* Descriptor 0x48 is a 3 MB L2 cache: 3 * 1024 KB. */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* No known L2 descriptor was reported: warn and use a default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1037. #endif
  /*
   * Returns the L2 cache size in kilobytes.
   *
   * Reads bits 16 and up of ecx (16-bit field) from CPUID function
   * 0x80000006.  A positive value is returned as is.  Otherwise the
   * result depends on the build: with ARCH_X86 the legacy lookup
   * get_l2_size_old() is used; without it a warning is printed to stderr
   * and 256 is returned.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int l2;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l2 = BITMASK(ecx, 16, 0xffff);

    /* Common to both build variants: a usable reading is returned directly. */
    if (l2 > 0) return l2;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Returns the L3 cache size derived from CPUID function 0x80000006:
   * the 14-bit field starting at bit 18 of edx, multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result kilobytes - confirm against the CPUID specification.
   */
  static __inline__ int get_l3_size(void){

    int eax;
    int ebx;
    int ecx;
    int edx;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    return BITMASK(edx, 18, 0x3fff) * 512;
  }
  1058. static void init_parameter(void) {
  1059. int l2 = get_l2_size();
  1060. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1061. /* where the GEMM unrolling parameters do not depend on l2 */
  1062. #ifdef BUILD_BFLOAT16
  1063. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1064. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1065. #endif
  1066. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1067. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1068. #endif
  1069. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1070. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1071. #endif
  1072. #if BUILD_COMPLEX == 1
  1073. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1074. #endif
  1075. #if BUILD_COMPLEX16==1
  1076. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1077. #endif
  1078. #if BUILD_COMPLEX == 1
  1079. #ifdef CGEMM3M_DEFAULT_Q
  1080. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1081. #else
  1082. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1083. #endif
  1084. #endif
  1085. #if BUILD_COMPLEX16 == 1
  1086. #ifdef ZGEMM3M_DEFAULT_Q
  1087. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1088. #else
  1089. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1090. #endif
  1091. #endif
  1092. #ifdef EXPRECISION
  1093. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1094. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1095. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1096. #endif
  1097. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1098. #ifdef DEBUG
  1099. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1100. #endif
  1101. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1102. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1103. #endif
  1104. #if BUILD_DOUBLE == 1
  1105. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1106. #endif
  1107. #if BUILD_COMPLEX==1
  1108. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1109. #endif
  1110. #if BUILD_COMPLEX16==1
  1111. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1112. #endif
  1113. #ifdef EXPRECISION
  1114. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1115. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1116. #endif
  1117. #endif
  1118. #ifdef CORE_NORTHWOOD
  1119. #ifdef DEBUG
  1120. fprintf(stderr, "Northwood\n");
  1121. #endif
  1122. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1123. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1124. #endif
  1125. #if BUILD_DOUBLE == 1
  1126. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1127. #endif
  1128. #if BUILD_COMPLEX==1
  1129. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1130. #endif
  1131. #if BUILD_COMPLEX16==1
  1132. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1133. #endif
  1134. #ifdef EXPRECISION
  1135. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1136. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1137. #endif
  1138. #endif
  1139. #ifdef ATOM
  1140. #ifdef DEBUG
  1141. fprintf(stderr, "Atom\n");
  1142. #endif
  1143. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1144. TABLE_NAME.sgemm_p = 256;
  1145. #endif
  1146. #if BUILD_DOUBLE ==1
  1147. TABLE_NAME.dgemm_p = 128;
  1148. #endif
  1149. #if BUILD_COMPLEX==1
  1150. TABLE_NAME.cgemm_p = 128;
  1151. #endif
  1152. #if BUILD_COMPLEX16==1
  1153. TABLE_NAME.zgemm_p = 64;
  1154. #endif
  1155. #ifdef EXPRECISION
  1156. TABLE_NAME.qgemm_p = 64;
  1157. TABLE_NAME.xgemm_p = 32;
  1158. #endif
  1159. #endif
  1160. #ifdef CORE_PRESCOTT
  1161. #ifdef DEBUG
  1162. fprintf(stderr, "Prescott\n");
  1163. #endif
  1164. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1165. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1166. #endif
  1167. #if BUILD_DOUBLE ==1
  1168. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1169. #endif
  1170. #if BUILD_COMPLEX==1
  1171. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1172. #endif
  1173. #if BUILD_COMPLEX16 == 1
  1174. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1175. #endif
  1176. #ifdef EXPRECISION
  1177. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1178. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1179. #endif
  1180. #endif
  1181. #ifdef CORE2
  1182. #ifdef DEBUG
  1183. fprintf(stderr, "Core2\n");
  1184. #endif
  1185. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1186. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1187. #endif
  1188. #if BUILD_DOUBLE==1
  1189. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1190. #endif
  1191. #if BUILD_COMPLEX==1
  1192. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1193. #endif
  1194. #if BUILD_COMPLEX16==1
  1195. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1196. #endif
  1197. #ifdef EXPRECISION
  1198. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1199. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1200. #endif
  1201. #endif
  1202. #ifdef PENRYN
  1203. #ifdef DEBUG
  1204. fprintf(stderr, "Penryn\n");
  1205. #endif
  1206. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1207. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1208. #endif
  1209. #if BUILD_DOUBLE == 1
  1210. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1211. #endif
  1212. #if BUILD_COMPLEX==1
  1213. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1214. #endif
  1215. #if BUILD_COMPLEX16==1
  1216. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1217. #endif
  1218. #ifdef EXPRECISION
  1219. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1220. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1221. #endif
  1222. #endif
  1223. #ifdef DUNNINGTON
  1224. #ifdef DEBUG
  1225. fprintf(stderr, "Dunnington\n");
  1226. #endif
  1227. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1228. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1229. #endif
  1230. #if BUILD_DOUBLE ==1
  1231. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1232. #endif
  1233. #if BUILD_COMPLEX==1
  1234. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1235. #endif
  1236. #if BUILD_COMPLEX16==1
  1237. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1238. #endif
  1239. #ifdef EXPRECISION
  1240. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1241. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1242. #endif
  1243. #endif
  1244. #ifdef NEHALEM
  1245. #ifdef DEBUG
  1246. fprintf(stderr, "Nehalem\n");
  1247. #endif
  1248. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1249. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1250. #endif
  1251. #if BUILD_DOUBLE
  1252. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1253. #endif
  1254. #if BUILD_COMPLEX
  1255. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1256. #endif
  1257. #if BUILD_COMPLEX16
  1258. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1259. #endif
  1260. #ifdef EXPRECISION
  1261. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1262. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1263. #endif
  1264. #endif
  1265. #ifdef SANDYBRIDGE
  1266. #ifdef DEBUG
  1267. fprintf(stderr, "Sandybridge\n");
  1268. #endif
  1269. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1270. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1271. #endif
  1272. #if BUILD_DOUBLE
  1273. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1274. #endif
  1275. #if BUILD_COMPLEX
  1276. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1277. #endif
  1278. #if BUILD_COMPLEX16
  1279. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1280. #endif
  1281. #ifdef EXPRECISION
  1282. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1283. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1284. #endif
  1285. #endif
  1286. #ifdef HASWELL
  1287. #ifdef DEBUG
  1288. fprintf(stderr, "Haswell\n");
  1289. #endif
  1290. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1291. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1292. #endif
  1293. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1294. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1295. #endif
  1296. #if BUILD_COMPLEX
  1297. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1298. #endif
  1299. #if BUILD_COMPLEX16
  1300. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1301. #endif
  1302. #ifdef EXPRECISION
  1303. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1304. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1305. #endif
  1306. #endif
  1307. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1308. #ifdef DEBUG
  1309. fprintf(stderr, "SkylakeX\n");
  1310. #endif
  1311. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1312. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1313. #endif
  1314. #if BUILD_DOUBLE
  1315. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1316. #endif
  1317. #if BUILD_COMPLEX
  1318. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1319. #endif
  1320. #if BUILD_COMPLEX16
  1321. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1322. #endif
  1323. #ifdef EXPRECISION
  1324. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1325. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1326. #endif
  1327. #endif
  1328. #ifdef OPTERON
  1329. #ifdef DEBUG
  1330. fprintf(stderr, "Opteron\n");
  1331. #endif
  1332. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1333. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1334. #endif
  1335. #if BUILD_DOUBLE
  1336. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1337. #endif
  1338. #if BUILD_COMPLEX
  1339. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1340. #endif
  1341. #if BUILD_COMPLEX16
  1342. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1343. #endif
  1344. #ifdef EXPRECISION
  1345. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1346. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1347. #endif
  1348. #endif
  1349. #ifdef BARCELONA
  1350. #ifdef DEBUG
  1351. fprintf(stderr, "Barcelona\n");
  1352. #endif
  1353. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1354. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1355. #endif
  1356. #if BUILD_DOUBLE
  1357. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1358. #endif
  1359. #if BUILD_COMPLEX
  1360. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1361. #endif
  1362. #if BUILD_COMPLEX16
  1363. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1364. #endif
  1365. #ifdef EXPRECISION
  1366. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1367. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1368. #endif
  1369. #endif
  1370. #ifdef BOBCAT
  1371. #ifdef DEBUG
  1372. fprintf(stderr, "Bobcate\n");
  1373. #endif
  1374. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1375. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1376. #endif
  1377. #if BUILD_DOUBLE
  1378. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1379. #endif
  1380. #if BUILD_COMPLEX
  1381. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1382. #endif
  1383. #if BUILD_COMPLEX16
  1384. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1385. #endif
  1386. #ifdef EXPRECISION
  1387. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1388. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1389. #endif
  1390. #endif
  1391. #ifdef BULLDOZER
  1392. #ifdef DEBUG
  1393. fprintf(stderr, "Bulldozer\n");
  1394. #endif
  1395. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1396. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1397. #endif
  1398. #if BUILD_DOUBLE
  1399. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1400. #endif
  1401. #if BUILD_COMPLEX
  1402. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1403. #endif
  1404. #if BUILD_COMPLEX16
  1405. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1406. #endif
  1407. #ifdef EXPRECISION
  1408. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1409. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1410. #endif
  1411. #endif
  1412. #ifdef EXCAVATOR
  1413. #ifdef DEBUG
  1414. fprintf(stderr, "Excavator\n");
  1415. #endif
  1416. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1417. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1418. #endif
  1419. #if BUILD_DOUBLE
  1420. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1421. #endif
  1422. #if BUILD_COMPLEX
  1423. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1424. #endif
  1425. #if BUILD_COMPLEX16
  1426. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1427. #endif
  1428. #ifdef EXPRECISION
  1429. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1430. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1431. #endif
  1432. #endif
  1433. #ifdef PILEDRIVER
  1434. #ifdef DEBUG
  1435. fprintf(stderr, "Piledriver\n");
  1436. #endif
  1437. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1438. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1439. #endif
  1440. #if BUILD_DOUBLE
  1441. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1442. #endif
  1443. #if BUILD_COMPLEX
  1444. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1445. #endif
  1446. #if BUILD_COMPLEX16
  1447. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1448. #endif
  1449. #ifdef EXPRECISION
  1450. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1451. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1452. #endif
  1453. #endif
  1454. #ifdef STEAMROLLER
  1455. #ifdef DEBUG
  1456. fprintf(stderr, "Steamroller\n");
  1457. #endif
  1458. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1459. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1460. #endif
  1461. #if BUILD_DOUBLE
  1462. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1463. #endif
  1464. #if BUILD_COMPLEX
  1465. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1466. #endif
  1467. #if BUILD_COMPLEX16
  1468. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1469. #endif
  1470. #ifdef EXPRECISION
  1471. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1472. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1473. #endif
  1474. #endif
  1475. #ifdef ZEN
  1476. #ifdef DEBUG
  1477. fprintf(stderr, "Zen\n");
  1478. #endif
  1479. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1480. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1481. #endif
  1482. #if BUILD_DOUBLE
  1483. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1484. #endif
  1485. #if BUILD_COMPLEX
  1486. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1487. #endif
  1488. #if BUILD_COMPLEX16
  1489. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1490. #endif
  1491. #ifdef EXPRECISION
  1492. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1493. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1494. #endif
  1495. #endif
  1496. #ifdef NANO
  1497. #ifdef DEBUG
  1498. fprintf(stderr, "NANO\n");
  1499. #endif
  1500. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1501. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1502. #endif
  1503. #if (BUILD_DOUBLE==1)
  1504. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1505. #endif
  1506. #if (BUILD_COMPLEX==1)
  1507. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1508. #endif
  1509. #if (BUILD_COMPLEX16==1)
  1510. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1511. #endif
  1512. #ifdef EXPRECISION
  1513. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1514. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1515. #endif
  1516. #endif
  1517. #if BUILD_COMPLEX==1
  1518. #ifdef CGEMM3M_DEFAULT_P
  1519. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1520. #else
  1521. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1522. #endif
  1523. #endif
  1524. #if BUILD_COMPLEX16==1
  1525. #ifdef ZGEMM3M_DEFAULT_P
  1526. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1527. #else
  1528. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1529. #endif
  1530. #endif
  1531. #ifdef EXPRECISION
  1532. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1533. #endif
  1534. #if BUILD_SINGLE == 1
  1535. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1536. #endif
  1537. #if BUILD_DOUBLE== 1
  1538. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1539. #endif
  1540. #if BUILD_COMPLEX==1
  1541. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1542. #endif
  1543. #if BUILD_COMPLEX16==1
  1544. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1545. #endif
  1546. #if BUILD_COMPLEX==1
  1547. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1548. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1549. #else
  1550. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1551. #endif
  1552. #endif
  1553. #if BUILD_COMPLEX16==1
  1554. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1555. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1556. #else
  1557. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1558. #endif
  1559. #endif
  1560. #ifdef QUAD_PRECISION
  1561. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1562. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1563. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1564. #endif
  1565. #ifdef DEBUG
  1566. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1567. #endif
  1568. #if BUILD_BFLOAT16==1
  1569. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1570. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1571. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1572. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1573. #endif
  1574. #if BUILD_SINGLE==1
  1575. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1576. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1577. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1578. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1579. #endif
  1580. #if BUILD_DOUBLE==1
  1581. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1582. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1583. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1584. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1585. #endif
  1586. #ifdef EXPRECISION
  1587. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1588. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1589. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1590. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1591. #endif
  1592. #if BUILD_COMPLEX ==1
  1593. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1594. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1595. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1596. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1597. #endif
  1598. #if BUILD_COMPLEX16 ==1
  1599. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1600. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1601. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1602. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1603. #endif
  1604. #if BUILD_COMPLEX == 1
  1605. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1606. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1607. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1608. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1609. #endif
  1610. #if BUILD_COMPLEX16 == 1
  1611. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1612. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1613. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1614. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1615. #endif
  1616. #ifdef EXPRECISION
  1617. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1618. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1619. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1620. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1621. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1622. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1623. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1624. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1625. #endif
  1626. }
  1627. #endif //POWER
  1628. #endif //ZARCH
  1629. #endif //(ARCH_MIPS64)
  1630. #endif //(ARCH_ARM64)