You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 52 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. sbgemm_kernelTS, sbgemm_betaTS,
  66. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  67. sbgemm_incopyTS, sbgemm_itcopyTS,
  68. #else
  69. sbgemm_oncopyTS, sbgemm_otcopyTS,
  70. #endif
  71. sbgemm_oncopyTS, sbgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #ifdef SMALL_MATRIX_OPT
  104. sbgemm_small_matrix_permitTS,
  105. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  106. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  107. #endif
  108. #endif
  109. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  110. 0, 0, 0,
  111. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  112. #ifdef SGEMM_DEFAULT_UNROLL_MN
  113. SGEMM_DEFAULT_UNROLL_MN,
  114. #else
  115. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  116. #endif
  117. #endif
  118. #ifdef HAVE_EXCLUSIVE_CACHE
  119. 1,
  120. #else
  121. 0,
  122. #endif
  123. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  124. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  125. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  126. snrm2_kTS, sasum_kTS,
  127. #endif
  128. #if BUILD_SINGLE == 1
  129. ssum_kTS,
  130. #endif
  131. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  132. scopy_kTS, sdot_kTS,
  133. // dsdot_kTS,
  134. srot_kTS, saxpy_kTS,
  135. #endif
  136. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  137. sscal_kTS,
  138. #endif
  139. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  140. sswap_kTS,
  141. sgemv_nTS, sgemv_tTS,
  142. #endif
  143. #if BUILD_SINGLE == 1
  144. sger_kTS,
  145. ssymv_LTS, ssymv_UTS,
  146. #endif
  147. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  148. #ifdef ARCH_X86_64
  149. sgemm_directTS,
  150. sgemm_direct_performantTS,
  151. #endif
  152. sgemm_kernelTS, sgemm_betaTS,
  153. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  154. sgemm_incopyTS, sgemm_itcopyTS,
  155. #else
  156. sgemm_oncopyTS, sgemm_otcopyTS,
  157. #endif
  158. sgemm_oncopyTS, sgemm_otcopyTS,
  159. #endif
  160. #if BUILD_SINGLE == 1
  161. #ifdef SMALL_MATRIX_OPT
  162. sgemm_small_matrix_permitTS,
  163. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  164. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  165. #endif
  166. #endif
  167. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  168. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  171. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  172. #else
  173. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  174. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  175. #endif
  176. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  177. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  178. #endif
  179. #if BUILD_SINGLE == 1
  180. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  181. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  182. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  183. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  184. #else
  185. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  186. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  187. #endif
  188. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  189. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  190. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  191. ssymm_iutcopyTS, ssymm_iltcopyTS,
  192. #else
  193. ssymm_outcopyTS, ssymm_oltcopyTS,
  194. #endif
  195. ssymm_outcopyTS, ssymm_oltcopyTS,
  196. #endif
  197. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  198. #ifndef NO_LAPACK
  199. sneg_tcopyTS, slaswp_ncopyTS,
  200. #else
  201. NULL,NULL,
  202. #endif
  203. #endif
  204. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  205. 0, 0, 0,
  206. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  207. #ifdef DGEMM_DEFAULT_UNROLL_MN
  208. DGEMM_DEFAULT_UNROLL_MN,
  209. #else
  210. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  211. #endif
  212. #endif
  213. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  214. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  215. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  216. dnrm2_kTS, dasum_kTS,
  217. #endif
  218. #if (BUILD_DOUBLE==1)
  219. dsum_kTS,
  220. #endif
  221. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  222. dcopy_kTS, ddot_kTS,
  223. #endif
  224. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  225. dsdot_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  228. drot_kTS,
  229. daxpy_kTS,
  230. dscal_kTS,
  231. dswap_kTS,
  232. dgemv_nTS, dgemv_tTS,
  233. #endif
  234. #if (BUILD_DOUBLE==1)
  235. dger_kTS,
  236. dsymv_LTS, dsymv_UTS,
  237. #endif
  238. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  239. dgemm_kernelTS, dgemm_betaTS,
  240. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  241. dgemm_incopyTS, dgemm_itcopyTS,
  242. #else
  243. dgemm_oncopyTS, dgemm_otcopyTS,
  244. #endif
  245. dgemm_oncopyTS, dgemm_otcopyTS,
  246. #endif
  247. #if (BUILD_DOUBLE==1)
  248. #ifdef SMALL_MATRIX_OPT
  249. dgemm_small_matrix_permitTS,
  250. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  251. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  252. #endif
  253. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  254. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  255. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  256. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  257. #else
  258. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  259. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  260. #endif
  261. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  262. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  263. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  264. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  265. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  266. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  267. #else
  268. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  269. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  270. #endif
  271. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  272. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  273. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  274. dsymm_iutcopyTS, dsymm_iltcopyTS,
  275. #else
  276. dsymm_outcopyTS, dsymm_oltcopyTS,
  277. #endif
  278. dsymm_outcopyTS, dsymm_oltcopyTS,
  279. #ifndef NO_LAPACK
  280. dneg_tcopyTS, dlaswp_ncopyTS,
  281. #else
  282. NULL, NULL,
  283. #endif
  284. #endif
  285. #ifdef EXPRECISION
  286. 0, 0, 0,
  287. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  288. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  289. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  290. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  291. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  292. qgemv_nTS, qgemv_tTS, qger_kTS,
  293. qsymv_LTS, qsymv_UTS,
  294. qgemm_kernelTS, qgemm_betaTS,
  295. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  296. qgemm_incopyTS, qgemm_itcopyTS,
  297. #else
  298. qgemm_oncopyTS, qgemm_otcopyTS,
  299. #endif
  300. qgemm_oncopyTS, qgemm_otcopyTS,
  301. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  302. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  303. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  304. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  305. #else
  306. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  307. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  308. #endif
  309. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  310. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  311. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  312. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  313. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  314. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  315. #else
  316. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  317. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  318. #endif
  319. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  320. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  321. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  322. qsymm_iutcopyTS, qsymm_iltcopyTS,
  323. #else
  324. qsymm_outcopyTS, qsymm_oltcopyTS,
  325. #endif
  326. qsymm_outcopyTS, qsymm_oltcopyTS,
  327. #ifndef NO_LAPACK
  328. qneg_tcopyTS, qlaswp_ncopyTS,
  329. #else
  330. NULL, NULL,
  331. #endif
  332. #endif
  333. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  334. 0, 0, 0,
  335. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  336. #ifdef CGEMM_DEFAULT_UNROLL_MN
  337. CGEMM_DEFAULT_UNROLL_MN,
  338. #else
  339. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  340. #endif
  341. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  342. #endif
  343. #if (BUILD_COMPLEX)
  344. cnrm2_kTS, casum_kTS, csum_kTS,
  345. #endif
  346. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  347. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  348. #endif
  349. #if (BUILD_COMPLEX)
  350. csrot_kTS,
  351. #endif
  352. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  353. caxpy_kTS,
  354. caxpyc_kTS,
  355. cscal_kTS,
  356. cswap_kTS,
  357. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  358. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  359. #endif
  360. #if (BUILD_COMPLEX)
  361. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  362. csymv_LTS, csymv_UTS,
  363. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  364. #endif
  365. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  366. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  367. cgemm_betaTS,
  368. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  369. cgemm_incopyTS, cgemm_itcopyTS,
  370. #else
  371. cgemm_oncopyTS, cgemm_otcopyTS,
  372. #endif
  373. cgemm_oncopyTS, cgemm_otcopyTS,
  374. #ifdef SMALL_MATRIX_OPT
  375. cgemm_small_matrix_permitTS,
  376. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  377. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  378. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  379. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  380. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  381. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  382. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  383. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  384. #endif
  385. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  386. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  387. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  388. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  389. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  390. #else
  391. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  392. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  393. #endif
  394. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  395. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  396. #endif
  397. #if (BUILD_COMPLEX)
  398. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  399. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  400. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  401. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  402. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  403. #else
  404. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  405. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  406. #endif
  407. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  408. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  409. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  410. csymm_iutcopyTS, csymm_iltcopyTS,
  411. #else
  412. csymm_outcopyTS, csymm_oltcopyTS,
  413. #endif
  414. csymm_outcopyTS, csymm_oltcopyTS,
  415. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  416. chemm_iutcopyTS, chemm_iltcopyTS,
  417. #else
  418. chemm_outcopyTS, chemm_oltcopyTS,
  419. #endif
  420. chemm_outcopyTS, chemm_oltcopyTS,
  421. 0, 0, 0,
  422. #if (USE_GEMM3M)
  423. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  424. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  425. #else
  426. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  427. #endif
  428. cgemm3m_kernelTS,
  429. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  430. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  431. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  432. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  433. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  434. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  435. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  436. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  437. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  438. csymm3m_oucopybTS, csymm3m_olcopybTS,
  439. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  440. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  441. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  442. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  443. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  444. chemm3m_oucopybTS, chemm3m_olcopybTS,
  445. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  446. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  447. #else
  448. 0, 0, 0,
  449. NULL,
  450. NULL, NULL,
  451. NULL, NULL,
  452. NULL, NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. #endif
  469. #endif
  470. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  471. #ifndef NO_LAPACK
  472. cneg_tcopyTS,
  473. claswp_ncopyTS,
  474. #else
  475. NULL, NULL,
  476. #endif
  477. #endif
  478. #if BUILD_COMPLEX16 == 1
  479. 0, 0, 0,
  480. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  481. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  482. ZGEMM_DEFAULT_UNROLL_MN,
  483. #else
  484. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  485. #endif
  486. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  487. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  488. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  489. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  490. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  491. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  492. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  493. zsymv_LTS, zsymv_UTS,
  494. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  495. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  496. zgemm_betaTS,
  497. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  498. zgemm_incopyTS, zgemm_itcopyTS,
  499. #else
  500. zgemm_oncopyTS, zgemm_otcopyTS,
  501. #endif
  502. zgemm_oncopyTS, zgemm_otcopyTS,
  503. #ifdef SMALL_MATRIX_OPT
  504. zgemm_small_matrix_permitTS,
  505. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  506. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  507. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  508. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  509. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  510. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  511. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  512. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  513. #endif
  514. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  515. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  516. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  517. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  518. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  519. #else
  520. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  521. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  522. #endif
  523. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  524. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  525. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  526. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  527. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  528. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  529. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  530. #else
  531. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  532. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  533. #endif
  534. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  535. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  536. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  537. zsymm_iutcopyTS, zsymm_iltcopyTS,
  538. #else
  539. zsymm_outcopyTS, zsymm_oltcopyTS,
  540. #endif
  541. zsymm_outcopyTS, zsymm_oltcopyTS,
  542. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  543. zhemm_iutcopyTS, zhemm_iltcopyTS,
  544. #else
  545. zhemm_outcopyTS, zhemm_oltcopyTS,
  546. #endif
  547. zhemm_outcopyTS, zhemm_oltcopyTS,
  548. 0, 0, 0,
  549. #if (USE_GEMM3M)
  550. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  551. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  552. #else
  553. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  554. #endif
  555. zgemm3m_kernelTS,
  556. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  557. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  558. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  559. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  560. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  561. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  562. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  563. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  564. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  565. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  566. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  567. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  568. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  569. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  570. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  571. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  572. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  573. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  574. #else
  575. 0, 0, 0,
  576. NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. NULL, NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. #endif
  596. #ifndef NO_LAPACK
  597. zneg_tcopyTS, zlaswp_ncopyTS,
  598. #else
  599. NULL, NULL,
  600. #endif
  601. #endif
  602. #ifdef EXPRECISION
  603. 0, 0, 0,
  604. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  605. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  606. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  607. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  608. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  609. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  610. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  611. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  612. xsymv_LTS, xsymv_UTS,
  613. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  614. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  615. xgemm_betaTS,
  616. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  617. xgemm_incopyTS, xgemm_itcopyTS,
  618. #else
  619. xgemm_oncopyTS, xgemm_otcopyTS,
  620. #endif
  621. xgemm_oncopyTS, xgemm_otcopyTS,
  622. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  623. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  624. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  625. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  626. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  627. #else
  628. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  629. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  630. #endif
  631. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  632. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  633. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  634. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  635. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  636. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  637. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  638. #else
  639. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  640. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  641. #endif
  642. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  643. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  644. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  645. xsymm_iutcopyTS, xsymm_iltcopyTS,
  646. #else
  647. xsymm_outcopyTS, xsymm_oltcopyTS,
  648. #endif
  649. xsymm_outcopyTS, xsymm_oltcopyTS,
  650. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  651. xhemm_iutcopyTS, xhemm_iltcopyTS,
  652. #else
  653. xhemm_outcopyTS, xhemm_oltcopyTS,
  654. #endif
  655. xhemm_outcopyTS, xhemm_oltcopyTS,
  656. 0, 0, 0,
  657. #if (USE_GEMM3M)
  658. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  659. xgemm3m_kernelTS,
  660. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  661. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  662. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  663. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  664. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  665. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  666. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  667. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  668. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  669. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  670. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  671. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  672. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  673. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  674. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  675. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  676. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  677. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  678. #else
  679. 0, 0, 0,
  680. NULL,
  681. NULL, NULL,
  682. NULL, NULL,
  683. NULL, NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. NULL, NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. #endif
  700. #ifndef NO_LAPACK
  701. xneg_tcopyTS, xlaswp_ncopyTS,
  702. #else
  703. NULL, NULL,
  704. #endif
  705. #endif
  706. init_parameter,
  707. SNUMOPT, DNUMOPT, QNUMOPT,
  708. #if BUILD_SINGLE == 1
  709. saxpby_kTS,
  710. #endif
  711. #if BUILD_DOUBLE == 1
  712. daxpby_kTS,
  713. #endif
  714. #if BUILD_COMPLEX == 1
  715. caxpby_kTS,
  716. #endif
  717. #if BUILD_COMPLEX16== 1
  718. zaxpby_kTS,
  719. #endif
  720. #if BUILD_SINGLE == 1
  721. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  722. #endif
  723. #if BUILD_DOUBLE== 1
  724. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  725. #endif
  726. #if BUILD_COMPLEX == 1
  727. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  728. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  729. #endif
  730. #if BUILD_COMPLEX16 == 1
  731. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  732. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  733. #endif
  734. #if BUILD_SINGLE == 1
  735. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  736. #endif
  737. #if BUILD_DOUBLE== 1
  738. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  739. #endif
  740. #if BUILD_COMPLEX== 1
  741. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  742. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  743. #endif
  744. #if BUILD_COMPLEX16==1
  745. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  746. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  747. #endif
  748. #if BUILD_SINGLE == 1
  749. sgeadd_kTS,
  750. #endif
  751. #if BUILD_DOUBLE==1
  752. dgeadd_kTS,
  753. #endif
  754. #if BUILD_COMPLEX==1
  755. cgeadd_kTS,
  756. #endif
  757. #if BUILD_COMPLEX16==1
  758. zgeadd_kTS
  759. #endif
  760. };
  761. #if (ARCH_ARM64)
  /*
   * init_parameter -- ARM64 variant.
   *
   * Fills the GEMM blocking fields (<type>gemm_p / _q / _r) of TABLE_NAME
   * from the compile-time <TYPE>GEMM_DEFAULT_{P,Q,R} macros, restricted to
   * the precisions selected by the BUILD_* switches.
   *
   * The guard used for each real-precision triple is the same for P, Q and
   * R.  The single-precision fields are also filled for complex-only
   * builds and the double-precision fields for complex16-only builds,
   * because the GEMM3M fallbacks at the end of this function read
   * TABLE_NAME.sgemm_{p,q,r} and TABLE_NAME.dgemm_{p,q,r}; leaving Q/R
   * unset while P is set would feed unset values into cgemm3m / zgemm3m.
   *
   * Takes no arguments and returns nothing; its only effect is the stores
   * into TABLE_NAME.
   */
  static void init_parameter(void) {

  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* Single precision real; also the cgemm3m fallback source. */
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* Double precision real; also the zgemm3m fallback source. */
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /*
     * GEMM3M blocking: use the dedicated *GEMM3M_DEFAULT_* value when the
     * target defines one, otherwise reuse the value already stored for
     * the matching real precision above.
     */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif

  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif

  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif

  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

  #ifdef EXPRECISION
    /* Extended-precision 3M blocking mirrors the qgemm values. */
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */

  }
  854. #else // (ARCH_ARM64)
  855. #if defined(ARCH_MIPS64)
  856. static void init_parameter(void) {
  857. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  858. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  859. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  860. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  861. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  862. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  863. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  864. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  865. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  866. TABLE_NAME.dgemm_r = 640;
  867. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  868. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  869. #ifdef EXPRECISION
  870. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  871. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  872. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  873. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  874. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  875. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  876. #endif
  877. #if defined(USE_GEMM3M)
  878. #ifdef CGEMM3M_DEFAULT_P
  879. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  880. #else
  881. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  882. #endif
  883. #ifdef ZGEMM3M_DEFAULT_P
  884. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  885. #else
  886. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  887. #endif
  888. #ifdef CGEMM3M_DEFAULT_Q
  889. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  890. #else
  891. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  892. #endif
  893. #ifdef ZGEMM3M_DEFAULT_Q
  894. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  895. #else
  896. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  897. #endif
  898. #ifdef CGEMM3M_DEFAULT_R
  899. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  900. #else
  901. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  902. #endif
  903. #ifdef ZGEMM3M_DEFAULT_R
  904. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  905. #else
  906. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  907. #endif
  908. #ifdef EXPRECISION
  909. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  910. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  911. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  912. #endif
  913. #endif
  914. }
  915. #else // (ARCH_MIPS64)
  916. #if (ARCH_POWER)
  917. static void init_parameter(void) {
  918. #ifdef BUILD_BFLOAT16
  919. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  920. #endif
  921. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  922. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  923. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  924. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  925. #ifdef BUILD_BFLOAT16
  926. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  927. #endif
  928. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  929. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  930. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  931. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  932. #ifdef BUILD_BFLOAT16
  933. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  934. #endif
  935. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  936. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  937. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  938. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  939. }
  940. #else //POWER
  941. #if (ARCH_ZARCH)
  942. static void init_parameter(void) {
  943. #ifdef BUILD_BFLOAT16
  944. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  945. #endif
  946. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  947. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  948. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  949. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  950. #ifdef BUILD_BFLOAT16
  951. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  952. #endif
  953. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  954. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  955. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  956. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  957. #ifdef BUILD_BFLOAT16
  958. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  959. #endif
  960. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  961. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  962. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  963. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  964. }
  965. #else //ZARCH
  966. #ifdef ARCH_X86
  /*
   * get_l2_size_old -- legacy L2 cache size lookup via CPUID leaf 2.
   *
   * Executes cpuid(2, ...) and scans the one-byte cache descriptors packed
   * into EAX/EBX/ECX/EDX (the low byte of EAX is skipped; it is not a
   * descriptor).  The first descriptor found in the table below decides
   * the result.
   *
   * A register whose bit 31 is set carries no valid descriptors, so such
   * a register is cleared before its bytes are extracted; a zero byte
   * matches no table entry.
   *
   * Returns the L2 size in KB.  If no known descriptor is present, a
   * warning is printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /* Discard registers flagged as reserved (bit 31 == 1). */
    if ((unsigned int)eax >> 31) eax = 0;
    if ((unsigned int)ebx >> 31) ebx = 0;
    if ((unsigned int)ecx >> 31) ecx = 0;
    if ((unsigned int)edx >> 31) edx = 0;

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        return 3072;   /* 3 MB = 3 * 1024 KB */

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* No descriptor matched: warn and fall back to a conservative size. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1037. #endif
  /*
   * get_l2_size -- L2 cache size as reported by CPUID leaf 0x80000006.
   *
   * Reads the 16-bit field extracted by BITMASK(ecx, 16, 0xffff) and
   * returns it when it is positive.  Otherwise:
   *   - with ARCH_X86 defined, defers to get_l2_size_old();
   *   - without it, prints a warning to stderr and returns 256
   *     (the warning text calls this "256k").
   */
  static __inline__ int get_l2_size(void){

    int reg_a, reg_b, reg_c, reg_d;
    int l2_size;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    l2_size = BITMASK(reg_c, 16, 0xffff);

    /* Common case for both build flavours: the leaf gave a usable value. */
    if (l2_size > 0) {
      return l2_size;
    }

  #ifdef ARCH_X86
    /* 32-bit x86: fall back to the descriptor-based lookup. */
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * get_l3_size -- L3 cache size derived from CPUID leaf 0x80000006.
   *
   * Returns BITMASK(edx, 18, 0x3fff) multiplied by 512.
   * NOTE(review): presumably the EDX field counts 512 KB units, making the
   * result a size in KB -- confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){

    int reg_a, reg_b, reg_c, reg_d;
    int l3_size;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    l3_size = BITMASK(reg_d, 18, 0x3fff) * 512;

    return l3_size;
  }
  1058. static void init_parameter(void) {
  1059. int l2 = get_l2_size();
  1060. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1061. /* where the GEMM unrolling parameters do not depend on l2 */
  1062. #ifdef BUILD_BFLOAT16
  1063. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1064. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1065. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1066. #endif
  1067. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1068. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1069. #endif
  1070. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1071. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1072. #endif
  1073. #if BUILD_COMPLEX == 1
  1074. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1075. #endif
  1076. #if BUILD_COMPLEX16==1
  1077. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1078. #endif
  1079. #if BUILD_COMPLEX == 1
  1080. #ifdef CGEMM3M_DEFAULT_Q
  1081. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1082. #else
  1083. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1084. #endif
  1085. #endif
  1086. #if BUILD_COMPLEX16 == 1
  1087. #ifdef ZGEMM3M_DEFAULT_Q
  1088. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1089. #else
  1090. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1091. #endif
  1092. #endif
  1093. #ifdef EXPRECISION
  1094. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1095. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1096. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1097. #endif
  1098. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1099. #ifdef DEBUG
  1100. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1101. #endif
  1102. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1103. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1104. #endif
  1105. #if BUILD_DOUBLE == 1
  1106. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1107. #endif
  1108. #if BUILD_COMPLEX==1
  1109. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1110. #endif
  1111. #if BUILD_COMPLEX16==1
  1112. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1113. #endif
  1114. #ifdef EXPRECISION
  1115. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1116. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1117. #endif
  1118. #endif
  1119. #ifdef CORE_NORTHWOOD
  1120. #ifdef DEBUG
  1121. fprintf(stderr, "Northwood\n");
  1122. #endif
  1123. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1124. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1125. #endif
  1126. #if BUILD_DOUBLE == 1
  1127. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1128. #endif
  1129. #if BUILD_COMPLEX==1
  1130. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1131. #endif
  1132. #if BUILD_COMPLEX16==1
  1133. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1134. #endif
  1135. #ifdef EXPRECISION
  1136. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1137. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1138. #endif
  1139. #endif
  1140. #ifdef ATOM
  1141. #ifdef DEBUG
  1142. fprintf(stderr, "Atom\n");
  1143. #endif
  1144. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1145. TABLE_NAME.sgemm_p = 256;
  1146. #endif
  1147. #if BUILD_DOUBLE ==1
  1148. TABLE_NAME.dgemm_p = 128;
  1149. #endif
  1150. #if BUILD_COMPLEX==1
  1151. TABLE_NAME.cgemm_p = 128;
  1152. #endif
  1153. #if BUILD_COMPLEX16==1
  1154. TABLE_NAME.zgemm_p = 64;
  1155. #endif
  1156. #ifdef EXPRECISION
  1157. TABLE_NAME.qgemm_p = 64;
  1158. TABLE_NAME.xgemm_p = 32;
  1159. #endif
  1160. #endif
  1161. #ifdef CORE_PRESCOTT
  1162. #ifdef DEBUG
  1163. fprintf(stderr, "Prescott\n");
  1164. #endif
  1165. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1166. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1167. #endif
  1168. #if BUILD_DOUBLE ==1
  1169. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1170. #endif
  1171. #if BUILD_COMPLEX==1
  1172. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1173. #endif
  1174. #if BUILD_COMPLEX16 == 1
  1175. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1176. #endif
  1177. #ifdef EXPRECISION
  1178. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1179. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1180. #endif
  1181. #endif
  1182. #ifdef CORE2
  1183. #ifdef DEBUG
  1184. fprintf(stderr, "Core2\n");
  1185. #endif
  1186. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1187. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1188. #endif
  1189. #if BUILD_DOUBLE==1
  1190. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1191. #endif
  1192. #if BUILD_COMPLEX==1
  1193. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1194. #endif
  1195. #if BUILD_COMPLEX16==1
  1196. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1197. #endif
  1198. #ifdef EXPRECISION
  1199. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1200. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1201. #endif
  1202. #endif
  1203. #ifdef PENRYN
  1204. #ifdef DEBUG
  1205. fprintf(stderr, "Penryn\n");
  1206. #endif
  1207. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1208. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1209. #endif
  1210. #if BUILD_DOUBLE == 1
  1211. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1212. #endif
  1213. #if BUILD_COMPLEX==1
  1214. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1215. #endif
  1216. #if BUILD_COMPLEX16==1
  1217. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1218. #endif
  1219. #ifdef EXPRECISION
  1220. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1221. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1222. #endif
  1223. #endif
  1224. #ifdef DUNNINGTON
  1225. #ifdef DEBUG
  1226. fprintf(stderr, "Dunnington\n");
  1227. #endif
  1228. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1229. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1230. #endif
  1231. #if BUILD_DOUBLE ==1
  1232. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1233. #endif
  1234. #if BUILD_COMPLEX==1
  1235. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1236. #endif
  1237. #if BUILD_COMPLEX16==1
  1238. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1239. #endif
  1240. #ifdef EXPRECISION
  1241. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1242. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1243. #endif
  1244. #endif
  1245. #ifdef NEHALEM
  1246. #ifdef DEBUG
  1247. fprintf(stderr, "Nehalem\n");
  1248. #endif
  1249. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1250. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1251. #endif
  1252. #if BUILD_DOUBLE
  1253. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1254. #endif
  1255. #if BUILD_COMPLEX
  1256. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1257. #endif
  1258. #if BUILD_COMPLEX16
  1259. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1260. #endif
  1261. #ifdef EXPRECISION
  1262. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1263. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1264. #endif
  1265. #endif
  1266. #ifdef SANDYBRIDGE
  1267. #ifdef DEBUG
  1268. fprintf(stderr, "Sandybridge\n");
  1269. #endif
  1270. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1271. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1272. #endif
  1273. #if BUILD_DOUBLE
  1274. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1275. #endif
  1276. #if BUILD_COMPLEX
  1277. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1278. #endif
  1279. #if BUILD_COMPLEX16
  1280. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1281. #endif
  1282. #ifdef EXPRECISION
  1283. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1284. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1285. #endif
  1286. #endif
  1287. #ifdef HASWELL
  1288. #ifdef DEBUG
  1289. fprintf(stderr, "Haswell\n");
  1290. #endif
  1291. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1292. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1293. #endif
  1294. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1295. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1296. #endif
  1297. #if BUILD_COMPLEX
  1298. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1299. #endif
  1300. #if BUILD_COMPLEX16
  1301. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1302. #endif
  1303. #ifdef EXPRECISION
  1304. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1305. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1306. #endif
  1307. #endif
  1308. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1309. #ifdef DEBUG
  1310. fprintf(stderr, "SkylakeX\n");
  1311. #endif
  1312. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1313. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1314. #endif
  1315. #if BUILD_DOUBLE
  1316. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1317. #endif
  1318. #if BUILD_COMPLEX
  1319. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1320. #endif
  1321. #if BUILD_COMPLEX16
  1322. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1323. #endif
  1324. #ifdef EXPRECISION
  1325. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1326. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1327. #endif
  1328. #endif
  1329. #ifdef OPTERON
  1330. #ifdef DEBUG
  1331. fprintf(stderr, "Opteron\n");
  1332. #endif
  1333. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1334. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1335. #endif
  1336. #if BUILD_DOUBLE
  1337. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1338. #endif
  1339. #if BUILD_COMPLEX
  1340. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1341. #endif
  1342. #if BUILD_COMPLEX16
  1343. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1344. #endif
  1345. #ifdef EXPRECISION
  1346. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1347. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1348. #endif
  1349. #endif
  1350. #ifdef BARCELONA
  1351. #ifdef DEBUG
  1352. fprintf(stderr, "Barcelona\n");
  1353. #endif
  1354. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1355. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1356. #endif
  1357. #if BUILD_DOUBLE
  1358. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1359. #endif
  1360. #if BUILD_COMPLEX
  1361. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1362. #endif
  1363. #if BUILD_COMPLEX16
  1364. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1365. #endif
  1366. #ifdef EXPRECISION
  1367. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1368. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1369. #endif
  1370. #endif
  1371. #ifdef BOBCAT
  1372. #ifdef DEBUG
  1373. fprintf(stderr, "Bobcate\n");
  1374. #endif
  1375. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1376. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1377. #endif
  1378. #if BUILD_DOUBLE
  1379. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1380. #endif
  1381. #if BUILD_COMPLEX
  1382. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1383. #endif
  1384. #if BUILD_COMPLEX16
  1385. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1386. #endif
  1387. #ifdef EXPRECISION
  1388. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1389. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1390. #endif
  1391. #endif
  1392. #ifdef BULLDOZER
  1393. #ifdef DEBUG
  1394. fprintf(stderr, "Bulldozer\n");
  1395. #endif
  1396. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1397. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1398. #endif
  1399. #if BUILD_DOUBLE
  1400. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1401. #endif
  1402. #if BUILD_COMPLEX
  1403. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1404. #endif
  1405. #if BUILD_COMPLEX16
  1406. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1407. #endif
  1408. #ifdef EXPRECISION
  1409. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1410. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1411. #endif
  1412. #endif
  1413. #ifdef EXCAVATOR
  1414. #ifdef DEBUG
  1415. fprintf(stderr, "Excavator\n");
  1416. #endif
  1417. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1418. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1419. #endif
  1420. #if BUILD_DOUBLE
  1421. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1422. #endif
  1423. #if BUILD_COMPLEX
  1424. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1425. #endif
  1426. #if BUILD_COMPLEX16
  1427. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1428. #endif
  1429. #ifdef EXPRECISION
  1430. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1431. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1432. #endif
  1433. #endif
  1434. #ifdef PILEDRIVER
  1435. #ifdef DEBUG
  1436. fprintf(stderr, "Piledriver\n");
  1437. #endif
  1438. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1439. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1440. #endif
  1441. #if BUILD_DOUBLE
  1442. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1443. #endif
  1444. #if BUILD_COMPLEX
  1445. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1446. #endif
  1447. #if BUILD_COMPLEX16
  1448. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1449. #endif
  1450. #ifdef EXPRECISION
  1451. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1452. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1453. #endif
  1454. #endif
  1455. #ifdef STEAMROLLER
  1456. #ifdef DEBUG
  1457. fprintf(stderr, "Steamroller\n");
  1458. #endif
  1459. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1460. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1461. #endif
  1462. #if BUILD_DOUBLE
  1463. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1464. #endif
  1465. #if BUILD_COMPLEX
  1466. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1467. #endif
  1468. #if BUILD_COMPLEX16
  1469. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1470. #endif
  1471. #ifdef EXPRECISION
  1472. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1473. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1474. #endif
  1475. #endif
  1476. #ifdef ZEN
  1477. #ifdef DEBUG
  1478. fprintf(stderr, "Zen\n");
  1479. #endif
  1480. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1481. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1482. #endif
  1483. #if BUILD_DOUBLE
  1484. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1485. #endif
  1486. #if BUILD_COMPLEX
  1487. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1488. #endif
  1489. #if BUILD_COMPLEX16
  1490. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1491. #endif
  1492. #ifdef EXPRECISION
  1493. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1494. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1495. #endif
  1496. #endif
  1497. #ifdef NANO
  1498. #ifdef DEBUG
  1499. fprintf(stderr, "NANO\n");
  1500. #endif
  1501. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1502. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1503. #endif
  1504. #if (BUILD_DOUBLE==1)
  1505. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1506. #endif
  1507. #if (BUILD_COMPLEX==1)
  1508. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1509. #endif
  1510. #if (BUILD_COMPLEX16==1)
  1511. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1512. #endif
  1513. #ifdef EXPRECISION
  1514. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1515. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1516. #endif
  1517. #endif
  1518. #if BUILD_COMPLEX==1
  1519. #ifdef CGEMM3M_DEFAULT_P
  1520. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1521. #else
  1522. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1523. #endif
  1524. #endif
  1525. #if BUILD_COMPLEX16==1
  1526. #ifdef ZGEMM3M_DEFAULT_P
  1527. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1528. #else
  1529. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1530. #endif
  1531. #endif
  1532. #ifdef EXPRECISION
  1533. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1534. #endif
  1535. #if BUILD_SINGLE == 1
  1536. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1537. #endif
  1538. #if BUILD_DOUBLE== 1
  1539. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1540. #endif
  1541. #if BUILD_COMPLEX==1
  1542. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1543. #endif
  1544. #if BUILD_COMPLEX16==1
  1545. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1546. #endif
  1547. #if BUILD_COMPLEX==1
  1548. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1549. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1550. #else
  1551. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1552. #endif
  1553. #endif
  1554. #if BUILD_COMPLEX16==1
  1555. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1556. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1557. #else
  1558. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1559. #endif
  1560. #endif
  1561. #ifdef QUAD_PRECISION
  1562. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1563. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1564. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1565. #endif
  1566. #ifdef DEBUG
  1567. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1568. #endif
  1569. #if BUILD_SINGLE==1
  1570. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1571. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1572. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1573. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1574. #endif
  1575. #if BUILD_DOUBLE==1
  1576. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1577. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1578. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1579. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1580. #endif
  1581. #ifdef EXPRECISION
  1582. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1583. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1584. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1585. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1586. #endif
  1587. #if BUILD_COMPLEX ==1
  1588. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1589. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1590. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1591. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1592. #endif
  1593. #if BUILD_COMPLEX16 ==1
  1594. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1595. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1596. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1597. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1598. #endif
  1599. #if BUILD_COMPLEX == 1
  1600. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1601. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1602. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1603. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1604. #endif
  1605. #if BUILD_COMPLEX16 == 1
  1606. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1607. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1608. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1609. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1610. #endif
  1611. #ifdef EXPRECISION
  1612. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1613. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1614. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1615. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1616. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1617. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1618. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1619. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1620. #endif
  1621. }
  1622. #endif //POWER
  1623. #endif //ZARCH
  1624. #endif //(ARCH_MIPS64)
  1625. #endif //(ARCH_ARM64)