You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 50 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void); /* Forward declaration: the name is used as an entry in the TABLE_NAME initializer below, before any definition is visible here. */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. sbgemm_kernelTS, sbgemm_betaTS,
  66. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  67. sbgemm_incopyTS, sbgemm_itcopyTS,
  68. #else
  69. sbgemm_oncopyTS, sbgemm_otcopyTS,
  70. #endif
  71. sbgemm_oncopyTS, sbgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #endif
  104. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  105. 0, 0, 0,
  106. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  107. #ifdef SGEMM_DEFAULT_UNROLL_MN
  108. SGEMM_DEFAULT_UNROLL_MN,
  109. #else
  110. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  111. #endif
  112. #endif
  113. #ifdef HAVE_EXCLUSIVE_CACHE
  114. 1,
  115. #else
  116. 0,
  117. #endif
  118. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  119. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  120. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  121. snrm2_kTS, sasum_kTS,
  122. #endif
  123. #if BUILD_SINGLE == 1
  124. ssum_kTS,
  125. #endif
  126. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  127. scopy_kTS, sdot_kTS,
  128. // dsdot_kTS,
  129. srot_kTS, saxpy_kTS,
  130. #endif
  131. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  132. sscal_kTS,
  133. #endif
  134. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  135. sswap_kTS,
  136. sgemv_nTS, sgemv_tTS,
  137. #endif
  138. #if BUILD_SINGLE == 1
  139. sger_kTS,
  140. ssymv_LTS, ssymv_UTS,
  141. #endif
  142. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  143. #ifdef ARCH_X86_64
  144. sgemm_directTS,
  145. sgemm_direct_performantTS,
  146. #endif
  147. sgemm_kernelTS, sgemm_betaTS,
  148. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  149. sgemm_incopyTS, sgemm_itcopyTS,
  150. #else
  151. sgemm_oncopyTS, sgemm_otcopyTS,
  152. #endif
  153. sgemm_oncopyTS, sgemm_otcopyTS,
  154. #endif
  155. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  156. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  157. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  158. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  159. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  160. #else
  161. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  162. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  163. #endif
  164. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  165. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  166. #endif
  167. #if BUILD_SINGLE == 1
  168. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  171. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  172. #else
  173. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  174. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  175. #endif
  176. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  177. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  178. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  179. ssymm_iutcopyTS, ssymm_iltcopyTS,
  180. #else
  181. ssymm_outcopyTS, ssymm_oltcopyTS,
  182. #endif
  183. ssymm_outcopyTS, ssymm_oltcopyTS,
  184. #endif
  185. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  186. #ifndef NO_LAPACK
  187. sneg_tcopyTS, slaswp_ncopyTS,
  188. #else
  189. NULL,NULL,
  190. #endif
  191. #endif
  192. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  193. 0, 0, 0,
  194. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  195. #ifdef DGEMM_DEFAULT_UNROLL_MN
  196. DGEMM_DEFAULT_UNROLL_MN,
  197. #else
  198. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  199. #endif
  200. #endif
  201. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  202. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  203. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  204. dnrm2_kTS, dasum_kTS,
  205. #endif
  206. #if (BUILD_DOUBLE==1)
  207. dsum_kTS,
  208. #endif
  209. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  210. dcopy_kTS, ddot_kTS,
  211. #endif
  212. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  213. dsdot_kTS,
  214. #endif
  215. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  216. drot_kTS,
  217. daxpy_kTS,
  218. dscal_kTS,
  219. dswap_kTS,
  220. dgemv_nTS, dgemv_tTS,
  221. #endif
  222. #if (BUILD_DOUBLE==1)
  223. dger_kTS,
  224. dsymv_LTS, dsymv_UTS,
  225. #endif
  226. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  227. dgemm_kernelTS, dgemm_betaTS,
  228. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  229. dgemm_incopyTS, dgemm_itcopyTS,
  230. #else
  231. dgemm_oncopyTS, dgemm_otcopyTS,
  232. #endif
  233. dgemm_oncopyTS, dgemm_otcopyTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1)
  236. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  237. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  238. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  239. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  240. #else
  241. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  242. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  243. #endif
  244. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  245. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  246. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  247. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  248. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  249. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  250. #else
  251. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  252. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  253. #endif
  254. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  255. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  256. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  257. dsymm_iutcopyTS, dsymm_iltcopyTS,
  258. #else
  259. dsymm_outcopyTS, dsymm_oltcopyTS,
  260. #endif
  261. dsymm_outcopyTS, dsymm_oltcopyTS,
  262. #ifndef NO_LAPACK
  263. dneg_tcopyTS, dlaswp_ncopyTS,
  264. #else
  265. NULL, NULL,
  266. #endif
  267. #endif
  268. #ifdef EXPRECISION
  269. 0, 0, 0,
  270. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  271. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  272. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  273. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  274. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  275. qgemv_nTS, qgemv_tTS, qger_kTS,
  276. qsymv_LTS, qsymv_UTS,
  277. qgemm_kernelTS, qgemm_betaTS,
  278. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  279. qgemm_incopyTS, qgemm_itcopyTS,
  280. #else
  281. qgemm_oncopyTS, qgemm_otcopyTS,
  282. #endif
  283. qgemm_oncopyTS, qgemm_otcopyTS,
  284. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  285. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  286. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  287. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  288. #else
  289. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  290. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  291. #endif
  292. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  293. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  294. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  295. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  296. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  297. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  298. #else
  299. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  300. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  301. #endif
  302. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  303. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  304. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  305. qsymm_iutcopyTS, qsymm_iltcopyTS,
  306. #else
  307. qsymm_outcopyTS, qsymm_oltcopyTS,
  308. #endif
  309. qsymm_outcopyTS, qsymm_oltcopyTS,
  310. #ifndef NO_LAPACK
  311. qneg_tcopyTS, qlaswp_ncopyTS,
  312. #else
  313. NULL, NULL,
  314. #endif
  315. #endif
  316. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  317. 0, 0, 0,
  318. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  319. #ifdef CGEMM_DEFAULT_UNROLL_MN
  320. CGEMM_DEFAULT_UNROLL_MN,
  321. #else
  322. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  323. #endif
  324. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  325. #endif
  326. #if (BUILD_COMPLEX)
  327. cnrm2_kTS, casum_kTS, csum_kTS,
  328. #endif
  329. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  330. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  331. #endif
  332. #if (BUILD_COMPLEX)
  333. csrot_kTS,
  334. #endif
  335. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  336. caxpy_kTS,
  337. caxpyc_kTS,
  338. cscal_kTS,
  339. cswap_kTS,
  340. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  341. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  342. #endif
  343. #if (BUILD_COMPLEX)
  344. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  345. csymv_LTS, csymv_UTS,
  346. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  347. #endif
  348. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  349. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  350. cgemm_betaTS,
  351. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  352. cgemm_incopyTS, cgemm_itcopyTS,
  353. #else
  354. cgemm_oncopyTS, cgemm_otcopyTS,
  355. #endif
  356. cgemm_oncopyTS, cgemm_otcopyTS,
  357. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  358. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  359. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  360. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  361. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  362. #else
  363. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  364. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  365. #endif
  366. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  367. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  368. #endif
  369. #if (BUILD_COMPLEX)
  370. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  371. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  372. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  373. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  374. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  375. #else
  376. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  377. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  378. #endif
  379. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  380. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  381. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  382. csymm_iutcopyTS, csymm_iltcopyTS,
  383. #else
  384. csymm_outcopyTS, csymm_oltcopyTS,
  385. #endif
  386. csymm_outcopyTS, csymm_oltcopyTS,
  387. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  388. chemm_iutcopyTS, chemm_iltcopyTS,
  389. #else
  390. chemm_outcopyTS, chemm_oltcopyTS,
  391. #endif
  392. chemm_outcopyTS, chemm_oltcopyTS,
  393. 0, 0, 0,
  394. #if (USE_GEMM3M)
  395. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  396. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  397. #else
  398. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  399. #endif
  400. cgemm3m_kernelTS,
  401. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  402. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  403. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  404. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  405. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  406. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  407. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  408. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  409. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  410. csymm3m_oucopybTS, csymm3m_olcopybTS,
  411. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  412. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  413. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  414. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  415. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  416. chemm3m_oucopybTS, chemm3m_olcopybTS,
  417. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  418. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  419. #else
  420. 0, 0, 0,
  421. NULL,
  422. NULL, NULL,
  423. NULL, NULL,
  424. NULL, NULL,
  425. NULL, NULL,
  426. NULL, NULL,
  427. NULL, NULL,
  428. NULL, NULL,
  429. NULL, NULL,
  430. NULL, NULL,
  431. NULL, NULL,
  432. NULL, NULL,
  433. NULL, NULL,
  434. NULL, NULL,
  435. NULL, NULL,
  436. NULL, NULL,
  437. NULL, NULL,
  438. NULL, NULL,
  439. NULL, NULL,
  440. #endif
  441. #endif
  442. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  443. #ifndef NO_LAPACK
  444. cneg_tcopyTS,
  445. claswp_ncopyTS,
  446. #else
  447. NULL, NULL,
  448. #endif
  449. #endif
  450. #if BUILD_COMPLEX16 == 1
  451. 0, 0, 0,
  452. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  453. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  454. ZGEMM_DEFAULT_UNROLL_MN,
  455. #else
  456. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  457. #endif
  458. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  459. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  460. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  461. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  462. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  463. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  464. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  465. zsymv_LTS, zsymv_UTS,
  466. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  467. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  468. zgemm_betaTS,
  469. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  470. zgemm_incopyTS, zgemm_itcopyTS,
  471. #else
  472. zgemm_oncopyTS, zgemm_otcopyTS,
  473. #endif
  474. zgemm_oncopyTS, zgemm_otcopyTS,
  475. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  476. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  477. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  478. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  479. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  480. #else
  481. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  482. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  483. #endif
  484. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  485. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  486. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  487. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  488. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  489. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  490. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  491. #else
  492. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  493. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  494. #endif
  495. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  496. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  497. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  498. zsymm_iutcopyTS, zsymm_iltcopyTS,
  499. #else
  500. zsymm_outcopyTS, zsymm_oltcopyTS,
  501. #endif
  502. zsymm_outcopyTS, zsymm_oltcopyTS,
  503. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  504. zhemm_iutcopyTS, zhemm_iltcopyTS,
  505. #else
  506. zhemm_outcopyTS, zhemm_oltcopyTS,
  507. #endif
  508. zhemm_outcopyTS, zhemm_oltcopyTS,
  509. 0, 0, 0,
  510. #if (USE_GEMM3M)
  511. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  512. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  513. #else
  514. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  515. #endif
  516. zgemm3m_kernelTS,
  517. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  518. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  519. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  520. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  521. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  522. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  523. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  524. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  525. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  526. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  527. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  528. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  529. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  530. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  531. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  532. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  533. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  534. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  535. #else
  536. 0, 0, 0,
  537. NULL,
  538. NULL, NULL,
  539. NULL, NULL,
  540. NULL, NULL,
  541. NULL, NULL,
  542. NULL, NULL,
  543. NULL, NULL,
  544. NULL, NULL,
  545. NULL, NULL,
  546. NULL, NULL,
  547. NULL, NULL,
  548. NULL, NULL,
  549. NULL, NULL,
  550. NULL, NULL,
  551. NULL, NULL,
  552. NULL, NULL,
  553. NULL, NULL,
  554. NULL, NULL,
  555. NULL, NULL,
  556. #endif
  557. #ifndef NO_LAPACK
  558. zneg_tcopyTS, zlaswp_ncopyTS,
  559. #else
  560. NULL, NULL,
  561. #endif
  562. #endif
  563. #ifdef EXPRECISION
  564. 0, 0, 0,
  565. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  566. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  567. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  568. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  569. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  570. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  571. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  572. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  573. xsymv_LTS, xsymv_UTS,
  574. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  575. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  576. xgemm_betaTS,
  577. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  578. xgemm_incopyTS, xgemm_itcopyTS,
  579. #else
  580. xgemm_oncopyTS, xgemm_otcopyTS,
  581. #endif
  582. xgemm_oncopyTS, xgemm_otcopyTS,
  583. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  584. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  585. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  586. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  587. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  588. #else
  589. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  590. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  591. #endif
  592. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  593. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  594. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  595. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  596. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  597. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  598. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  599. #else
  600. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  601. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  602. #endif
  603. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  604. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  605. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  606. xsymm_iutcopyTS, xsymm_iltcopyTS,
  607. #else
  608. xsymm_outcopyTS, xsymm_oltcopyTS,
  609. #endif
  610. xsymm_outcopyTS, xsymm_oltcopyTS,
  611. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  612. xhemm_iutcopyTS, xhemm_iltcopyTS,
  613. #else
  614. xhemm_outcopyTS, xhemm_oltcopyTS,
  615. #endif
  616. xhemm_outcopyTS, xhemm_oltcopyTS,
  617. 0, 0, 0,
  618. #if (USE_GEMM3M)
  619. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  620. xgemm3m_kernelTS,
  621. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  622. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  623. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  624. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  625. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  626. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  627. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  628. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  629. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  630. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  631. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  632. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  633. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  634. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  635. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  636. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  637. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  638. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  639. #else
  640. 0, 0, 0,
  641. NULL,
  642. NULL, NULL,
  643. NULL, NULL,
  644. NULL, NULL,
  645. NULL, NULL,
  646. NULL, NULL,
  647. NULL, NULL,
  648. NULL, NULL,
  649. NULL, NULL,
  650. NULL, NULL,
  651. NULL, NULL,
  652. NULL, NULL,
  653. NULL, NULL,
  654. NULL, NULL,
  655. NULL, NULL,
  656. NULL, NULL,
  657. NULL, NULL,
  658. NULL, NULL,
  659. NULL, NULL,
  660. #endif
  661. #ifndef NO_LAPACK
  662. xneg_tcopyTS, xlaswp_ncopyTS,
  663. #else
  664. NULL, NULL,
  665. #endif
  666. #endif
  667. init_parameter,
  668. SNUMOPT, DNUMOPT, QNUMOPT,
  669. #if BUILD_SINGLE == 1
  670. saxpby_kTS,
  671. #endif
  672. #if BUILD_DOUBLE == 1
  673. daxpby_kTS,
  674. #endif
  675. #if BUILD_COMPLEX == 1
  676. caxpby_kTS,
  677. #endif
  678. #if BUILD_COMPLEX16== 1
  679. zaxpby_kTS,
  680. #endif
  681. #if BUILD_SINGLE == 1
  682. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  683. #endif
  684. #if BUILD_DOUBLE== 1
  685. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  686. #endif
  687. #if BUILD_COMPLEX == 1
  688. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  689. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  690. #endif
  691. #if BUILD_COMPLEX16 == 1
  692. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  693. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  694. #endif
  695. #if BUILD_SINGLE == 1
  696. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  697. #endif
  698. #if BUILD_DOUBLE== 1
  699. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  700. #endif
  701. #if BUILD_COMPLEX== 1
  702. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  703. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  704. #endif
  705. #if BUILD_COMPLEX16==1
  706. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  707. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  708. #endif
  709. #if BUILD_SINGLE == 1
  710. sgeadd_kTS,
  711. #endif
  712. #if BUILD_DOUBLE==1
  713. dgeadd_kTS,
  714. #endif
  715. #if BUILD_COMPLEX==1
  716. cgeadd_kTS,
  717. #endif
  718. #if BUILD_COMPLEX16==1
  719. zgeadd_kTS
  720. #endif
  721. };
  722. #if (ARCH_ARM64)
  723. static void init_parameter(void) {
  724. #if (BUILD_BFLOAT16)
  725. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  726. #endif
  727. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  728. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  729. #endif
  730. #if BUILD_DOUBLE == 1
  731. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  732. #endif
  733. #if BUILD_COMPLEX==1
  734. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  735. #endif
  736. #if BUILD_COMPLEX16==1
  737. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  738. #endif
  739. #if (BUILD_BFLOAT16)
  740. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  741. #endif
  742. #if BUILD_SINGLE == 1
  743. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  744. #endif
  745. #if BUILD_DOUBLE== 1
  746. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  747. #endif
  748. #if BUILD_COMPLEX== 1
  749. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  750. #endif
  751. #if BUILD_COMPLEX16==1
  752. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  753. #endif
  754. #if (BUILD_BFLOAT16)
  755. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  756. #endif
  757. #if BUILD_SINGLE == 1
  758. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  759. #endif
  760. #if BUILD_DOUBLE==1
  761. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  762. #endif
  763. #if BUILD_COMPLEX==1
  764. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  765. #endif
  766. #if BUILD_COMPLEX16==1
  767. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  768. #endif
  769. #ifdef EXPRECISION
  770. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  771. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  772. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  773. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  774. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  775. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  776. #endif
  777. #if (USE_GEMM3M)
  778. #ifdef CGEMM3M_DEFAULT_P
  779. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  780. #else
  781. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  782. #endif
  783. #ifdef ZGEMM3M_DEFAULT_P
  784. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  785. #else
  786. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  787. #endif
  788. #ifdef CGEMM3M_DEFAULT_Q
  789. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  790. #else
  791. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  792. #endif
  793. #ifdef ZGEMM3M_DEFAULT_Q
  794. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  795. #else
  796. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  797. #endif
  798. #ifdef CGEMM3M_DEFAULT_R
  799. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  800. #else
  801. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  802. #endif
  803. #ifdef ZGEMM3M_DEFAULT_R
  804. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  805. #else
  806. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  807. #endif
  808. #ifdef EXPRECISION
  809. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  810. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  811. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  812. #endif
  813. #endif
  814. }
  815. #else // (ARCH_ARM64)
  816. #if defined(ARCH_MIPS64)
  817. static void init_parameter(void) {
  818. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  819. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  820. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  821. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  822. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  823. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  824. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  825. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  826. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  827. TABLE_NAME.dgemm_r = 640;
  828. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  829. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  830. #ifdef EXPRECISION
  831. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  832. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  833. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  834. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  835. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  836. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  837. #endif
  838. #if defined(USE_GEMM3M)
  839. #ifdef CGEMM3M_DEFAULT_P
  840. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  841. #else
  842. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  843. #endif
  844. #ifdef ZGEMM3M_DEFAULT_P
  845. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  846. #else
  847. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  848. #endif
  849. #ifdef CGEMM3M_DEFAULT_Q
  850. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  851. #else
  852. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  853. #endif
  854. #ifdef ZGEMM3M_DEFAULT_Q
  855. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  856. #else
  857. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  858. #endif
  859. #ifdef CGEMM3M_DEFAULT_R
  860. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  861. #else
  862. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  863. #endif
  864. #ifdef ZGEMM3M_DEFAULT_R
  865. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  866. #else
  867. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  868. #endif
  869. #ifdef EXPRECISION
  870. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  871. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  872. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  873. #endif
  874. #endif
  875. }
  876. #else // (ARCH_MIPS64)
  877. #if (ARCH_POWER)
  878. static void init_parameter(void) {
  879. #ifdef BUILD_BFLOAT16
  880. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  881. #endif
  882. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  883. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  884. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  885. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  886. #ifdef BUILD_BFLOAT16
  887. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  888. #endif
  889. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  890. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  891. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  892. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  893. #ifdef BUILD_BFLOAT16
  894. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  895. #endif
  896. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  897. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  898. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  899. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  900. }
  901. #else //POWER
  902. #if (ARCH_ZARCH)
  903. static void init_parameter(void) {
  904. #ifdef BUILD_BFLOAT16
  905. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  906. #endif
  907. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  908. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  909. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  910. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  911. #ifdef BUILD_BFLOAT16
  912. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  913. #endif
  914. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  915. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  916. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  917. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  918. #ifdef BUILD_BFLOAT16
  919. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  920. #endif
  921. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  922. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  923. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  924. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  925. }
  926. #else //ZARCH
  927. #ifdef ARCH_X86
  928. static int get_l2_size_old(void){
  929. int i, eax, ebx, ecx, edx, cpuid_level;
  930. int info[15];
  931. cpuid(2, &eax, &ebx, &ecx, &edx);
  932. info[ 0] = BITMASK(eax, 8, 0xff);
  933. info[ 1] = BITMASK(eax, 16, 0xff);
  934. info[ 2] = BITMASK(eax, 24, 0xff);
  935. info[ 3] = BITMASK(ebx, 0, 0xff);
  936. info[ 4] = BITMASK(ebx, 8, 0xff);
  937. info[ 5] = BITMASK(ebx, 16, 0xff);
  938. info[ 6] = BITMASK(ebx, 24, 0xff);
  939. info[ 7] = BITMASK(ecx, 0, 0xff);
  940. info[ 8] = BITMASK(ecx, 8, 0xff);
  941. info[ 9] = BITMASK(ecx, 16, 0xff);
  942. info[10] = BITMASK(ecx, 24, 0xff);
  943. info[11] = BITMASK(edx, 0, 0xff);
  944. info[12] = BITMASK(edx, 8, 0xff);
  945. info[13] = BITMASK(edx, 16, 0xff);
  946. info[14] = BITMASK(edx, 24, 0xff);
  947. for (i = 0; i < 15; i++){
  948. switch (info[i]){
  949. /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
  950. case 0x1a :
  951. return 96;
  952. case 0x39 :
  953. case 0x3b :
  954. case 0x41 :
  955. case 0x79 :
  956. case 0x81 :
  957. return 128;
  958. case 0x3a :
  959. return 192;
  960. case 0x21 :
  961. case 0x3c :
  962. case 0x42 :
  963. case 0x7a :
  964. case 0x7e :
  965. case 0x82 :
  966. return 256;
  967. case 0x3d :
  968. return 384;
  969. case 0x3e :
  970. case 0x43 :
  971. case 0x7b :
  972. case 0x7f :
  973. case 0x83 :
  974. case 0x86 :
  975. return 512;
  976. case 0x44 :
  977. case 0x78 :
  978. case 0x7c :
  979. case 0x84 :
  980. case 0x87 :
  981. return 1024;
  982. case 0x45 :
  983. case 0x7d :
  984. case 0x85 :
  985. return 2048;
  986. case 0x48 :
  987. return 3184;
  988. case 0x49 :
  989. return 4096;
  990. case 0x4e :
  991. return 6144;
  992. }
  993. }
  994. // return 0;
  995. fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  996. return 256;
  997. }
  998. #endif
  999. static __inline__ int get_l2_size(void){
  1000. int eax, ebx, ecx, edx, l2;
  1001. cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
  1002. l2 = BITMASK(ecx, 16, 0xffff);
  1003. #ifndef ARCH_X86
  1004. if (l2 <= 0) {
  1005. fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  1006. return 256;
  1007. }
  1008. return l2;
  1009. #else
  1010. if (l2 > 0) return l2;
  1011. return get_l2_size_old();
  1012. #endif
  1013. }
  1014. static __inline__ int get_l3_size(void){
  1015. int eax, ebx, ecx, edx;
  1016. cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
  1017. return BITMASK(edx, 18, 0x3fff) * 512;
  1018. }
/*
 * init_parameter() -- variant compiled when none of the ARM64 / MIPS64 /
 * POWER / ZARCH branches of the enclosing preprocessor chain is taken.
 *
 * Steps, in order:
 *   1. query the L2 cache size through get_l2_size();
 *   2. load every Q blocking field from its *_DEFAULT_Q macro;
 *   3. set the P blocking fields in the section(s) whose core macro is
 *      defined -- either from l2 or from the *_DEFAULT_P macros;
 *   4. derive the GEMM3M P fields, then round each P up to a multiple of
 *      the matching *_DEFAULT_UNROLL_M;
 *   5. compute every R field from BUFFER_SIZE, P, Q, offsetA and align.
 *
 * The per-core sections are independent #ifdef blocks, not an #elif chain;
 * nothing here prevents more than one of them from being compiled in.
 */
static void init_parameter(void) {
int l2 = get_l2_size();
(void) l2; /* dirty trick to suppress unused variable warning for targets */
/* where the GEMM unrolling parameters do not depend on l2 */
/* ---- Step 2: Q blocking (and bfloat16 P/R/Q) from compile-time defaults ---- */
#ifdef BUILD_BFLOAT16
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX == 1
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
#endif
/* GEMM3M Q: dedicated default when defined, otherwise the real-precision default */
#if BUILD_COMPLEX == 1
#ifdef CGEMM3M_DEFAULT_Q
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif
#endif
#if BUILD_COMPLEX16 == 1
#ifdef ZGEMM3M_DEFAULT_Q
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif
/* ---- Step 3: per-core P blocking ---- */
/* Cores below scale P with the L2 size: a multiplier times (l2 >> 7). */
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
#ifdef DEBUG
fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
#endif
#endif
#ifdef CORE_NORTHWOOD
#ifdef DEBUG
fprintf(stderr, "Northwood\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
#endif
#endif
/* Atom: fixed P values, independent of l2. */
#ifdef ATOM
#ifdef DEBUG
fprintf(stderr, "Atom\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 256;
#endif
#if BUILD_DOUBLE ==1
TABLE_NAME.dgemm_p = 128;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 128;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 64;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 64;
TABLE_NAME.xgemm_p = 32;
#endif
#endif
#ifdef CORE_PRESCOTT
#ifdef DEBUG
fprintf(stderr, "Prescott\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
#endif
#if BUILD_DOUBLE ==1
TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16 == 1
TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
#endif
#endif
/* Core2 / Penryn / Dunnington: P = multiplier * (l2 >> 9) plus a small constant. */
#ifdef CORE2
#ifdef DEBUG
fprintf(stderr, "Core2\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
#endif
#endif
#ifdef PENRYN
#ifdef DEBUG
fprintf(stderr, "Penryn\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
#endif
#endif
#ifdef DUNNINGTON
#ifdef DEBUG
fprintf(stderr, "Dunnington\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE ==1
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
#endif
#endif
/* Except for Opteron, the remaining sections take P from the *_DEFAULT_P macros. */
#ifdef NEHALEM
#ifdef DEBUG
fprintf(stderr, "Nehalem\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef SANDYBRIDGE
#ifdef DEBUG
fprintf(stderr, "Sandybridge\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef HASWELL
#ifdef DEBUG
fprintf(stderr, "Haswell\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
/* The debug message below says "SkylakeX" for COOPERLAKE builds as well. */
#if defined(SKYLAKEX) || defined(COOPERLAKE)
#ifdef DEBUG
fprintf(stderr, "SkylakeX\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
/* Opteron: P = constant base + multiplier * (l2 >> 7). */
#ifdef OPTERON
#ifdef DEBUG
fprintf(stderr, "Opteron\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
#endif
#endif
#ifdef BARCELONA
#ifdef DEBUG
fprintf(stderr, "Barcelona\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef BOBCAT
#ifdef DEBUG
fprintf(stderr, "Bobcate\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef BULLDOZER
#ifdef DEBUG
fprintf(stderr, "Bulldozer\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef EXCAVATOR
#ifdef DEBUG
fprintf(stderr, "Excavator\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef PILEDRIVER
#ifdef DEBUG
fprintf(stderr, "Piledriver\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef STEAMROLLER
#ifdef DEBUG
fprintf(stderr, "Steamroller\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef ZEN
#ifdef DEBUG
fprintf(stderr, "Zen\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef NANO
#ifdef DEBUG
fprintf(stderr, "NANO\n");
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if (BUILD_DOUBLE==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX==1)
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX16==1)
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
/* ---- Step 4a: GEMM3M P -- dedicated default if defined, else the real-precision P set above ---- */
#if BUILD_COMPLEX==1
#ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif
#endif
#if BUILD_COMPLEX16==1
#ifdef ZGEMM3M_DEFAULT_P
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif
#endif
#ifdef EXPRECISION
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif
/* ---- Step 4b: round each P up to a multiple of its unroll factor: ((p + U - 1) / U) * U ---- */
/* Note: sgemm_p is rounded only under BUILD_SINGLE, although it may have been set for complex-only builds above. */
#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_DOUBLE== 1
TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX==1
#ifdef CGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
#else
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif
#endif
#if BUILD_COMPLEX16==1
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
#else
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif
#endif
/* NOTE(review): this rounding is guarded by QUAD_PRECISION, whereas the qgemm/xgemm fields are set under EXPRECISION everywhere else in this function -- confirm the difference is intended. */
#ifdef QUAD_PRECISION
TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
#endif
#ifdef DEBUG
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif
/*
 * ---- Step 5: R blocking ----
 * Every R below has the same shape:
 *   R = ((BUFFER_SIZE - A) / (q * size) - 15) & ~15
 * where A = (p * q * size + offsetA + align) & ~align, and `& ~15` clears
 * the low four bits of the result.
 * NOTE(review): `size` (4, 8, 16 or 32) is presumably the element size in
 * bytes, and `align` presumably a 2^k - 1 mask so that A is rounded up to
 * an alignment boundary -- confirm against the TABLE_NAME definition.
 */
#if BUILD_SINGLE==1
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
#endif
#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
#endif
#if BUILD_COMPLEX ==1
TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
#endif
#if BUILD_COMPLEX16 ==1
TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
#endif
#if BUILD_COMPLEX == 1
TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
#endif
#if BUILD_COMPLEX16 == 1
TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
#endif
#ifdef EXPRECISION
TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
#endif
}
  1583. #endif //POWER
  1584. #endif //ZARCH
  1585. #endif //(ARCH_MIPS64)
  1586. #endif //(ARCH_ARM64)