You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 48 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void); /* forward declaration: init_parameter is referenced as an entry of the TABLE_NAME initializer below, so it must be declared before the table */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. shstobf16_kTS, shdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, shdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sgemv_nTS, sgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. shgemm_kernelTS, shgemm_betaTS,
  66. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  67. shgemm_incopyTS, shgemm_itcopyTS,
  68. #else
  69. shgemm_oncopyTS, shgemm_otcopyTS,
  70. #endif
  71. shgemm_oncopyTS, shgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #endif
  104. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  105. 0, 0, 0,
  106. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  107. #ifdef SGEMM_DEFAULT_UNROLL_MN
  108. SGEMM_DEFAULT_UNROLL_MN,
  109. #else
  110. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  111. #endif
  112. #endif
  113. #ifdef HAVE_EXCLUSIVE_CACHE
  114. 1,
  115. #else
  116. 0,
  117. #endif
  118. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  119. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  120. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  121. snrm2_kTS, sasum_kTS,
  122. #endif
  123. #if BUILD_SINGLE == 1
  124. ssum_kTS,
  125. #endif
  126. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  127. scopy_kTS, sdot_kTS,
  128. // dsdot_kTS,
  129. srot_kTS, saxpy_kTS,
  130. #endif
  131. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  132. sscal_kTS,
  133. #endif
  134. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  135. sswap_kTS,
  136. sgemv_nTS, sgemv_tTS,
  137. #endif
  138. #if BUILD_SINGLE == 1
  139. sger_kTS,
  140. ssymv_LTS, ssymv_UTS,
  141. #endif
  142. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  143. #ifdef ARCH_X86_64
  144. sgemm_directTS,
  145. sgemm_direct_performantTS,
  146. #endif
  147. sgemm_kernelTS, sgemm_betaTS,
  148. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  149. sgemm_incopyTS, sgemm_itcopyTS,
  150. #else
  151. sgemm_oncopyTS, sgemm_otcopyTS,
  152. #endif
  153. sgemm_oncopyTS, sgemm_otcopyTS,
  154. #endif
  155. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  156. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  157. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  158. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  159. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  160. #else
  161. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  162. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  163. #endif
  164. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  165. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  166. #endif
  167. #if BUILD_SINGLE == 1
  168. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  171. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  172. #else
  173. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  174. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  175. #endif
  176. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  177. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  178. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  179. ssymm_iutcopyTS, ssymm_iltcopyTS,
  180. #else
  181. ssymm_outcopyTS, ssymm_oltcopyTS,
  182. #endif
  183. ssymm_outcopyTS, ssymm_oltcopyTS,
  184. #endif
  185. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  186. #ifndef NO_LAPACK
  187. sneg_tcopyTS, slaswp_ncopyTS,
  188. #else
  189. NULL,NULL,
  190. #endif
  191. #endif
  192. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  193. 0, 0, 0,
  194. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  195. #ifdef DGEMM_DEFAULT_UNROLL_MN
  196. DGEMM_DEFAULT_UNROLL_MN,
  197. #else
  198. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  199. #endif
  200. #endif
  201. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  202. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  203. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  204. dnrm2_kTS, dasum_kTS,
  205. #endif
  206. #if (BUILD_DOUBLE==1)
  207. dsum_kTS,
  208. #endif
  209. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  210. dcopy_kTS, ddot_kTS,
  211. #endif
  212. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  213. dsdot_kTS,
  214. #endif
  215. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  216. drot_kTS,
  217. daxpy_kTS,
  218. dscal_kTS,
  219. dswap_kTS,
  220. dgemv_nTS, dgemv_tTS,
  221. #endif
  222. #if (BUILD_DOUBLE==1)
  223. dger_kTS,
  224. dsymv_LTS, dsymv_UTS,
  225. #endif
  226. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  227. dgemm_kernelTS, dgemm_betaTS,
  228. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  229. dgemm_incopyTS, dgemm_itcopyTS,
  230. #else
  231. dgemm_oncopyTS, dgemm_otcopyTS,
  232. #endif
  233. dgemm_oncopyTS, dgemm_otcopyTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1)
  236. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  237. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  238. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  239. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  240. #else
  241. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  242. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  243. #endif
  244. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  245. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  246. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  247. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  248. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  249. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  250. #else
  251. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  252. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  253. #endif
  254. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  255. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  256. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  257. dsymm_iutcopyTS, dsymm_iltcopyTS,
  258. #else
  259. dsymm_outcopyTS, dsymm_oltcopyTS,
  260. #endif
  261. dsymm_outcopyTS, dsymm_oltcopyTS,
  262. #ifndef NO_LAPACK
  263. dneg_tcopyTS, dlaswp_ncopyTS,
  264. #else
  265. NULL, NULL,
  266. #endif
  267. #endif
  268. #ifdef EXPRECISION
  269. 0, 0, 0,
  270. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  271. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  272. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  273. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  274. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  275. qgemv_nTS, qgemv_tTS, qger_kTS,
  276. qsymv_LTS, qsymv_UTS,
  277. qgemm_kernelTS, qgemm_betaTS,
  278. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  279. qgemm_incopyTS, qgemm_itcopyTS,
  280. #else
  281. qgemm_oncopyTS, qgemm_otcopyTS,
  282. #endif
  283. qgemm_oncopyTS, qgemm_otcopyTS,
  284. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  285. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  286. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  287. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  288. #else
  289. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  290. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  291. #endif
  292. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  293. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  294. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  295. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  296. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  297. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  298. #else
  299. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  300. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  301. #endif
  302. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  303. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  304. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  305. qsymm_iutcopyTS, qsymm_iltcopyTS,
  306. #else
  307. qsymm_outcopyTS, qsymm_oltcopyTS,
  308. #endif
  309. qsymm_outcopyTS, qsymm_oltcopyTS,
  310. #ifndef NO_LAPACK
  311. qneg_tcopyTS, qlaswp_ncopyTS,
  312. #else
  313. NULL, NULL,
  314. #endif
  315. #endif
  316. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  317. 0, 0, 0,
  318. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  319. #ifdef CGEMM_DEFAULT_UNROLL_MN
  320. CGEMM_DEFAULT_UNROLL_MN,
  321. #else
  322. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  323. #endif
  324. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  325. #endif
  326. #if (BUILD_COMPLEX)
  327. cnrm2_kTS, casum_kTS, csum_kTS,
  328. #endif
  329. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  330. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  331. #endif
  332. #if (BUILD_COMPLEX)
  333. csrot_kTS,
  334. #endif
  335. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  336. caxpy_kTS,
  337. caxpyc_kTS,
  338. cscal_kTS,
  339. cswap_kTS,
  340. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  341. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  342. #endif
  343. #if (BUILD_COMPLEX)
  344. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  345. csymv_LTS, csymv_UTS,
  346. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  347. #endif
  348. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  349. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  350. cgemm_betaTS,
  351. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  352. cgemm_incopyTS, cgemm_itcopyTS,
  353. #else
  354. cgemm_oncopyTS, cgemm_otcopyTS,
  355. #endif
  356. cgemm_oncopyTS, cgemm_otcopyTS,
  357. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  358. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  359. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  360. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  361. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  362. #else
  363. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  364. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  365. #endif
  366. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  367. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  368. #endif
  369. #if (BUILD_COMPLEX)
  370. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  371. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  372. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  373. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  374. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  375. #else
  376. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  377. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  378. #endif
  379. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  380. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  381. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  382. csymm_iutcopyTS, csymm_iltcopyTS,
  383. #else
  384. csymm_outcopyTS, csymm_oltcopyTS,
  385. #endif
  386. csymm_outcopyTS, csymm_oltcopyTS,
  387. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  388. chemm_iutcopyTS, chemm_iltcopyTS,
  389. #else
  390. chemm_outcopyTS, chemm_oltcopyTS,
  391. #endif
  392. chemm_outcopyTS, chemm_oltcopyTS,
  393. 0, 0, 0,
  394. #if (USE_GEMM3M)
  395. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  396. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  397. #else
  398. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  399. #endif
  400. cgemm3m_kernelTS,
  401. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  402. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  403. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  404. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  405. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  406. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  407. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  408. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  409. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  410. csymm3m_oucopybTS, csymm3m_olcopybTS,
  411. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  412. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  413. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  414. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  415. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  416. chemm3m_oucopybTS, chemm3m_olcopybTS,
  417. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  418. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  419. #else
  420. 0, 0, 0,
  421. NULL,
  422. NULL, NULL,
  423. NULL, NULL,
  424. NULL, NULL,
  425. NULL, NULL,
  426. NULL, NULL,
  427. NULL, NULL,
  428. NULL, NULL,
  429. NULL, NULL,
  430. NULL, NULL,
  431. NULL, NULL,
  432. NULL, NULL,
  433. NULL, NULL,
  434. NULL, NULL,
  435. NULL, NULL,
  436. NULL, NULL,
  437. NULL, NULL,
  438. NULL, NULL,
  439. NULL, NULL,
  440. #endif
  441. #endif
  442. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  443. #ifndef NO_LAPACK
  444. cneg_tcopyTS,
  445. claswp_ncopyTS,
  446. #else
  447. NULL, NULL,
  448. #endif
  449. #endif
  450. #if BUILD_COMPLEX16 == 1
  451. 0, 0, 0,
  452. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  453. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  454. ZGEMM_DEFAULT_UNROLL_MN,
  455. #else
  456. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  457. #endif
  458. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  459. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  460. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  461. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  462. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  463. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  464. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  465. zsymv_LTS, zsymv_UTS,
  466. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  467. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  468. zgemm_betaTS,
  469. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  470. zgemm_incopyTS, zgemm_itcopyTS,
  471. #else
  472. zgemm_oncopyTS, zgemm_otcopyTS,
  473. #endif
  474. zgemm_oncopyTS, zgemm_otcopyTS,
  475. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  476. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  477. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  478. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  479. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  480. #else
  481. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  482. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  483. #endif
  484. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  485. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  486. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  487. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  488. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  489. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  490. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  491. #else
  492. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  493. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  494. #endif
  495. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  496. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  497. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  498. zsymm_iutcopyTS, zsymm_iltcopyTS,
  499. #else
  500. zsymm_outcopyTS, zsymm_oltcopyTS,
  501. #endif
  502. zsymm_outcopyTS, zsymm_oltcopyTS,
  503. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  504. zhemm_iutcopyTS, zhemm_iltcopyTS,
  505. #else
  506. zhemm_outcopyTS, zhemm_oltcopyTS,
  507. #endif
  508. zhemm_outcopyTS, zhemm_oltcopyTS,
  509. 0, 0, 0,
  510. #if (USE_GEMM3M)
  511. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  512. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  513. #else
  514. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  515. #endif
  516. zgemm3m_kernelTS,
  517. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  518. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  519. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  520. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  521. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  522. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  523. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  524. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  525. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  526. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  527. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  528. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  529. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  530. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  531. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  532. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  533. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  534. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  535. #else
  536. 0, 0, 0,
  537. NULL,
  538. NULL, NULL,
  539. NULL, NULL,
  540. NULL, NULL,
  541. NULL, NULL,
  542. NULL, NULL,
  543. NULL, NULL,
  544. NULL, NULL,
  545. NULL, NULL,
  546. NULL, NULL,
  547. NULL, NULL,
  548. NULL, NULL,
  549. NULL, NULL,
  550. NULL, NULL,
  551. NULL, NULL,
  552. NULL, NULL,
  553. NULL, NULL,
  554. NULL, NULL,
  555. NULL, NULL,
  556. #endif
  557. #ifndef NO_LAPACK
  558. zneg_tcopyTS, zlaswp_ncopyTS,
  559. #else
  560. NULL, NULL,
  561. #endif
  562. #endif
  563. #ifdef EXPRECISION
  564. 0, 0, 0,
  565. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  566. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  567. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  568. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  569. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  570. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  571. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  572. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  573. xsymv_LTS, xsymv_UTS,
  574. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  575. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  576. xgemm_betaTS,
  577. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  578. xgemm_incopyTS, xgemm_itcopyTS,
  579. #else
  580. xgemm_oncopyTS, xgemm_otcopyTS,
  581. #endif
  582. xgemm_oncopyTS, xgemm_otcopyTS,
  583. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  584. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  585. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  586. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  587. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  588. #else
  589. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  590. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  591. #endif
  592. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  593. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  594. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  595. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  596. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  597. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  598. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  599. #else
  600. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  601. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  602. #endif
  603. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  604. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  605. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  606. xsymm_iutcopyTS, xsymm_iltcopyTS,
  607. #else
  608. xsymm_outcopyTS, xsymm_oltcopyTS,
  609. #endif
  610. xsymm_outcopyTS, xsymm_oltcopyTS,
  611. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  612. xhemm_iutcopyTS, xhemm_iltcopyTS,
  613. #else
  614. xhemm_outcopyTS, xhemm_oltcopyTS,
  615. #endif
  616. xhemm_outcopyTS, xhemm_oltcopyTS,
  617. 0, 0, 0,
  618. #if (USE_GEMM3M)
  619. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  620. xgemm3m_kernelTS,
  621. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  622. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  623. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  624. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  625. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  626. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  627. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  628. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  629. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  630. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  631. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  632. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  633. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  634. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  635. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  636. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  637. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  638. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  639. #else
  640. 0, 0, 0,
  641. NULL,
  642. NULL, NULL,
  643. NULL, NULL,
  644. NULL, NULL,
  645. NULL, NULL,
  646. NULL, NULL,
  647. NULL, NULL,
  648. NULL, NULL,
  649. NULL, NULL,
  650. NULL, NULL,
  651. NULL, NULL,
  652. NULL, NULL,
  653. NULL, NULL,
  654. NULL, NULL,
  655. NULL, NULL,
  656. NULL, NULL,
  657. NULL, NULL,
  658. NULL, NULL,
  659. NULL, NULL,
  660. #endif
  661. #ifndef NO_LAPACK
  662. xneg_tcopyTS, xlaswp_ncopyTS,
  663. #else
  664. NULL, NULL,
  665. #endif
  666. #endif
  667. init_parameter,
  668. SNUMOPT, DNUMOPT, QNUMOPT,
  669. #if BUILD_SINGLE == 1
  670. saxpby_kTS,
  671. #endif
  672. #if BUILD_DOUBLE == 1
  673. daxpby_kTS,
  674. #endif
  675. #if BUILD_COMPLEX == 1
  676. caxpby_kTS,
  677. #endif
  678. #if BUILD_COMPLEX16== 1
  679. zaxpby_kTS,
  680. #endif
  681. #if BUILD_SINGLE == 1
  682. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  683. #endif
  684. #if BUILD_DOUBLE== 1
  685. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  686. #endif
  687. #if BUILD_COMPLEX == 1
  688. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  689. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  690. #endif
  691. #if BUILD_COMPLEX16 == 1
  692. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  693. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  694. #endif
  695. #if BUILD_SINGLE == 1
  696. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  697. #endif
  698. #if BUILD_DOUBLE== 1
  699. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  700. #endif
  701. #if BUILD_COMPLEX== 1
  702. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  703. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  704. #endif
  705. #if BUILD_COMPLEX16==1
  706. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  707. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  708. #endif
  709. #if BUILD_SINGLE == 1
  710. sgeadd_kTS,
  711. #endif
  712. #if BUILD_DOUBLE==1
  713. dgeadd_kTS,
  714. #endif
  715. #if BUILD_COMPLEX==1
  716. cgeadd_kTS,
  717. #endif
  718. #if BUILD_COMPLEX16==1
  719. zgeadd_kTS
  720. #endif
  721. };
  722. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter().
   *
   * Copies the compile-time *GEMM_DEFAULT_{P,Q,R} blocking constants into the
   * corresponding fields of TABLE_NAME for every precision that is enabled in
   * this build.  No runtime probing is done here.
   *
   * Guard policy (kept identical for the P, Q and R fields of one precision):
   *   - sgemm_*  : BUILD_SINGLE or BUILD_COMPLEX, because the cgemm3m_*
   *                fallbacks below read TABLE_NAME.sgemm_{p,q,r}.
   *   - dgemm_*  : BUILD_DOUBLE or BUILD_COMPLEX16, because the zgemm3m_*
   *                fallbacks below read TABLE_NAME.dgemm_{p,q,r}.
   * The original code used the wide guard only for sgemm_p, so in a
   * complex-only build the 3M fallbacks copied fields that were never set.
   */
  static void init_parameter(void) {

    /* ---- P ---- */
  #if (BUILD_HALF)
    TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  #endif

    /* ---- Q ---- */
  #if (BUILD_HALF)
    TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

    /* ---- R ---- */
  #if (BUILD_HALF)
    TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision (q = real, x = complex) ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * ---- GEMM3M ----
     * Each 3M parameter uses its dedicated *GEMM3M_DEFAULT_* constant when one
     * is defined; otherwise it inherits the value already stored for the
     * matching real precision (s for c, d for z, q for x).
     */
  #if (USE_GEMM3M)
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif

  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif

  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif

  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */

  }
  815. #else // (ARCH_ARM64)
  816. #if (ARCH_POWER)
  817. static void init_parameter(void) {
  818. #ifdef BUILD_HALF
  819. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  820. #endif
  821. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  822. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  823. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  824. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  825. #ifdef BUILD_HALF
  826. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  827. #endif
  828. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  829. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  830. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  831. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  832. #ifdef BUILD_HALF
  833. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  834. #endif
  835. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  836. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  837. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  838. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  839. }
  840. #else //POWER
  841. #if (ARCH_ZARCH)
  842. static void init_parameter(void) {
  843. #ifdef BUILD_HALF
  844. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  845. #endif
  846. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  847. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  848. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  849. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  850. #ifdef BUILD_HALF
  851. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  852. #endif
  853. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  854. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  855. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  856. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  857. #ifdef BUILD_HALF
  858. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  859. #endif
  860. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  861. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  862. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  863. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  864. }
  865. #else //ZARCH
  866. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup via CPUID leaf 2 cache descriptors.
   *
   * Issues cpuid(2), splits the returned registers into 15 descriptor bytes
   * (bits 8..31 of eax and all four bytes of ebx, ecx and edx; the low byte of
   * eax is not treated as a descriptor) and returns the size associated with
   * the first descriptor found in the table below.
   *
   * Returns the L2 size in KB.  If no descriptor matches, a warning is printed
   * to stderr and 256 is returned.
   *
   * NOTE(review): BITMASK(reg, shift, mask) is defined elsewhere; presumably it
   * evaluates to ((reg >> shift) & mask) - confirm.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* Descriptor 0x48 is a 3 MB L2 = 3072 KB (the old value 3184 was a typo). */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* No known L2 descriptor among the 15 bytes: warn and use a fixed fallback. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  937. #endif
  /*
   * Returns the L2 cache size used to tune the GEMM blocking parameters.
   *
   * Reads CPUID leaf 0x80000006 and takes the 16-bit field extracted by
   * BITMASK(ecx, 16, 0xffff).  A positive value is returned as-is.
   * Otherwise:
   *   - when ARCH_X86 is defined, the descriptor-based get_l2_size_old()
   *     is consulted;
   *   - in all other builds a warning is printed to stderr and 256 is returned
   *     (the message calls this "256k").
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int cache_size;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    cache_size = BITMASK(ecx, 16, 0xffff);

    /* Common case: the extended leaf reported a usable size. */
    if (cache_size > 0) return cache_size;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Returns the L3 cache size derived from CPUID leaf 0x80000006:
   * the field extracted by BITMASK(edx, 18, 0x3fff), multiplied by 512.
   *
   * NOTE(review): the result is presumably in KB (the field counts 512 KB
   * units on CPUs that implement it) - confirm against the CPUID spec.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    units = BITMASK(edx, 18, 0x3fff);

    return units * 512;
  }
  /*
   * x86 / x86_64 variant of init_parameter().
   *
   * Fills the GEMM blocking fields of TABLE_NAME in four steps:
   *   1. Q values come straight from the *GEMM_DEFAULT_Q constants.
   *   2. P values are chosen per target core: older cores scale P with the
   *      detected L2 size (get_l2_size()), the others use *GEMM_DEFAULT_P.
   *   3. P values are rounded up to a multiple of the matching
   *      *GEMM_DEFAULT_UNROLL_M.
   *   4. R values are derived from BUFFER_SIZE, P, Q and the element size.
   *
   * Changes relative to the previous revision:
   *   - dgemm_p is now written for BUILD_DOUBLE *or* BUILD_COMPLEX16 on every
   *     core (previously only Haswell did so).  zgemm3m_p falls back to
   *     TABLE_NAME.dgemm_p, so complex16-only builds used to copy a field that
   *     was never set.
   *   - The per-precision build guards are defined once (INITP_SET_*_P) rather
   *     than being repeated, with varying spelling, in every core section.
   *   - Debug message typo "Bobcate" corrected.
   */
  static void init_parameter(void) {

    int l2 = get_l2_size();

    (void) l2; /* dirty trick to suppress unused variable warning for targets */
               /* where the GEMM unrolling parameters do not depend on l2 */

    /*
     * Local helpers (all #undef'd at the end of this function).
     *
     * INITP_SET_xGEMM_P(v): store v into TABLE_NAME.xgemm_p when precision x
     * is part of this build, otherwise expand to a no-op.  In the no-op case
     * the argument is discarded by the preprocessor, so it may name constants
     * that are not defined in this configuration.
     */
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  #define INITP_SET_SGEMM_P(v) (TABLE_NAME.sgemm_p = (v))
  #else
  #define INITP_SET_SGEMM_P(v) ((void)0)
  #endif

  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  #define INITP_SET_DGEMM_P(v) (TABLE_NAME.dgemm_p = (v))
  #else
  #define INITP_SET_DGEMM_P(v) ((void)0)
  #endif

  #if BUILD_COMPLEX==1
  #define INITP_SET_CGEMM_P(v) (TABLE_NAME.cgemm_p = (v))
  #else
  #define INITP_SET_CGEMM_P(v) ((void)0)
  #endif

  #if BUILD_COMPLEX16==1
  #define INITP_SET_ZGEMM_P(v) (TABLE_NAME.zgemm_p = (v))
  #else
  #define INITP_SET_ZGEMM_P(v) ((void)0)
  #endif

  #ifdef EXPRECISION
  #define INITP_SET_QGEMM_P(v) (TABLE_NAME.qgemm_p = (v))
  #define INITP_SET_XGEMM_P(v) (TABLE_NAME.xgemm_p = (v))
  #else
  #define INITP_SET_QGEMM_P(v) ((void)0)
  #define INITP_SET_XGEMM_P(v) ((void)0)
  #endif

    /* Assign the compile-time default P to every enabled precision. */
  #define INITP_SET_DEFAULT_P() do {          \
      INITP_SET_SGEMM_P(SGEMM_DEFAULT_P);     \
      INITP_SET_DGEMM_P(DGEMM_DEFAULT_P);     \
      INITP_SET_CGEMM_P(CGEMM_DEFAULT_P);     \
      INITP_SET_ZGEMM_P(ZGEMM_DEFAULT_P);     \
      INITP_SET_QGEMM_P(QGEMM_DEFAULT_P);     \
      INITP_SET_XGEMM_P(XGEMM_DEFAULT_P);     \
    } while (0)

    /* Round x up to the next multiple of unroll. */
  #define INITP_ROUND_UP(x, unroll) \
    ((((x) + (unroll) - 1) / (unroll)) * (unroll))

    /*
     * R for a given P, Q and element size in bytes:
     * take BUFFER_SIZE, subtract the P*Q*esize panel footprint (plus offsetA,
     * rounded up with the TABLE_NAME.align mask), divide by the bytes in one
     * Q-long row, subtract 15 and clear the low four bits.
     */
  #define INITP_GEMM_R(p, q, esize)                                              \
    (((BUFFER_SIZE -                                                             \
       (((p) * (q) * (esize) + TABLE_NAME.offsetA                                \
         + TABLE_NAME.align) & ~TABLE_NAME.align)                                \
      ) / ((q) * (esize)) - 15) & ~15)

    /* ------------------------------------------------------------------ */
    /* Step 1: Q (and the fixed half-precision P/R/Q)                       */
    /* ------------------------------------------------------------------ */

  #ifdef BUILD_HALF
    TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
    TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
    TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  #endif

  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

  #if BUILD_COMPLEX==1
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  #endif
  #endif

  #if BUILD_COMPLEX16==1
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  #endif
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_q   = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q   = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  #endif

    /* ------------------------------------------------------------------ */
    /* Step 2: P, per target core                                           */
    /* ------------------------------------------------------------------ */

  #if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON)

  #ifdef DEBUG
    fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  #endif

    INITP_SET_SGEMM_P(64 * (l2 >> 7));
    INITP_SET_DGEMM_P(32 * (l2 >> 7));
    INITP_SET_CGEMM_P(32 * (l2 >> 7));
    INITP_SET_ZGEMM_P(16 * (l2 >> 7));
    INITP_SET_QGEMM_P(16 * (l2 >> 7));
    INITP_SET_XGEMM_P( 8 * (l2 >> 7));
  #endif

  #ifdef CORE_NORTHWOOD

  #ifdef DEBUG
    fprintf(stderr, "Northwood\n");
  #endif

    INITP_SET_SGEMM_P(96 * (l2 >> 7));
    INITP_SET_DGEMM_P(48 * (l2 >> 7));
    INITP_SET_CGEMM_P(48 * (l2 >> 7));
    INITP_SET_ZGEMM_P(24 * (l2 >> 7));
    INITP_SET_QGEMM_P(24 * (l2 >> 7));
    INITP_SET_XGEMM_P(12 * (l2 >> 7));
  #endif

  #ifdef ATOM

  #ifdef DEBUG
    fprintf(stderr, "Atom\n");
  #endif

    /* Atom uses fixed values that do not depend on l2. */
    INITP_SET_SGEMM_P(256);
    INITP_SET_DGEMM_P(128);
    INITP_SET_CGEMM_P(128);
    INITP_SET_ZGEMM_P(64);
    INITP_SET_QGEMM_P(64);
    INITP_SET_XGEMM_P(32);
  #endif

  #ifdef CORE_PRESCOTT

  #ifdef DEBUG
    fprintf(stderr, "Prescott\n");
  #endif

    INITP_SET_SGEMM_P(56 * (l2 >> 7));
    INITP_SET_DGEMM_P(28 * (l2 >> 7));
    INITP_SET_CGEMM_P(28 * (l2 >> 7));
    INITP_SET_ZGEMM_P(14 * (l2 >> 7));
    INITP_SET_QGEMM_P(14 * (l2 >> 7));
    INITP_SET_XGEMM_P( 7 * (l2 >> 7));
  #endif

  #ifdef CORE2

  #ifdef DEBUG
    fprintf(stderr, "Core2\n");
  #endif

    INITP_SET_SGEMM_P(92 * (l2 >> 9) + 8);
    INITP_SET_DGEMM_P(46 * (l2 >> 9) + 8);
    INITP_SET_CGEMM_P(46 * (l2 >> 9) + 4);
    INITP_SET_ZGEMM_P(23 * (l2 >> 9) + 4);
    INITP_SET_QGEMM_P(92 * (l2 >> 9) + 8);
    INITP_SET_XGEMM_P(46 * (l2 >> 9) + 4);
  #endif

  #ifdef PENRYN

  #ifdef DEBUG
    fprintf(stderr, "Penryn\n");
  #endif

    INITP_SET_SGEMM_P(42 * (l2 >> 9) + 8);
    INITP_SET_DGEMM_P(42 * (l2 >> 9) + 8);
    INITP_SET_CGEMM_P(21 * (l2 >> 9) + 4);
    INITP_SET_ZGEMM_P(21 * (l2 >> 9) + 4);
    INITP_SET_QGEMM_P(42 * (l2 >> 9) + 8);
    INITP_SET_XGEMM_P(21 * (l2 >> 9) + 4);
  #endif

  #ifdef DUNNINGTON

  #ifdef DEBUG
    fprintf(stderr, "Dunnington\n");
  #endif

    INITP_SET_SGEMM_P(42 * (l2 >> 9) + 8);
    INITP_SET_DGEMM_P(42 * (l2 >> 9) + 8);
    INITP_SET_CGEMM_P(21 * (l2 >> 9) + 4);
    INITP_SET_ZGEMM_P(21 * (l2 >> 9) + 4);
    INITP_SET_QGEMM_P(42 * (l2 >> 9) + 8);
    INITP_SET_XGEMM_P(21 * (l2 >> 9) + 4);
  #endif

  #ifdef NEHALEM

  #ifdef DEBUG
    fprintf(stderr, "Nehalem\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef SANDYBRIDGE

  #ifdef DEBUG
    fprintf(stderr, "Sandybridge\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef HASWELL

  #ifdef DEBUG
    fprintf(stderr, "Haswell\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #if defined(SKYLAKEX) || defined(COOPERLAKE)

  #ifdef DEBUG
    fprintf(stderr, "SkylakeX\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef OPTERON

  #ifdef DEBUG
    fprintf(stderr, "Opteron\n");
  #endif

    INITP_SET_SGEMM_P(224 + 56 * (l2 >> 7));
    INITP_SET_DGEMM_P(112 + 28 * (l2 >> 7));
    INITP_SET_CGEMM_P(112 + 28 * (l2 >> 7));
    INITP_SET_ZGEMM_P( 56 + 14 * (l2 >> 7));
    INITP_SET_QGEMM_P( 56 + 14 * (l2 >> 7));
    INITP_SET_XGEMM_P( 28 +  7 * (l2 >> 7));
  #endif

  #ifdef BARCELONA

  #ifdef DEBUG
    fprintf(stderr, "Barcelona\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef BOBCAT

  #ifdef DEBUG
    fprintf(stderr, "Bobcat\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef BULLDOZER

  #ifdef DEBUG
    fprintf(stderr, "Bulldozer\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef EXCAVATOR

  #ifdef DEBUG
    fprintf(stderr, "Excavator\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef PILEDRIVER

  #ifdef DEBUG
    fprintf(stderr, "Piledriver\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef STEAMROLLER

  #ifdef DEBUG
    fprintf(stderr, "Steamroller\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef ZEN

  #ifdef DEBUG
    fprintf(stderr, "Zen\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

  #ifdef NANO

  #ifdef DEBUG
    fprintf(stderr, "NANO\n");
  #endif

    INITP_SET_DEFAULT_P();
  #endif

    /*
     * GEMM3M P: dedicated constant when available, otherwise inherit the P
     * just chosen for the matching real precision.
     */
  #if BUILD_COMPLEX==1
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #endif

  #if BUILD_COMPLEX16==1
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  #endif

    /* ------------------------------------------------------------------ */
    /* Step 3: round P up to a multiple of the kernel's M unroll            */
    /* ------------------------------------------------------------------ */

  #if BUILD_SINGLE == 1
    TABLE_NAME.sgemm_p = INITP_ROUND_UP(TABLE_NAME.sgemm_p, SGEMM_DEFAULT_UNROLL_M);
  #endif
  #if BUILD_DOUBLE == 1
    TABLE_NAME.dgemm_p = INITP_ROUND_UP(TABLE_NAME.dgemm_p, DGEMM_DEFAULT_UNROLL_M);
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = INITP_ROUND_UP(TABLE_NAME.cgemm_p, CGEMM_DEFAULT_UNROLL_M);
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = INITP_ROUND_UP(TABLE_NAME.zgemm_p, ZGEMM_DEFAULT_UNROLL_M);
  #endif

  #if BUILD_COMPLEX==1
  #ifdef CGEMM3M_DEFAULT_UNROLL_M
    TABLE_NAME.cgemm3m_p = INITP_ROUND_UP(TABLE_NAME.cgemm3m_p, CGEMM3M_DEFAULT_UNROLL_M);
  #else
    TABLE_NAME.cgemm3m_p = INITP_ROUND_UP(TABLE_NAME.cgemm3m_p, SGEMM_DEFAULT_UNROLL_M);
  #endif
  #endif

  #if BUILD_COMPLEX16==1
  #ifdef ZGEMM3M_DEFAULT_UNROLL_M
    TABLE_NAME.zgemm3m_p = INITP_ROUND_UP(TABLE_NAME.zgemm3m_p, ZGEMM3M_DEFAULT_UNROLL_M);
  #else
    TABLE_NAME.zgemm3m_p = INITP_ROUND_UP(TABLE_NAME.zgemm3m_p, DGEMM_DEFAULT_UNROLL_M);
  #endif
  #endif

    /*
     * NOTE(review): this rounding is guarded by QUAD_PRECISION while every
     * other q/x assignment in this function is guarded by EXPRECISION.
     * Left unchanged; confirm which macro is intended.
     */
  #ifdef QUAD_PRECISION
    TABLE_NAME.qgemm_p   = INITP_ROUND_UP(TABLE_NAME.qgemm_p,   QGEMM_DEFAULT_UNROLL_M);
    TABLE_NAME.xgemm_p   = INITP_ROUND_UP(TABLE_NAME.xgemm_p,   XGEMM_DEFAULT_UNROLL_M);
    TABLE_NAME.xgemm3m_p = INITP_ROUND_UP(TABLE_NAME.xgemm3m_p, QGEMM_DEFAULT_UNROLL_M);
  #endif

  #ifdef DEBUG
    fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  #endif

    /* ------------------------------------------------------------------ */
    /* Step 4: R derived from BUFFER_SIZE, P, Q and element size            */
    /* ------------------------------------------------------------------ */

  #if BUILD_SINGLE==1
    TABLE_NAME.sgemm_r = INITP_GEMM_R(TABLE_NAME.sgemm_p, TABLE_NAME.sgemm_q, 4);
  #endif

  #if BUILD_DOUBLE==1
    TABLE_NAME.dgemm_r = INITP_GEMM_R(TABLE_NAME.dgemm_p, TABLE_NAME.dgemm_q, 8);
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_r = INITP_GEMM_R(TABLE_NAME.qgemm_p, TABLE_NAME.qgemm_q, 16);
  #endif

  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_r = INITP_GEMM_R(TABLE_NAME.cgemm_p, TABLE_NAME.cgemm_q, 8);
  #endif

  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_r = INITP_GEMM_R(TABLE_NAME.zgemm_p, TABLE_NAME.zgemm_q, 16);
  #endif

  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm3m_r = INITP_GEMM_R(TABLE_NAME.cgemm3m_p, TABLE_NAME.cgemm3m_q, 8);
  #endif

  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm3m_r = INITP_GEMM_R(TABLE_NAME.zgemm3m_p, TABLE_NAME.zgemm3m_q, 16);
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.xgemm_r   = INITP_GEMM_R(TABLE_NAME.xgemm_p,   TABLE_NAME.xgemm_q,   32);
    TABLE_NAME.xgemm3m_r = INITP_GEMM_R(TABLE_NAME.xgemm3m_p, TABLE_NAME.xgemm3m_q, 32);
  #endif

    /* Drop the function-local helper macros. */
  #undef INITP_GEMM_R
  #undef INITP_ROUND_UP
  #undef INITP_SET_DEFAULT_P
  #undef INITP_SET_XGEMM_P
  #undef INITP_SET_QGEMM_P
  #undef INITP_SET_ZGEMM_P
  #undef INITP_SET_CGEMM_P
  #undef INITP_SET_DGEMM_P
  #undef INITP_SET_SGEMM_P

  }
  1522. #endif //POWER
  1523. #endif //ZARCH
  1524. #endif //(ARCH_ARM64)