You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 42 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * TABLE_NAME: statically initialised gotoblas_t table.
   *
   * The initialiser is purely positional, so entry order is significant:
   * each value fills the next gotoblas_t member (the member layout is
   * declared outside this file).  Every #if/#else pair below contributes
   * the same number of entries in both arms; groups guarded by a lone
   * #ifdef (BUILD_HALF, EXPRECISION, ARCH_X86_64) add entries only when
   * that macro is defined.
   *
   * Each per-type group starts with "0, 0, 0,".
   * NOTE(review): these are presumably the <type>gemm_p/_q/_r members that
   * init_parameter() assigns at run time - confirm against the gotoblas_t
   * declaration before touching them.
   *
   * Recurring pattern: when <T>GEMM_DEFAULT_UNROLL_M differs from
   * <T>GEMM_DEFAULT_UNROLL_N the "i*" copy symbols are listed first,
   * otherwise the "o*" symbols are listed there; the "o*" symbols are then
   * listed again unconditionally.
   */
  46. gotoblas_t TABLE_NAME = {
  /* Global defaults taken from the DTB_ / GEMM_DEFAULT_* macros. */
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- BUILD_HALF group (SHGEMM_* unroll macros, sh* symbols) ---- */
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  /* Third unroll value: explicit _MN macro if defined, else MAX(M, N). */
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. shstobf16_kTS, shdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  /* Apart from shdot/shgemm_*, the entries in this group are the s-prefixed
     symbols that also appear in the s* group below. */
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, shdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sgemv_nTS, sgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. shgemm_kernelTS, shgemm_betaTS,
  66. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  67. shgemm_incopyTS, shgemm_itcopyTS,
  68. #else
  69. shgemm_oncopyTS, shgemm_otcopyTS,
  70. #endif
  71. shgemm_oncopyTS, shgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  /* Note: the trsm/trmm/symm selections in this group test the SGEMM (not
     SHGEMM) unroll macros. */
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* With NO_LAPACK the neg_tcopy / laswp_ncopy slots are filled with NULL. */
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #endif
  /* ---- s* group (SGEMM_* unroll macros, s-prefixed symbols) ---- */
  104. 0, 0, 0,
  105. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  106. #ifdef SGEMM_DEFAULT_UNROLL_MN
  107. SGEMM_DEFAULT_UNROLL_MN,
  108. #else
  109. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  110. #endif
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  111. #ifdef HAVE_EXCLUSIVE_CACHE
  112. 1,
  113. #else
  114. 0,
  115. #endif
  116. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  117. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  118. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  119. dsdot_kTS,
  120. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  121. sgemv_nTS, sgemv_tTS, sger_kTS,
  122. ssymv_LTS, ssymv_UTS,
  /* Two extra entries that exist only when ARCH_X86_64 is defined. */
  123. #ifdef ARCH_X86_64
  124. sgemm_directTS,
  125. sgemm_direct_performantTS,
  126. #endif
  127. sgemm_kernelTS, sgemm_betaTS,
  128. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  129. sgemm_incopyTS, sgemm_itcopyTS,
  130. #else
  131. sgemm_oncopyTS, sgemm_otcopyTS,
  132. #endif
  133. sgemm_oncopyTS, sgemm_otcopyTS,
  134. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  135. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  136. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  137. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  138. #else
  139. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  140. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  141. #endif
  142. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  143. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  144. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  145. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  146. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  147. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  148. #else
  149. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  150. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  151. #endif
  152. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  153. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  154. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  155. ssymm_iutcopyTS, ssymm_iltcopyTS,
  156. #else
  157. ssymm_outcopyTS, ssymm_oltcopyTS,
  158. #endif
  159. ssymm_outcopyTS, ssymm_oltcopyTS,
  160. #ifndef NO_LAPACK
  161. sneg_tcopyTS, slaswp_ncopyTS,
  162. #else
  163. NULL,NULL,
  164. #endif
  /* ---- d* group (DGEMM_* unroll macros, d-prefixed symbols) ---- */
  165. 0, 0, 0,
  166. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  167. #ifdef DGEMM_DEFAULT_UNROLL_MN
  168. DGEMM_DEFAULT_UNROLL_MN,
  169. #else
  170. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  171. #endif
  172. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  173. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  174. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  175. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  176. dgemv_nTS, dgemv_tTS, dger_kTS,
  177. dsymv_LTS, dsymv_UTS,
  178. dgemm_kernelTS, dgemm_betaTS,
  179. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  180. dgemm_incopyTS, dgemm_itcopyTS,
  181. #else
  182. dgemm_oncopyTS, dgemm_otcopyTS,
  183. #endif
  184. dgemm_oncopyTS, dgemm_otcopyTS,
  185. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  186. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  187. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  188. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  189. #else
  190. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  191. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  192. #endif
  193. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  194. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  195. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  196. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  197. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  198. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  199. #else
  200. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  201. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  202. #endif
  203. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  204. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  205. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  206. dsymm_iutcopyTS, dsymm_iltcopyTS,
  207. #else
  208. dsymm_outcopyTS, dsymm_oltcopyTS,
  209. #endif
  210. dsymm_outcopyTS, dsymm_oltcopyTS,
  211. #ifndef NO_LAPACK
  212. dneg_tcopyTS, dlaswp_ncopyTS,
  213. #else
  214. NULL, NULL,
  215. #endif
  /* ---- q* group, present only with EXPRECISION (QGEMM_* macros) ---- */
  216. #ifdef EXPRECISION
  217. 0, 0, 0,
  218. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  219. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  220. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  221. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  222. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  223. qgemv_nTS, qgemv_tTS, qger_kTS,
  224. qsymv_LTS, qsymv_UTS,
  225. qgemm_kernelTS, qgemm_betaTS,
  226. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  227. qgemm_incopyTS, qgemm_itcopyTS,
  228. #else
  229. qgemm_oncopyTS, qgemm_otcopyTS,
  230. #endif
  231. qgemm_oncopyTS, qgemm_otcopyTS,
  232. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  233. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  234. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  235. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  236. #else
  237. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  238. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  239. #endif
  240. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  241. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  242. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  243. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  244. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  245. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  246. #else
  247. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  248. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  249. #endif
  250. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  251. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  252. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  253. qsymm_iutcopyTS, qsymm_iltcopyTS,
  254. #else
  255. qsymm_outcopyTS, qsymm_oltcopyTS,
  256. #endif
  257. qsymm_outcopyTS, qsymm_oltcopyTS,
  258. #ifndef NO_LAPACK
  259. qneg_tcopyTS, qlaswp_ncopyTS,
  260. #else
  261. NULL, NULL,
  262. #endif
  263. #endif
  /* ---- c* group (CGEMM_* unroll macros, c-prefixed symbols) ---- */
  264. 0, 0, 0,
  265. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  266. #ifdef CGEMM_DEFAULT_UNROLL_MN
  267. CGEMM_DEFAULT_UNROLL_MN,
  268. #else
  269. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  270. #endif
  271. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  272. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  273. cdotu_kTS, cdotc_kTS, csrot_kTS,
  274. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  275. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  276. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  277. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  278. csymv_LTS, csymv_UTS,
  279. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  280. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  281. cgemm_betaTS,
  282. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  283. cgemm_incopyTS, cgemm_itcopyTS,
  284. #else
  285. cgemm_oncopyTS, cgemm_otcopyTS,
  286. #endif
  287. cgemm_oncopyTS, cgemm_otcopyTS,
  288. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  289. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  290. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  291. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  292. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  293. #else
  294. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  295. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  296. #endif
  297. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  298. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  299. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  300. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  301. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  302. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  303. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  304. #else
  305. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  306. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  307. #endif
  308. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  309. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  310. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  311. csymm_iutcopyTS, csymm_iltcopyTS,
  312. #else
  313. csymm_outcopyTS, csymm_oltcopyTS,
  314. #endif
  315. csymm_outcopyTS, csymm_oltcopyTS,
  316. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  317. chemm_iutcopyTS, chemm_iltcopyTS,
  318. #else
  319. chemm_outcopyTS, chemm_oltcopyTS,
  320. #endif
  321. chemm_outcopyTS, chemm_oltcopyTS,
  /* ---- cgemm3m group ----
     With USE_GEMM3M: three unroll values (CGEMM3M_* if defined, otherwise the
     SGEMM_* ones) followed by 37 symbols.  Without it: 0, 0, 0 and 37 NULLs,
     so the number of entries is the same either way. */
  322. 0, 0, 0,
  323. #if defined(USE_GEMM3M)
  324. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  325. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  326. #else
  327. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  328. #endif
  329. cgemm3m_kernelTS,
  330. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  331. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  332. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  333. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  334. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  335. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  336. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  337. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  338. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  339. csymm3m_oucopybTS, csymm3m_olcopybTS,
  340. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  341. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  342. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  343. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  344. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  345. chemm3m_oucopybTS, chemm3m_olcopybTS,
  346. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  347. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  348. #else
  349. 0, 0, 0,
  350. NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. NULL, NULL,
  363. NULL, NULL,
  364. NULL, NULL,
  365. NULL, NULL,
  366. NULL, NULL,
  367. NULL, NULL,
  368. NULL, NULL,
  369. #endif
  370. #ifndef NO_LAPACK
  371. cneg_tcopyTS, claswp_ncopyTS,
  372. #else
  373. NULL, NULL,
  374. #endif
  /* ---- z* group (ZGEMM_* unroll macros, z-prefixed symbols) ---- */
  375. 0, 0, 0,
  376. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  377. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  378. ZGEMM_DEFAULT_UNROLL_MN,
  379. #else
  380. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  381. #endif
  382. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  383. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  384. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  385. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  386. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  387. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  388. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  389. zsymv_LTS, zsymv_UTS,
  390. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  391. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  392. zgemm_betaTS,
  393. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  394. zgemm_incopyTS, zgemm_itcopyTS,
  395. #else
  396. zgemm_oncopyTS, zgemm_otcopyTS,
  397. #endif
  398. zgemm_oncopyTS, zgemm_otcopyTS,
  399. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  400. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  401. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  402. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  403. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  404. #else
  405. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  406. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  407. #endif
  408. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  409. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  410. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  411. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  412. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  413. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  414. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  415. #else
  416. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  417. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  418. #endif
  419. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  420. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  421. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  422. zsymm_iutcopyTS, zsymm_iltcopyTS,
  423. #else
  424. zsymm_outcopyTS, zsymm_oltcopyTS,
  425. #endif
  426. zsymm_outcopyTS, zsymm_oltcopyTS,
  427. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  428. zhemm_iutcopyTS, zhemm_iltcopyTS,
  429. #else
  430. zhemm_outcopyTS, zhemm_oltcopyTS,
  431. #endif
  432. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- zgemm3m group ----
     Same shape as the cgemm3m group; the fallback unroll values here are the
     DGEMM_* ones. */
  433. 0, 0, 0,
  434. #if defined(USE_GEMM3M)
  435. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  436. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  437. #else
  438. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  439. #endif
  440. zgemm3m_kernelTS,
  441. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  442. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  443. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  444. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  445. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  446. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  447. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  448. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  449. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  450. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  451. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  452. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  453. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  454. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  455. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  456. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  457. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  458. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  459. #else
  460. 0, 0, 0,
  461. NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. #endif
  481. #ifndef NO_LAPACK
  482. zneg_tcopyTS, zlaswp_ncopyTS,
  483. #else
  484. NULL, NULL,
  485. #endif
  /* ---- x* group, present only with EXPRECISION (XGEMM_* macros) ---- */
  486. #ifdef EXPRECISION
  487. 0, 0, 0,
  488. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  489. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  490. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  491. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  492. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  493. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  494. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  495. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  496. xsymv_LTS, xsymv_UTS,
  497. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  498. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  499. xgemm_betaTS,
  500. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  501. xgemm_incopyTS, xgemm_itcopyTS,
  502. #else
  503. xgemm_oncopyTS, xgemm_otcopyTS,
  504. #endif
  505. xgemm_oncopyTS, xgemm_otcopyTS,
  506. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  507. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  508. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  509. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  510. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  511. #else
  512. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  513. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  514. #endif
  515. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  516. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  517. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  518. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  519. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  520. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  521. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  522. #else
  523. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  524. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  525. #endif
  526. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  527. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  528. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  529. xsymm_iutcopyTS, xsymm_iltcopyTS,
  530. #else
  531. xsymm_outcopyTS, xsymm_oltcopyTS,
  532. #endif
  533. xsymm_outcopyTS, xsymm_oltcopyTS,
  534. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  535. xhemm_iutcopyTS, xhemm_iltcopyTS,
  536. #else
  537. xhemm_outcopyTS, xhemm_oltcopyTS,
  538. #endif
  539. xhemm_outcopyTS, xhemm_oltcopyTS,
  /* ---- xgemm3m group (still inside EXPRECISION) ----
     With USE_GEMM3M the unroll values are always the QGEMM_* ones; there is
     no XGEMM3M_* override here. */
  540. 0, 0, 0,
  541. #if defined(USE_GEMM3M)
  542. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  543. xgemm3m_kernelTS,
  544. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  545. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  546. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  547. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  548. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  549. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  550. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  551. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  552. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  553. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  554. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  555. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  556. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  557. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  558. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  559. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  560. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  561. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  562. #else
  563. 0, 0, 0,
  564. NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. #endif
  584. #ifndef NO_LAPACK
  585. xneg_tcopyTS, xlaswp_ncopyTS,
  586. #else
  587. NULL, NULL,
  588. #endif
  589. #endif
  /* ---- Trailing entries: the init_parameter callback declared above, the
     SNUMOPT/DNUMOPT/QNUMOPT macros, then the axpby, omatcopy, imatcopy and
     geadd symbols for the s/d/c/z prefixes. ---- */
  590. init_parameter,
  591. SNUMOPT, DNUMOPT, QNUMOPT,
  592. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  593. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  594. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  595. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  596. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  597. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  598. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  599. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  600. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  601. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  602. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  603. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  604. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  605. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  606. };
  607. #if defined(ARCH_ARM64)
  608. static void init_parameter(void) {
  609. #if defined(BUILD_HALF)
  610. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  611. #endif
  612. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  613. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  614. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  615. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  616. #if defined(BUILD_HALF)
  617. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  618. #endif
  619. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  620. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  621. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  622. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  623. #if defined(BUILD_HALF)
  624. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  625. #endif
  626. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  627. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  628. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  629. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  630. #ifdef EXPRECISION
  631. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  632. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  633. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  634. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  635. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  636. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  637. #endif
  638. #if defined(USE_GEMM3M)
  639. #ifdef CGEMM3M_DEFAULT_P
  640. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  641. #else
  642. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  643. #endif
  644. #ifdef ZGEMM3M_DEFAULT_P
  645. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  646. #else
  647. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  648. #endif
  649. #ifdef CGEMM3M_DEFAULT_Q
  650. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  651. #else
  652. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  653. #endif
  654. #ifdef ZGEMM3M_DEFAULT_Q
  655. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  656. #else
  657. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  658. #endif
  659. #ifdef CGEMM3M_DEFAULT_R
  660. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  661. #else
  662. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  663. #endif
  664. #ifdef ZGEMM3M_DEFAULT_R
  665. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  666. #else
  667. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  668. #endif
  669. #ifdef EXPRECISION
  670. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  671. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  672. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  673. #endif
  674. #endif
  675. }
  676. #else // defined(ARCH_ARM64)
  677. #if defined(ARCH_POWER)
  678. static void init_parameter(void) {
  679. #ifdef BUILD_HALF
  680. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  681. #endif
  682. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  683. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  684. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  685. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  686. #ifdef BUILD_HALF
  687. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  688. #endif
  689. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  690. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  691. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  692. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  693. #ifdef BUILD_HALF
  694. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  695. #endif
  696. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  697. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  698. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  699. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  700. }
  701. #else //POWER
  702. #if defined(ARCH_ZARCH)
  703. static void init_parameter(void) {
  704. #ifdef BUILD_HALF
  705. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  706. #endif
  707. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  708. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  709. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  710. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  711. #ifdef BUILD_HALF
  712. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  713. #endif
  714. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  715. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  716. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  717. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  718. #ifdef BUILD_HALF
  719. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  720. #endif
  721. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  722. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  723. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  724. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  725. }
  726. #else //ZARCH
  727. #ifdef ARCH_X86
  /*
   * get_l2_size_old() -- legacy L2 cache size probe based on the one-byte
   * cache descriptors reported by CPUID leaf 2.
   *
   * The 15 descriptor bytes are scanned in the order EAX[15:8], EAX[23:16],
   * EAX[31:24], then EBX, ECX and EDX from low byte to high byte.  The low
   * byte of EAX is not a descriptor and is skipped.  The first byte that
   * matches a known L2 descriptor decides the result.
   *
   * Returns the L2 size in KB.  If no descriptor matches, a warning is
   * written to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int k;

    cpuid(2, &eax, &ebx, &ecx, &edx);

    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    /* k indexes the 16 bytes of EAX:EBX:ECX:EDX; byte 0 (low byte of EAX)
       is skipped */
    for (k = 1; k < 16; k++){
      int reg   = regs[k >> 2];
      int shift = (k & 3) * 8;

      switch (BITMASK(reg, shift, 0xff)){
        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* descriptor 0x48 is a 3 MB L2, i.e. 3 * 1024 KB (was 3184) */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  798. #endif
  /*
   * get_l2_size() -- L2 cache size in KB, taken from ECX[31:16] of CPUID
   * extended leaf 0x80000006.
   *
   * The leaf is only queried after CPUID 0x80000000 confirms that it is
   * implemented; on processors without it the register contents would be
   * unrelated data, and a non-zero garbage value would bypass the fallbacks
   * below.
   *
   * Fallback when no positive size is obtained:
   *   - ARCH_X86 builds   : defer to get_l2_size_old() (CPUID leaf 2).
   *   - all other builds  : warn on stderr and assume 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int l2 = 0;

    /* EAX of leaf 0x80000000 reports the highest supported extended leaf */
    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);

    if ((unsigned int) eax >= 0x80000006u) {
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      l2 = BITMASK(ecx, 16, 0xffff);
    }

  #ifndef ARCH_X86
    if (l2 <= 0) {
      fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
      return 256;
    }
    return l2;
  #else
    if (l2 > 0) return l2;
    return get_l2_size_old();
  #endif
  }
  /*
   * get_l3_size() -- L3 cache size derived from EDX[31:18] of CPUID extended
   * leaf 0x80000006, multiplied by 512 (the field counts 512 KB units per the
   * AMD CPUID specification, so the result is presumably in KB).
   *
   * Returns 0 when the processor does not implement leaf 0x80000006, instead
   * of decoding whatever CPUID hands back for an unsupported leaf.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;

    /* EAX of leaf 0x80000000 reports the highest supported extended leaf */
    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    if ((unsigned int) eax < 0x80000006u) return 0;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    return BITMASK(edx, 18, 0x3fff) * 512;
  }
  819. static void init_parameter(void) {
  820. int l2 = get_l2_size();
  821. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  822. /* where the GEMM unrolling parameters do not depend on l2 */
  823. #ifdef BUILD_HALF
  824. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  825. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  826. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  827. #endif
  828. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  829. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  830. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  831. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  832. #ifdef CGEMM3M_DEFAULT_Q
  833. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  834. #else
  835. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  836. #endif
  837. #ifdef ZGEMM3M_DEFAULT_Q
  838. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  839. #else
  840. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  841. #endif
  842. #ifdef EXPRECISION
  843. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  844. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  845. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  846. #endif
  847. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  848. #ifdef DEBUG
  849. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  850. #endif
  851. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  852. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  853. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  854. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  855. #ifdef EXPRECISION
  856. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  857. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  858. #endif
  859. #endif
  860. #ifdef CORE_NORTHWOOD
  861. #ifdef DEBUG
  862. fprintf(stderr, "Northwood\n");
  863. #endif
  864. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  865. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  866. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  867. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  868. #ifdef EXPRECISION
  869. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  870. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  871. #endif
  872. #endif
  873. #ifdef ATOM
  874. #ifdef DEBUG
  875. fprintf(stderr, "Atom\n");
  876. #endif
  877. TABLE_NAME.sgemm_p = 256;
  878. TABLE_NAME.dgemm_p = 128;
  879. TABLE_NAME.cgemm_p = 128;
  880. TABLE_NAME.zgemm_p = 64;
  881. #ifdef EXPRECISION
  882. TABLE_NAME.qgemm_p = 64;
  883. TABLE_NAME.xgemm_p = 32;
  884. #endif
  885. #endif
  886. #ifdef CORE_PRESCOTT
  887. #ifdef DEBUG
  888. fprintf(stderr, "Prescott\n");
  889. #endif
  890. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  891. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  892. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  893. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  894. #ifdef EXPRECISION
  895. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  896. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  897. #endif
  898. #endif
  899. #ifdef CORE2
  900. #ifdef DEBUG
  901. fprintf(stderr, "Core2\n");
  902. #endif
  903. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  904. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  905. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  906. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  907. #ifdef EXPRECISION
  908. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  909. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  910. #endif
  911. #endif
  912. #ifdef PENRYN
  913. #ifdef DEBUG
  914. fprintf(stderr, "Penryn\n");
  915. #endif
  916. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  917. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  918. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  919. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  920. #ifdef EXPRECISION
  921. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  922. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  923. #endif
  924. #endif
  925. #ifdef DUNNINGTON
  926. #ifdef DEBUG
  927. fprintf(stderr, "Dunnington\n");
  928. #endif
  929. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  930. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  931. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  932. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  933. #ifdef EXPRECISION
  934. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  935. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  936. #endif
  937. #endif
  938. #ifdef NEHALEM
  939. #ifdef DEBUG
  940. fprintf(stderr, "Nehalem\n");
  941. #endif
  942. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  943. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  944. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  945. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  946. #ifdef EXPRECISION
  947. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  948. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  949. #endif
  950. #endif
  951. #ifdef SANDYBRIDGE
  952. #ifdef DEBUG
  953. fprintf(stderr, "Sandybridge\n");
  954. #endif
  955. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  956. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  957. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  958. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  959. #ifdef EXPRECISION
  960. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  961. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  962. #endif
  963. #endif
  964. #ifdef HASWELL
  965. #ifdef DEBUG
  966. fprintf(stderr, "Haswell\n");
  967. #endif
  968. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  969. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  970. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  971. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  972. #ifdef EXPRECISION
  973. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  974. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  975. #endif
  976. #endif
  977. #if defined (SKYLAKEX) || defined (COOPERLAKE)
  978. #ifdef DEBUG
  979. fprintf(stderr, "SkylakeX\n");
  980. #endif
  981. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  982. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  983. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  984. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  985. #ifdef EXPRECISION
  986. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  987. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  988. #endif
  989. #endif
  990. #ifdef OPTERON
  991. #ifdef DEBUG
  992. fprintf(stderr, "Opteron\n");
  993. #endif
  994. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  995. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  996. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  997. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  998. #ifdef EXPRECISION
  999. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1000. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1001. #endif
  1002. #endif
  1003. #ifdef BARCELONA
  1004. #ifdef DEBUG
  1005. fprintf(stderr, "Barcelona\n");
  1006. #endif
  1007. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1008. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1009. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1010. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1011. #ifdef EXPRECISION
  1012. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1013. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1014. #endif
  1015. #endif
  1016. #ifdef BOBCAT
  1017. #ifdef DEBUG
  1018. fprintf(stderr, "Bobcate\n");
  1019. #endif
  1020. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1021. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1022. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1023. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1024. #ifdef EXPRECISION
  1025. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1026. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1027. #endif
  1028. #endif
  1029. #ifdef BULLDOZER
  1030. #ifdef DEBUG
  1031. fprintf(stderr, "Bulldozer\n");
  1032. #endif
  1033. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1034. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1035. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1036. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1037. #ifdef EXPRECISION
  1038. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1039. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1040. #endif
  1041. #endif
  1042. #ifdef EXCAVATOR
  1043. #ifdef DEBUG
  1044. fprintf(stderr, "Excavator\n");
  1045. #endif
  1046. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1047. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1048. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1049. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1050. #ifdef EXPRECISION
  1051. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1052. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1053. #endif
  1054. #endif
  1055. #ifdef PILEDRIVER
  1056. #ifdef DEBUG
  1057. fprintf(stderr, "Piledriver\n");
  1058. #endif
  1059. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1060. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1061. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1062. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1063. #ifdef EXPRECISION
  1064. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1065. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1066. #endif
  1067. #endif
  1068. #ifdef STEAMROLLER
  1069. #ifdef DEBUG
  1070. fprintf(stderr, "Steamroller\n");
  1071. #endif
  1072. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1073. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1074. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1075. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1076. #ifdef EXPRECISION
  1077. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1078. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1079. #endif
  1080. #endif
  1081. #ifdef ZEN
  1082. #ifdef DEBUG
  1083. fprintf(stderr, "Zen\n");
  1084. #endif
  1085. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1086. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1087. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1088. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1089. #ifdef EXPRECISION
  1090. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1091. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1092. #endif
  1093. #endif
  1094. #ifdef NANO
  1095. #ifdef DEBUG
  1096. fprintf(stderr, "NANO\n");
  1097. #endif
  1098. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1099. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1100. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1101. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1102. #ifdef EXPRECISION
  1103. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1104. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1105. #endif
  1106. #endif
  1107. #ifdef CGEMM3M_DEFAULT_P
  1108. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1109. #else
  1110. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1111. #endif
  1112. #ifdef ZGEMM3M_DEFAULT_P
  1113. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1114. #else
  1115. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1116. #endif
  1117. #ifdef EXPRECISION
  1118. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1119. #endif
  1120. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1121. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1122. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1123. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1124. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1125. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1126. #else
  1127. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1128. #endif
  1129. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1130. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1131. #else
  1132. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1133. #endif
  1134. #ifdef QUAD_PRECISION
  1135. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1136. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1137. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1138. #endif
  1139. #ifdef DEBUG
  1140. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1141. #endif
  1142. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1143. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1144. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1145. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1146. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1147. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1148. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1149. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1150. #ifdef EXPRECISION
  1151. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1152. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1153. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1154. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1155. #endif
  1156. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1157. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1158. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1159. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1160. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1161. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1162. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1163. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1164. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1165. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1166. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1167. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1168. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1169. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1170. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1171. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1172. #ifdef EXPRECISION
  1173. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1174. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1175. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1176. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1177. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1178. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1179. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1180. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1181. #endif
  1182. }
  1183. #endif //POWER
  1184. #endif //ZARCH
  1185. #endif //defined(ARCH_ARM64)