You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 47 kB

6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * Kernel dispatch table for this build target, written as a positional
   * initializer for gotoblas_t. Because no designators are used, every entry
   * must line up with the member order of gotoblas_t (declared elsewhere), so
   * the preprocessor guards below have to mirror the guards on the struct
   * members exactly.
   *
   * NOTE(review): the "TS" suffix on each kernel name is presumably rewritten
   * by the build into a per-target symbol suffix -- confirm against kernelTS.h.
   * NOTE(review): each "0, 0, 0," triple looks like the placeholder for that
   * precision's *gemm_p/q/r members, which init_parameter() assigns later --
   * confirm against the gotoblas_t declaration.
   */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* BUILD_HALF section: SHGEMM unroll factors, conversion kernels, a set of
   * s* level-1/2 kernels, and the shgemm/strsm/strmm/ssymm kernel and copy
   * routines. */
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. shstobf16_kTS, shdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  58. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  59. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  60. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, shdot_kTS,
  61. dsdot_kTS,
  62. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  63. sgemv_nTS, sgemv_tTS, sger_kTS,
  64. ssymv_LTS, ssymv_UTS,
  65. shgemm_kernelTS, shgemm_betaTS,
  /* When UNROLL_M == UNROLL_N the "o" copy routines are reused in the "i"
   * slots; the same pattern repeats for every precision below. */
  66. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  67. shgemm_incopyTS, shgemm_itcopyTS,
  68. #else
  69. shgemm_oncopyTS, shgemm_otcopyTS,
  70. #endif
  71. shgemm_oncopyTS, shgemm_otcopyTS,
  72. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  73. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  74. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  75. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  76. #else
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. #endif
  80. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  81. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  82. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  85. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  86. #else
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #endif
  90. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  91. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. ssymm_iutcopyTS, ssymm_iltcopyTS,
  94. #else
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #endif
  97. ssymm_outcopyTS, ssymm_oltcopyTS,
  98. #ifndef NO_LAPACK
  99. sneg_tcopyTS, slaswp_ncopyTS,
  100. #else
  101. NULL,NULL,
  102. #endif
  103. #endif
  /* Single-precision real section. Entries also needed by single-complex
   * builds are guarded with BUILD_SINGLE || BUILD_COMPLEX; the rest with
   * BUILD_SINGLE only. */
  104. #if defined( BUILD_SINGLE) || defined(BUILD_COMPLEX)
  105. 0, 0, 0,
  106. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  107. #ifdef SGEMM_DEFAULT_UNROLL_MN
  108. SGEMM_DEFAULT_UNROLL_MN,
  109. #else
  110. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  111. #endif
  112. #endif
  /* This entry is 1 when HAVE_EXCLUSIVE_CACHE is defined and 0 otherwise; it
   * is emitted for every build configuration. */
  113. #ifdef HAVE_EXCLUSIVE_CACHE
  114. 1,
  115. #else
  116. 0,
  117. #endif
  118. #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
  119. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  120. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  121. snrm2_kTS, sasum_kTS,
  122. #endif
  123. #ifdef BUILD_SINGLE
  124. ssum_kTS,
  125. #endif
  126. #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
  127. scopy_kTS, sdot_kTS,
  128. // dsdot_kTS,
  129. srot_kTS, saxpy_kTS,
  130. sscal_kTS,
  131. sswap_kTS,
  132. sgemv_nTS, sgemv_tTS,
  133. #endif
  134. #ifdef BUILD_SINGLE
  135. sger_kTS,
  136. ssymv_LTS, ssymv_UTS,
  137. #ifdef ARCH_X86_64
  138. sgemm_directTS,
  139. sgemm_direct_performantTS,
  140. #endif
  141. #endif
  142. #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
  143. sgemm_kernelTS, sgemm_betaTS,
  144. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  145. sgemm_incopyTS, sgemm_itcopyTS,
  146. #else
  147. sgemm_oncopyTS, sgemm_otcopyTS,
  148. #endif
  149. sgemm_oncopyTS, sgemm_otcopyTS,
  150. #endif
  151. #ifdef BUILD_SINGLE
  152. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  153. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  154. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  155. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  156. #else
  157. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  158. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  159. #endif
  160. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  161. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  162. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  163. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  164. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  165. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  166. #else
  167. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  168. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  169. #endif
  170. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  171. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  172. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  173. ssymm_iutcopyTS, ssymm_iltcopyTS,
  174. #else
  175. ssymm_outcopyTS, ssymm_oltcopyTS,
  176. #endif
  177. ssymm_outcopyTS, ssymm_oltcopyTS,
  178. #ifndef NO_LAPACK
  179. sneg_tcopyTS, slaswp_ncopyTS,
  180. #else
  181. NULL,NULL,
  182. #endif
  183. #endif
  /* Double-precision real section. Entries also needed by double-complex
   * builds are guarded with BUILD_DOUBLE || BUILD_COMPLEX16; the rest with
   * BUILD_DOUBLE only. */
  184. #if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
  185. 0, 0, 0,
  186. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  187. #ifdef DGEMM_DEFAULT_UNROLL_MN
  188. DGEMM_DEFAULT_UNROLL_MN,
  189. #else
  190. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  191. #endif
  192. #endif
  193. #if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
  194. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  195. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  196. dnrm2_kTS, dasum_kTS,
  197. #endif
  198. #if defined (BUILD_DOUBLE)
  199. dsum_kTS,
  200. #endif
  201. #if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
  202. dcopy_kTS, ddot_kTS,
  203. #endif
  /* dsdot is emitted here for BUILD_SINGLE or BUILD_DOUBLE builds. */
  204. #if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
  205. dsdot_kTS,
  206. #endif
  207. #if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
  208. drot_kTS,
  209. daxpy_kTS,
  210. dscal_kTS,
  211. dswap_kTS,
  212. dgemv_nTS, dgemv_tTS,
  213. #endif
  214. #if defined (BUILD_DOUBLE)
  215. dger_kTS,
  216. dsymv_LTS, dsymv_UTS,
  217. #endif
  218. #if defined (BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
  219. dgemm_kernelTS, dgemm_betaTS,
  220. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  221. dgemm_incopyTS, dgemm_itcopyTS,
  222. #else
  223. dgemm_oncopyTS, dgemm_otcopyTS,
  224. #endif
  225. dgemm_oncopyTS, dgemm_otcopyTS,
  226. #endif
  227. #if defined (BUILD_DOUBLE)
  228. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  229. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  230. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  231. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  232. #else
  233. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  234. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  235. #endif
  236. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  237. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  238. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  239. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  240. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  241. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  242. #else
  243. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  244. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  245. #endif
  246. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  247. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  248. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  249. dsymm_iutcopyTS, dsymm_iltcopyTS,
  250. #else
  251. dsymm_outcopyTS, dsymm_oltcopyTS,
  252. #endif
  253. dsymm_outcopyTS, dsymm_oltcopyTS,
  254. #ifndef NO_LAPACK
  255. dneg_tcopyTS, dlaswp_ncopyTS,
  256. #else
  257. NULL, NULL,
  258. #endif
  259. #endif
  /* EXPRECISION real section (q* kernels). */
  260. #ifdef EXPRECISION
  261. 0, 0, 0,
  262. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  263. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  264. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  265. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  266. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  267. qgemv_nTS, qgemv_tTS, qger_kTS,
  268. qsymv_LTS, qsymv_UTS,
  269. qgemm_kernelTS, qgemm_betaTS,
  270. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  271. qgemm_incopyTS, qgemm_itcopyTS,
  272. #else
  273. qgemm_oncopyTS, qgemm_otcopyTS,
  274. #endif
  275. qgemm_oncopyTS, qgemm_otcopyTS,
  276. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  277. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  278. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  279. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  280. #else
  281. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  282. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  283. #endif
  284. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  285. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  286. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  287. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  288. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  289. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  290. #else
  291. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  292. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  293. #endif
  294. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  295. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  296. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  297. qsymm_iutcopyTS, qsymm_iltcopyTS,
  298. #else
  299. qsymm_outcopyTS, qsymm_oltcopyTS,
  300. #endif
  301. qsymm_outcopyTS, qsymm_oltcopyTS,
  302. #ifndef NO_LAPACK
  303. qneg_tcopyTS, qlaswp_ncopyTS,
  304. #else
  305. NULL, NULL,
  306. #endif
  307. #endif
  /* Single-precision complex section (c* kernels), BUILD_COMPLEX only. */
  308. #ifdef BUILD_COMPLEX
  309. 0, 0, 0,
  310. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  311. #ifdef CGEMM_DEFAULT_UNROLL_MN
  312. CGEMM_DEFAULT_UNROLL_MN,
  313. #else
  314. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  315. #endif
  316. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  317. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  318. cdotu_kTS, cdotc_kTS, csrot_kTS,
  319. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  320. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  321. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  322. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  323. csymv_LTS, csymv_UTS,
  324. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  325. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  326. cgemm_betaTS,
  327. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  328. cgemm_incopyTS, cgemm_itcopyTS,
  329. #else
  330. cgemm_oncopyTS, cgemm_otcopyTS,
  331. #endif
  332. cgemm_oncopyTS, cgemm_otcopyTS,
  333. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  334. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  335. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  336. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  337. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  338. #else
  339. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  340. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  341. #endif
  342. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  343. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  344. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  345. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  346. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  347. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  348. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  349. #else
  350. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  351. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  352. #endif
  353. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  354. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  355. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  356. csymm_iutcopyTS, csymm_iltcopyTS,
  357. #else
  358. csymm_outcopyTS, csymm_oltcopyTS,
  359. #endif
  360. csymm_outcopyTS, csymm_oltcopyTS,
  361. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  362. chemm_iutcopyTS, chemm_iltcopyTS,
  363. #else
  364. chemm_outcopyTS, chemm_oltcopyTS,
  365. #endif
  366. chemm_outcopyTS, chemm_oltcopyTS,
  367. 0, 0, 0,
  /* GEMM3M slots: real kernels with USE_GEMM3M, otherwise the same number of
   * 0/NULL fillers (3 integers + 37 pointers) so later entries keep their
   * positions. The z* and x* sections below use the same layout. */
  368. #if defined(USE_GEMM3M)
  369. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  370. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  371. #else
  372. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  373. #endif
  374. cgemm3m_kernelTS,
  375. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  376. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  377. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  378. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  379. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  380. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  381. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  382. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  383. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  384. csymm3m_oucopybTS, csymm3m_olcopybTS,
  385. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  386. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  387. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  388. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  389. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  390. chemm3m_oucopybTS, chemm3m_olcopybTS,
  391. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  392. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  393. #else
  394. 0, 0, 0,
  395. NULL,
  396. NULL, NULL,
  397. NULL, NULL,
  398. NULL, NULL,
  399. NULL, NULL,
  400. NULL, NULL,
  401. NULL, NULL,
  402. NULL, NULL,
  403. NULL, NULL,
  404. NULL, NULL,
  405. NULL, NULL,
  406. NULL, NULL,
  407. NULL, NULL,
  408. NULL, NULL,
  409. NULL, NULL,
  410. NULL, NULL,
  411. NULL, NULL,
  412. NULL, NULL,
  413. NULL, NULL,
  414. #endif
  415. #ifndef NO_LAPACK
  416. cneg_tcopyTS, claswp_ncopyTS,
  417. #else
  418. NULL, NULL,
  419. #endif
  420. #endif
  /* Double-precision complex section (z* kernels), BUILD_COMPLEX16 only. */
  421. #ifdef BUILD_COMPLEX16
  422. 0, 0, 0,
  423. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  424. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  425. ZGEMM_DEFAULT_UNROLL_MN,
  426. #else
  427. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  428. #endif
  429. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  430. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  431. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  432. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  433. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  434. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  435. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  436. zsymv_LTS, zsymv_UTS,
  437. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  438. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  439. zgemm_betaTS,
  440. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  441. zgemm_incopyTS, zgemm_itcopyTS,
  442. #else
  443. zgemm_oncopyTS, zgemm_otcopyTS,
  444. #endif
  445. zgemm_oncopyTS, zgemm_otcopyTS,
  446. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  447. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  448. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  449. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  450. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  451. #else
  452. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  453. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  454. #endif
  455. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  456. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  457. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  458. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  459. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  460. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  461. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  462. #else
  463. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  464. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  465. #endif
  466. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  467. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  468. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  469. zsymm_iutcopyTS, zsymm_iltcopyTS,
  470. #else
  471. zsymm_outcopyTS, zsymm_oltcopyTS,
  472. #endif
  473. zsymm_outcopyTS, zsymm_oltcopyTS,
  474. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  475. zhemm_iutcopyTS, zhemm_iltcopyTS,
  476. #else
  477. zhemm_outcopyTS, zhemm_oltcopyTS,
  478. #endif
  479. zhemm_outcopyTS, zhemm_oltcopyTS,
  480. 0, 0, 0,
  481. #if defined(USE_GEMM3M)
  482. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  483. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  484. #else
  485. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  486. #endif
  487. zgemm3m_kernelTS,
  488. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  489. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  490. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  491. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  492. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  493. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  494. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  495. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  496. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  497. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  498. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  499. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  500. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  501. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  502. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  503. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  504. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  505. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  506. #else
  507. 0, 0, 0,
  508. NULL,
  509. NULL, NULL,
  510. NULL, NULL,
  511. NULL, NULL,
  512. NULL, NULL,
  513. NULL, NULL,
  514. NULL, NULL,
  515. NULL, NULL,
  516. NULL, NULL,
  517. NULL, NULL,
  518. NULL, NULL,
  519. NULL, NULL,
  520. NULL, NULL,
  521. NULL, NULL,
  522. NULL, NULL,
  523. NULL, NULL,
  524. NULL, NULL,
  525. NULL, NULL,
  526. NULL, NULL,
  527. #endif
  528. #ifndef NO_LAPACK
  529. zneg_tcopyTS, zlaswp_ncopyTS,
  530. #else
  531. NULL, NULL,
  532. #endif
  533. #endif
  /* EXPRECISION complex section (x* kernels). Its GEMM3M unroll factors reuse
   * the QGEMM defaults. */
  534. #ifdef EXPRECISION
  535. 0, 0, 0,
  536. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  537. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  538. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  539. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  540. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  541. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  542. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  543. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  544. xsymv_LTS, xsymv_UTS,
  545. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  546. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  547. xgemm_betaTS,
  548. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  549. xgemm_incopyTS, xgemm_itcopyTS,
  550. #else
  551. xgemm_oncopyTS, xgemm_otcopyTS,
  552. #endif
  553. xgemm_oncopyTS, xgemm_otcopyTS,
  554. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  555. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  556. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  557. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  558. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  559. #else
  560. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  561. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  562. #endif
  563. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  564. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  565. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  566. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  567. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  568. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  569. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  570. #else
  571. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  572. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  573. #endif
  574. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  575. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  576. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  577. xsymm_iutcopyTS, xsymm_iltcopyTS,
  578. #else
  579. xsymm_outcopyTS, xsymm_oltcopyTS,
  580. #endif
  581. xsymm_outcopyTS, xsymm_oltcopyTS,
  582. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  583. xhemm_iutcopyTS, xhemm_iltcopyTS,
  584. #else
  585. xhemm_outcopyTS, xhemm_oltcopyTS,
  586. #endif
  587. xhemm_outcopyTS, xhemm_oltcopyTS,
  588. 0, 0, 0,
  589. #if defined(USE_GEMM3M)
  590. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  591. xgemm3m_kernelTS,
  592. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  593. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  594. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  595. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  596. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  597. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  598. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  599. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  600. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  601. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  602. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  603. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  604. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  605. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  606. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  607. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  608. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  609. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  610. #else
  611. 0, 0, 0,
  612. NULL,
  613. NULL, NULL,
  614. NULL, NULL,
  615. NULL, NULL,
  616. NULL, NULL,
  617. NULL, NULL,
  618. NULL, NULL,
  619. NULL, NULL,
  620. NULL, NULL,
  621. NULL, NULL,
  622. NULL, NULL,
  623. NULL, NULL,
  624. NULL, NULL,
  625. NULL, NULL,
  626. NULL, NULL,
  627. NULL, NULL,
  628. NULL, NULL,
  629. NULL, NULL,
  630. NULL, NULL,
  631. #endif
  632. #ifndef NO_LAPACK
  633. xneg_tcopyTS, xlaswp_ncopyTS,
  634. #else
  635. NULL, NULL,
  636. #endif
  637. #endif
  /* Pointer to this file's init_parameter(), followed by three *NUMOPT
   * constants and the per-precision axpby / omatcopy / imatcopy / geadd
   * kernels. The final entry has no trailing comma; when BUILD_COMPLEX16 is
   * off, the list ends on the previous entry's trailing comma instead, which
   * C permits in an initializer list. */
  638. init_parameter,
  639. SNUMOPT, DNUMOPT, QNUMOPT,
  640. #ifdef BUILD_SINGLE
  641. saxpby_kTS,
  642. #endif
  643. #ifdef BUILD_DOUBLE
  644. daxpby_kTS,
  645. #endif
  646. #ifdef BUILD_COMPLEX
  647. caxpby_kTS,
  648. #endif
  649. #ifdef BUILD_COMPLEX16
  650. zaxpby_kTS,
  651. #endif
  652. #ifdef BUILD_SINGLE
  653. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  654. #endif
  655. #ifdef BUILD_DOUBLE
  656. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  657. #endif
  658. #ifdef BUILD_COMPLEX
  659. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  660. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  661. #endif
  662. #ifdef BUILD_COMPLEX16
  663. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  664. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  665. #endif
  666. #ifdef BUILD_SINGLE
  667. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  668. #endif
  669. #ifdef BUILD_DOUBLE
  670. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  671. #endif
  672. #ifdef BUILD_COMPLEX
  673. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  674. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  675. #endif
  676. #ifdef BUILD_COMPLEX16
  677. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  678. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  679. #endif
  680. #ifdef BUILD_SINGLE
  681. sgeadd_kTS,
  682. #endif
  683. #ifdef BUILD_DOUBLE
  684. dgeadd_kTS,
  685. #endif
  686. #ifdef BUILD_COMPLEX
  687. cgeadd_kTS,
  688. #endif
  689. #ifdef BUILD_COMPLEX16
  690. zgeadd_kTS
  691. #endif
  692. };
  693. #if defined(ARCH_ARM64)
  /*
   * init_parameter (ARCH_ARM64 variant): copy the compile-time
   * *_DEFAULT_P/Q/R macros into the matching *gemm_p/q/r members of
   * TABLE_NAME. Takes no arguments and returns nothing; its only effect is
   * the writes to TABLE_NAME.
   *
   * Every group is guarded by the same BUILD_* condition that guards that
   * precision's section of the TABLE_NAME initializer, so a member is
   * assigned exactly when the table contains it:
   *   sgemm_*              BUILD_SINGLE || BUILD_COMPLEX
   *   dgemm_*              BUILD_DOUBLE || BUILD_COMPLEX16
   *   cgemm_*, cgemm3m_*   BUILD_COMPLEX
   *   zgemm_*, zgemm3m_*   BUILD_COMPLEX16
   *
   * Earlier, sgemm_q/r were only set under BUILD_SINGLE and dgemm_p/q/r only
   * under BUILD_DOUBLE, so complex-only builds left them at zero even though
   * the GEMM3M fallbacks below read them.
   */
  static void init_parameter(void) {

  #if defined(BUILD_HALF)
    TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
    TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
    TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  #endif

  #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

  #if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

  #ifdef BUILD_COMPLEX
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

  #ifdef BUILD_COMPLEX16
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /* GEMM3M values must be set after the real-precision ones above, because
     * they fall back to them when no dedicated *GEMM3M_DEFAULT_* macro exists. */
  #if defined(USE_GEMM3M)

  #ifdef BUILD_COMPLEX
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif
  #endif /* BUILD_COMPLEX */

  #ifdef BUILD_COMPLEX16
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif
  #endif /* BUILD_COMPLEX16 */

  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  786. #else // defined(ARCH_ARM64)
  787. #if defined(ARCH_POWER)
  /*
   * init_parameter (ARCH_POWER variant): copy the compile-time
   * *_DEFAULT_P/Q/R macros into the matching *gemm_p/q/r members of
   * TABLE_NAME. Takes no arguments and returns nothing; its only effect is
   * the writes to TABLE_NAME.
   *
   * The assignments used to be unconditional, but the TABLE_NAME initializer
   * only contains each precision's section under a BUILD_* guard. Each group
   * is now wrapped in the same guard, so a build that enables only some
   * precisions does not reference members the table does not have. With all
   * precisions enabled, the same values are assigned as before.
   */
  static void init_parameter(void) {

  #ifdef BUILD_HALF
    TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
    TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
    TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  #endif

  #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

  #if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

  #ifdef BUILD_COMPLEX
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

  #ifdef BUILD_COMPLEX16
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif
  }
  811. #else //POWER
  812. #if defined(ARCH_ZARCH)
  813. static void init_parameter(void) {
  814. #ifdef BUILD_HALF
  815. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  816. #endif
  817. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  818. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  819. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  820. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  821. #ifdef BUILD_HALF
  822. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  823. #endif
  824. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  825. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  826. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  827. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  828. #ifdef BUILD_HALF
  829. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  830. #endif
  831. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  832. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  833. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  834. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  835. }
  836. #else //ZARCH
  837. #ifdef ARCH_X86
  /*
   * Fallback L2 cache size probe (only compiled when ARCH_X86 is defined).
   *
   * Issues CPUID leaf 2 and walks the one-byte cache descriptors packed into
   * EAX, EBX, ECX and EDX, in that order and from the low byte upwards.  The
   * low byte of EAX is skipped (per the CPUID leaf 2 definition it is not a
   * descriptor).
   *
   * Returns the L2 size in KB for the first descriptor found in the table
   * below.  If no descriptor matches, a warning is printed to stderr and
   * 256 is returned.
   *
   * NOTE(review): the "register holds no valid descriptors" flag (bit 31 of
   * each register) is not checked here - confirm whether that matters for
   * the CPUs this path still serves.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int r, b;

    cpuid(2, &eax, &ebx, &ecx, &edx);

    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    for (r = 0; r < 4; r++) {
      /* EAX contributes bytes 1..3 only; the other registers bytes 0..3. */
      for (b = (r == 0) ? 1 : 0; b < 4; b++) {
        switch (BITMASK(regs[r], 8 * b, 0xff)) {
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;

          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;

          case 0x3a :
            return 192;

          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;

          case 0x3d :
            return 384;

          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;

          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;

          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;

          case 0x48 :
            /* 3 MB L2 = 3072 KB (previously mistyped as 3184). */
            return 3072;

          case 0x49 :
            return 4096;

          case 0x4e :
            return 6144;

          default :
            break;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  908. #endif
  /*
   * Report the L2 cache size in KB.
   *
   * The value is taken from ECX of CPUID leaf 0x80000006 via
   * BITMASK(ecx, 16, 0xffff) (presumably the upper 16 bits of ECX).  A
   * positive value is returned as is.  Otherwise:
   *   - with ARCH_X86 defined, the result of get_l2_size_old() is returned;
   *   - without it, a warning is printed to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int size_kb;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    size_kb = BITMASK(ecx, 16, 0xffff);

    if (size_kb > 0) {
      return size_kb;
    }

  #ifdef ARCH_X86
    /* Extended leaf gave nothing usable: fall back to the descriptor table. */
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Report the L3 cache size derived from EDX of CPUID leaf 0x80000006.
   *
   * The field extracted with BITMASK(edx, 18, 0x3fff) is multiplied by 512;
   * presumably the field counts 512 KB units so the result is in KB -
   * NOTE(review): confirm against the CPUID specification.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    units = BITMASK(edx, 18, 0x3fff);

    return units * 512;
  }
  /*
   * Helper macros private to the x86 init_parameter() below.  They are
   * #undef'd right after the function.
   */

  /* Print the name of the core-specific branch; expands to nothing useful
     unless DEBUG is defined. */
  #ifdef DEBUG
  #define INITP_TRACE(msg) fprintf(stderr, msg)
  #else
  #define INITP_TRACE(msg) ((void) 0)
  #endif

  /* Per-precision setters for the gemm_p fields.  Each one is a no-op (its
     argument is discarded by the preprocessor) when the corresponding build
     switch is off. */
  #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
  #define INITP_SET_SGEMM_P(v) (TABLE_NAME.sgemm_p = (v))
  #else
  #define INITP_SET_SGEMM_P(v) ((void) 0)
  #endif

  #ifdef BUILD_DOUBLE
  #define INITP_SET_DGEMM_P(v) (TABLE_NAME.dgemm_p = (v))
  #else
  #define INITP_SET_DGEMM_P(v) ((void) 0)
  #endif

  #ifdef BUILD_COMPLEX
  #define INITP_SET_CGEMM_P(v) (TABLE_NAME.cgemm_p = (v))
  #else
  #define INITP_SET_CGEMM_P(v) ((void) 0)
  #endif

  #ifdef BUILD_COMPLEX16
  #define INITP_SET_ZGEMM_P(v) (TABLE_NAME.zgemm_p = (v))
  #else
  #define INITP_SET_ZGEMM_P(v) ((void) 0)
  #endif

  #ifdef EXPRECISION
  #define INITP_SET_QGEMM_P(v) (TABLE_NAME.qgemm_p = (v))
  #define INITP_SET_XGEMM_P(v) (TABLE_NAME.xgemm_p = (v))
  #else
  #define INITP_SET_QGEMM_P(v) ((void) 0)
  #define INITP_SET_XGEMM_P(v) ((void) 0)
  #endif

  /* Set sgemm_p, dgemm_p, cgemm_p, zgemm_p, qgemm_p, xgemm_p in that order. */
  #define INITP_SET_ALL_P(s, d, c, z, q, x) \
    do {                                    \
      INITP_SET_SGEMM_P(s);                 \
      INITP_SET_DGEMM_P(d);                 \
      INITP_SET_CGEMM_P(c);                 \
      INITP_SET_ZGEMM_P(z);                 \
      INITP_SET_QGEMM_P(q);                 \
      INITP_SET_XGEMM_P(x);                 \
    } while (0)

  /* Set every gemm_p field from its *_DEFAULT_P macro. */
  #define INITP_DEFAULT_P()                                              \
    INITP_SET_ALL_P(SGEMM_DEFAULT_P, DGEMM_DEFAULT_P, CGEMM_DEFAULT_P,   \
                    ZGEMM_DEFAULT_P, QGEMM_DEFAULT_P, XGEMM_DEFAULT_P)

  /* Round v up to the next multiple of m. */
  #define INITP_ROUND_UP(v, m) ((((v) + (m) - 1) / (m)) * (m))

  /* gemm_r derived from BUFFER_SIZE: subtract the (offsetA-shifted,
     align-masked) size of a p*q panel of `bytes`-sized elements, divide by
     the size of one q-long row, subtract 15 and clear the low four bits. */
  #define INITP_GEMM_R(p, q, bytes)                                        \
    ((((BUFFER_SIZE -                                                      \
        ((((p) * (q) * (bytes)) + TABLE_NAME.offsetA + TABLE_NAME.align)   \
         & ~TABLE_NAME.align))                                             \
       / ((q) * (bytes))) - 15) & ~15)

  /*
   * x86 / x86_64 variant of init_parameter().
   *
   * Fills the gemm_p / gemm_q / gemm_r fields of TABLE_NAME:
   *   1. gemm_q (and all three shgemm fields) come straight from the
   *      *_DEFAULT_* macros;
   *   2. gemm_p is chosen per target core - either a function of the L2
   *      size reported by get_l2_size(), a fixed number, or *_DEFAULT_P;
   *   3. the gemm3m_p fields are derived from the real-precision gemm_p
   *      unless a dedicated *GEMM3M_DEFAULT_P exists;
   *   4. gemm_p values are rounded up to a multiple of the unroll-M macro;
   *   5. gemm_r is computed from BUFFER_SIZE, gemm_p and gemm_q.
   *
   * Which fields are touched depends on BUILD_HALF, BUILD_SINGLE,
   * BUILD_DOUBLE, BUILD_COMPLEX, BUILD_COMPLEX16 and EXPRECISION.
   */
  static void init_parameter(void) {
    int l2 = get_l2_size();

    /* l2 is unused on targets whose parameters do not depend on it */
    (void) l2;

    /* ---- step 1: values taken directly from the defaults ---- */
  #ifdef BUILD_HALF
    TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
    TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
    TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  #endif

  #ifdef BUILD_SINGLE
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #ifdef BUILD_DOUBLE
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #ifdef BUILD_COMPLEX
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #ifdef BUILD_COMPLEX16
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

  #ifdef BUILD_COMPLEX
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  #endif
  #endif

  #ifdef BUILD_COMPLEX16
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  #endif
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  #endif

    /* ---- step 2: gemm_p per target core ---- */
  #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
    INITP_TRACE("Katmai, Coppermine, Banias, Athlon\n");
    INITP_SET_ALL_P(64 * (l2 >> 7), 32 * (l2 >> 7), 32 * (l2 >> 7),
                    16 * (l2 >> 7), 16 * (l2 >> 7),  8 * (l2 >> 7));
  #endif

  #ifdef CORE_NORTHWOOD
    INITP_TRACE("Northwood\n");
    INITP_SET_ALL_P(96 * (l2 >> 7), 48 * (l2 >> 7), 48 * (l2 >> 7),
                    24 * (l2 >> 7), 24 * (l2 >> 7), 12 * (l2 >> 7));
  #endif

  #ifdef ATOM
    INITP_TRACE("Atom\n");
    INITP_SET_ALL_P(256, 128, 128, 64, 64, 32);
  #endif

  #ifdef CORE_PRESCOTT
    INITP_TRACE("Prescott\n");
    INITP_SET_ALL_P(56 * (l2 >> 7), 28 * (l2 >> 7), 28 * (l2 >> 7),
                    14 * (l2 >> 7), 14 * (l2 >> 7),  7 * (l2 >> 7));
  #endif

  #ifdef CORE2
    INITP_TRACE("Core2\n");
    INITP_SET_ALL_P(92 * (l2 >> 9) + 8, 46 * (l2 >> 9) + 8,
                    46 * (l2 >> 9) + 4, 23 * (l2 >> 9) + 4,
                    92 * (l2 >> 9) + 8, 46 * (l2 >> 9) + 4);
  #endif

  #ifdef PENRYN
    INITP_TRACE("Penryn\n");
    INITP_SET_ALL_P(42 * (l2 >> 9) + 8, 42 * (l2 >> 9) + 8,
                    21 * (l2 >> 9) + 4, 21 * (l2 >> 9) + 4,
                    42 * (l2 >> 9) + 8, 21 * (l2 >> 9) + 4);
  #endif

  #ifdef DUNNINGTON
    INITP_TRACE("Dunnington\n");
    INITP_SET_ALL_P(42 * (l2 >> 9) + 8, 42 * (l2 >> 9) + 8,
                    21 * (l2 >> 9) + 4, 21 * (l2 >> 9) + 4,
                    42 * (l2 >> 9) + 8, 21 * (l2 >> 9) + 4);
  #endif

  #ifdef NEHALEM
    INITP_TRACE("Nehalem\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef SANDYBRIDGE
    INITP_TRACE("Sandybridge\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef HASWELL
    INITP_TRACE("Haswell\n");
    INITP_DEFAULT_P();
  #endif

  #if defined (SKYLAKEX) || defined (COOPERLAKE)
    INITP_TRACE("SkylakeX\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef OPTERON
    INITP_TRACE("Opteron\n");
    INITP_SET_ALL_P(224 + 56 * (l2 >> 7), 112 + 28 * (l2 >> 7),
                    112 + 28 * (l2 >> 7),  56 + 14 * (l2 >> 7),
                     56 + 14 * (l2 >> 7),  28 +  7 * (l2 >> 7));
  #endif

  #ifdef BARCELONA
    INITP_TRACE("Barcelona\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef BOBCAT
    INITP_TRACE("Bobcat\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef BULLDOZER
    INITP_TRACE("Bulldozer\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef EXCAVATOR
    INITP_TRACE("Excavator\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef PILEDRIVER
    INITP_TRACE("Piledriver\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef STEAMROLLER
    INITP_TRACE("Steamroller\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef ZEN
    INITP_TRACE("Zen\n");
    INITP_DEFAULT_P();
  #endif

  #ifdef NANO
    INITP_TRACE("NANO\n");
    INITP_DEFAULT_P();
  #endif

    /* ---- step 3: gemm3m_p, taken before the rounding of step 4 ---- */
  #ifdef BUILD_COMPLEX
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #endif

  #ifdef BUILD_COMPLEX16
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  #endif

    /* ---- step 4: round gemm_p up to a multiple of the unroll-M value ---- */
  #ifdef BUILD_SINGLE
    TABLE_NAME.sgemm_p = INITP_ROUND_UP(TABLE_NAME.sgemm_p, SGEMM_DEFAULT_UNROLL_M);
  #endif
  #ifdef BUILD_DOUBLE
    TABLE_NAME.dgemm_p = INITP_ROUND_UP(TABLE_NAME.dgemm_p, DGEMM_DEFAULT_UNROLL_M);
  #endif
  #ifdef BUILD_COMPLEX
    TABLE_NAME.cgemm_p = INITP_ROUND_UP(TABLE_NAME.cgemm_p, CGEMM_DEFAULT_UNROLL_M);
  #endif
  #ifdef BUILD_COMPLEX16
    TABLE_NAME.zgemm_p = INITP_ROUND_UP(TABLE_NAME.zgemm_p, ZGEMM_DEFAULT_UNROLL_M);
  #endif

  #ifdef BUILD_COMPLEX
  #ifdef CGEMM3M_DEFAULT_UNROLL_M
    TABLE_NAME.cgemm3m_p = INITP_ROUND_UP(TABLE_NAME.cgemm3m_p, CGEMM3M_DEFAULT_UNROLL_M);
  #else
    TABLE_NAME.cgemm3m_p = INITP_ROUND_UP(TABLE_NAME.cgemm3m_p, SGEMM_DEFAULT_UNROLL_M);
  #endif
  #endif

  #ifdef BUILD_COMPLEX16
  #ifdef ZGEMM3M_DEFAULT_UNROLL_M
    TABLE_NAME.zgemm3m_p = INITP_ROUND_UP(TABLE_NAME.zgemm3m_p, ZGEMM3M_DEFAULT_UNROLL_M);
  #else
    TABLE_NAME.zgemm3m_p = INITP_ROUND_UP(TABLE_NAME.zgemm3m_p, DGEMM_DEFAULT_UNROLL_M);
  #endif
  #endif

    /* NOTE(review): this rounding is guarded by QUAD_PRECISION, whereas every
       other q/x assignment in this function is guarded by EXPRECISION.
       Left as found - confirm which guard is intended. */
  #ifdef QUAD_PRECISION
    TABLE_NAME.qgemm_p = INITP_ROUND_UP(TABLE_NAME.qgemm_p, QGEMM_DEFAULT_UNROLL_M);
    TABLE_NAME.xgemm_p = INITP_ROUND_UP(TABLE_NAME.xgemm_p, XGEMM_DEFAULT_UNROLL_M);
    TABLE_NAME.xgemm3m_p = INITP_ROUND_UP(TABLE_NAME.xgemm3m_p, QGEMM_DEFAULT_UNROLL_M);
  #endif

  #ifdef DEBUG
    fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  #endif

    /* ---- step 5: gemm_r from BUFFER_SIZE, gemm_p and gemm_q ---- */
  #ifdef BUILD_SINGLE
    TABLE_NAME.sgemm_r = INITP_GEMM_R(TABLE_NAME.sgemm_p, TABLE_NAME.sgemm_q, 4);
  #endif
  #ifdef BUILD_DOUBLE
    TABLE_NAME.dgemm_r = INITP_GEMM_R(TABLE_NAME.dgemm_p, TABLE_NAME.dgemm_q, 8);
  #endif
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_r = INITP_GEMM_R(TABLE_NAME.qgemm_p, TABLE_NAME.qgemm_q, 16);
  #endif
  #ifdef BUILD_COMPLEX
    TABLE_NAME.cgemm_r = INITP_GEMM_R(TABLE_NAME.cgemm_p, TABLE_NAME.cgemm_q, 8);
  #endif
  #ifdef BUILD_COMPLEX16
    TABLE_NAME.zgemm_r = INITP_GEMM_R(TABLE_NAME.zgemm_p, TABLE_NAME.zgemm_q, 16);
  #endif
  #ifdef BUILD_COMPLEX
    TABLE_NAME.cgemm3m_r = INITP_GEMM_R(TABLE_NAME.cgemm3m_p, TABLE_NAME.cgemm3m_q, 8);
  #endif
  #ifdef BUILD_COMPLEX16
    TABLE_NAME.zgemm3m_r = INITP_GEMM_R(TABLE_NAME.zgemm3m_p, TABLE_NAME.zgemm3m_q, 16);
  #endif
  #ifdef EXPRECISION
    TABLE_NAME.xgemm_r = INITP_GEMM_R(TABLE_NAME.xgemm_p, TABLE_NAME.xgemm_q, 32);
    TABLE_NAME.xgemm3m_r = INITP_GEMM_R(TABLE_NAME.xgemm3m_p, TABLE_NAME.xgemm3m_q, 32);
  #endif
  }

  #undef INITP_GEMM_R
  #undef INITP_ROUND_UP
  #undef INITP_DEFAULT_P
  #undef INITP_SET_ALL_P
  #undef INITP_SET_XGEMM_P
  #undef INITP_SET_QGEMM_P
  #undef INITP_SET_ZGEMM_P
  #undef INITP_SET_CGEMM_P
  #undef INITP_SET_DGEMM_P
  #undef INITP_SET_SGEMM_P
  #undef INITP_TRACE
  1493. #endif //POWER
  1494. #endif //ZARCH
  1495. #endif //defined(ARCH_ARM64)