You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 58 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  /* Forward declaration: the table initializer that follows stores a pointer
     to this function before its definition appears later in the file. */
  46. static void init_parameter(void);
  /*
   * Statically initialized gotoblas_t instance (named by the TABLE_NAME macro).
   *
   * The initializer is purely positional: no designated fields are used, so
   * every value below, together with the preprocessor condition guarding it,
   * has to line up with the member order of gotoblas_t. That type is not
   * declared in this file; check its declaration before adding, removing or
   * re-guarding any entry.
   *
   * NOTE(review): the "TS" suffix on the kernel names is presumably rewritten
   * by the build (kernelTS.h is included above when BUILD_KERNEL is defined)
   * -- confirm before renaming anything.
   */
  47. gotoblas_t TABLE_NAME = {
  /* Leading scalar settings taken directly from configuration macros. */
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- sb* (bfloat16) entries: present only when BUILD_BFLOAT16 is defined ---- */
  51. #ifdef BUILD_BFLOAT16
  /* Three zero placeholders. init_parameter assigns sbgemm_p/_q/_r at run
     time; presumably those are these three slots -- confirm against the
     gotoblas_t declaration. The same "0, 0, 0," pattern opens every
     per-precision group below. */
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  /* Combined unroll value: explicit macro if provided, else max(M, N). */
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  /* Pattern repeated throughout the table: when the M and N unroll factors
     are equal, the o*copy routines are listed in the slots that otherwise
     take the i*copy routines, so the same routines appear twice in a row. */
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  /* The bfloat16 group lists the s* trsm/trmm/symm routines, selected by the
     SGEMM (not SBGEMM) unroll factors. */
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* When NO_LAPACK is defined these two slots are filled with NULL. */
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  /* ---- s* entries. Each sub-group carries its own BUILD_* guard; several
     are also compiled in for double/complex builds, exactly as each
     condition below spells out. ---- */
  113. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  114. 0, 0, 0,
  115. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SGEMM_DEFAULT_UNROLL_MN
  117. SGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. #endif
  /* Unconditional flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, else 0. */
  122. #ifdef HAVE_EXCLUSIVE_CACHE
  123. 1,
  124. #else
  125. 0,
  126. #endif
  127. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  128. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  131. isamax_kTS,
  132. #endif
  133. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  134. isamin_kTS, ismax_kTS, ismin_kTS,
  135. snrm2_kTS, sasum_kTS,
  136. #endif
  137. #if BUILD_SINGLE == 1
  138. ssum_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. scopy_kTS, sdot_kTS,
  142. // dsdot_kTS,
  143. srot_kTS, srotm_kTS, saxpy_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  146. sscal_kTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. sswap_kTS,
  150. sgemv_nTS, sgemv_tTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. sger_kTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. ssymv_LTS, ssymv_UTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  /* sgemm_direct entries are emitted only on x86_64 (two entries) and on
     ARM64 when HAVE_SME is defined (one entry). */
  159. #ifdef ARCH_X86_64
  160. sgemm_directTS,
  161. sgemm_direct_performantTS,
  162. #endif
  163. #ifdef ARCH_ARM64
  164. #ifdef HAVE_SME
  165. sgemm_directTS,
  166. #endif
  167. #endif
  168. sgemm_kernelTS, sgemm_betaTS,
  169. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  170. sgemm_incopyTS, sgemm_itcopyTS,
  171. #else
  172. sgemm_oncopyTS, sgemm_otcopyTS,
  173. #endif
  174. sgemm_oncopyTS, sgemm_otcopyTS,
  175. #endif
  176. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  177. #ifdef SMALL_MATRIX_OPT
  178. sgemm_small_matrix_permitTS,
  179. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  180. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  181. #endif
  182. #endif
  183. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  184. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  185. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  186. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  187. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  188. #else
  189. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  190. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  191. #endif
  192. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  193. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  194. #endif
  /* The s* trmm, symm and LAPACK helper entries are emitted for
     BUILD_SINGLE only (this guard closes after the neg_tcopy/laswp pair). */
  195. #if (BUILD_SINGLE==1)
  196. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  197. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  198. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  199. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  200. #else
  201. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  202. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  203. #endif
  204. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  205. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  206. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  207. ssymm_iutcopyTS, ssymm_iltcopyTS,
  208. #else
  209. ssymm_outcopyTS, ssymm_oltcopyTS,
  210. #endif
  211. ssymm_outcopyTS, ssymm_oltcopyTS,
  212. #ifndef NO_LAPACK
  213. sneg_tcopyTS, slaswp_ncopyTS,
  214. #else
  215. NULL,NULL,
  216. #endif
  217. #endif
  /* ---- d* entries: guarded by BUILD_DOUBLE and, for most sub-groups,
     BUILD_COMPLEX16 as well ---- */
  218. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  219. 0, 0, 0,
  220. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  221. #ifdef DGEMM_DEFAULT_UNROLL_MN
  222. DGEMM_DEFAULT_UNROLL_MN,
  223. #else
  224. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  225. #endif
  226. #endif
  227. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  228. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  229. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  230. dnrm2_kTS, dasum_kTS,
  231. #endif
  232. #if (BUILD_DOUBLE==1)
  233. dsum_kTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  236. dcopy_kTS, ddot_kTS,
  237. #endif
  /* dsdot is listed here, under a SINGLE-or-DOUBLE guard; the occurrence in
     the s* group above is commented out. */
  238. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  239. dsdot_kTS,
  240. #endif
  241. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  242. drot_kTS,
  243. drotm_kTS,
  244. daxpy_kTS,
  245. dscal_kTS,
  246. dswap_kTS,
  247. dgemv_nTS, dgemv_tTS,
  248. #endif
  249. #if (BUILD_DOUBLE==1)
  250. dger_kTS,
  251. dsymv_LTS, dsymv_UTS,
  252. #endif
  253. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  254. dgemm_kernelTS, dgemm_betaTS,
  255. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  256. dgemm_incopyTS, dgemm_itcopyTS,
  257. #else
  258. dgemm_oncopyTS, dgemm_otcopyTS,
  259. #endif
  260. dgemm_oncopyTS, dgemm_otcopyTS,
  261. #endif
  262. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  263. #ifdef SMALL_MATRIX_OPT
  264. dgemm_small_matrix_permitTS,
  265. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  266. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  267. #endif
  268. #endif
  /* d* trsm, trmm, symm and LAPACK helper entries: BUILD_DOUBLE only. */
  269. #if (BUILD_DOUBLE==1)
  270. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  271. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  272. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  273. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  274. #else
  275. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  276. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  277. #endif
  278. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  279. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  280. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  281. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  282. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  283. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  284. #else
  285. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  286. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  287. #endif
  288. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  289. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  290. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  291. dsymm_iutcopyTS, dsymm_iltcopyTS,
  292. #else
  293. dsymm_outcopyTS, dsymm_oltcopyTS,
  294. #endif
  295. dsymm_outcopyTS, dsymm_oltcopyTS,
  296. #ifndef NO_LAPACK
  297. dneg_tcopyTS, dlaswp_ncopyTS,
  298. #else
  299. NULL, NULL,
  300. #endif
  301. #endif
  /* ---- q* entries: present only when EXPRECISION is defined ---- */
  302. #ifdef EXPRECISION
  303. 0, 0, 0,
  304. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  305. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  306. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  307. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  308. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  309. qgemv_nTS, qgemv_tTS, qger_kTS,
  310. qsymv_LTS, qsymv_UTS,
  311. qgemm_kernelTS, qgemm_betaTS,
  312. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  313. qgemm_incopyTS, qgemm_itcopyTS,
  314. #else
  315. qgemm_oncopyTS, qgemm_otcopyTS,
  316. #endif
  317. qgemm_oncopyTS, qgemm_otcopyTS,
  318. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  319. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  320. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  321. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  322. #else
  323. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  324. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  325. #endif
  326. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  327. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  328. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  329. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  330. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  331. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  332. #else
  333. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  334. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  335. #endif
  336. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  337. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  338. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  339. qsymm_iutcopyTS, qsymm_iltcopyTS,
  340. #else
  341. qsymm_outcopyTS, qsymm_oltcopyTS,
  342. #endif
  343. qsymm_outcopyTS, qsymm_oltcopyTS,
  344. #ifndef NO_LAPACK
  345. qneg_tcopyTS, qlaswp_ncopyTS,
  346. #else
  347. NULL, NULL,
  348. #endif
  349. #endif
  /* ---- c* entries: guarded by BUILD_COMPLEX ----
     NOTE: the #if on the next line stays open until the second #endif that
     follows the ctrsm copy routines, so the nested "#if (BUILD_COMPLEX)"
     guards inside it are redundant with it. */
  350. #if (BUILD_COMPLEX)
  351. 0, 0, 0,
  352. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  353. #ifdef CGEMM_DEFAULT_UNROLL_MN
  354. CGEMM_DEFAULT_UNROLL_MN,
  355. #else
  356. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  357. #endif
  358. #if (BUILD_COMPLEX)
  359. camax_kTS, camin_kTS,
  360. #endif
  361. #if (BUILD_COMPLEX)
  362. icamax_kTS,
  363. #endif
  364. #if (BUILD_COMPLEX)
  365. icamin_kTS,
  366. cnrm2_kTS, casum_kTS, csum_kTS,
  367. #endif
  368. #if (BUILD_COMPLEX)
  369. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  370. #endif
  371. #if (BUILD_COMPLEX)
  372. csrot_kTS,
  373. #endif
  374. #if (BUILD_COMPLEX)
  375. caxpy_kTS,
  376. caxpyc_kTS,
  377. cscal_kTS,
  378. cswap_kTS,
  379. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  380. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  381. #endif
  382. #if (BUILD_COMPLEX)
  383. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  384. csymv_LTS, csymv_UTS,
  385. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  386. #endif
  387. #if (BUILD_COMPLEX)
  388. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  389. cgemm_betaTS,
  390. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  391. cgemm_incopyTS, cgemm_itcopyTS,
  392. #else
  393. cgemm_oncopyTS, cgemm_otcopyTS,
  394. #endif
  395. cgemm_oncopyTS, cgemm_otcopyTS,
  396. #ifdef SMALL_MATRIX_OPT
  397. cgemm_small_matrix_permitTS,
  398. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  399. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  400. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  401. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  402. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  403. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  404. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  405. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  406. #endif
  407. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  408. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  409. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  410. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  411. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  412. #else
  413. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  414. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  415. #endif
  416. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  417. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  /* The first #endif closes the cgemm/ctrsm sub-guard, the second closes the
     outer BUILD_COMPLEX guard opened at the start of the c* group. */
  418. #endif
  419. #endif
  420. #if (BUILD_COMPLEX)
  421. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  422. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  423. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  424. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  425. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  426. #else
  427. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  428. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  429. #endif
  430. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  431. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  432. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  433. csymm_iutcopyTS, csymm_iltcopyTS,
  434. #else
  435. csymm_outcopyTS, csymm_oltcopyTS,
  436. #endif
  437. csymm_outcopyTS, csymm_oltcopyTS,
  438. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  439. chemm_iutcopyTS, chemm_iltcopyTS,
  440. #else
  441. chemm_outcopyTS, chemm_oltcopyTS,
  442. #endif
  443. chemm_outcopyTS, chemm_oltcopyTS,
  /* cgemm3m sub-group: three zero placeholders, then either the real unroll
     values plus 37 routine pointers (USE_GEMM3M) or three zeros plus 37 NULLs.
     Both branches emit the same number of initializers. */
  444. 0, 0, 0,
  445. #if (USE_GEMM3M)
  /* The unroll values fall back to the SGEMM ones when no CGEMM3M-specific
     macro is defined. */
  446. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  447. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  448. #else
  449. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  450. #endif
  451. cgemm3m_kernelTS,
  452. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  453. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  454. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  455. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  456. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  457. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  458. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  459. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  460. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  461. csymm3m_oucopybTS, csymm3m_olcopybTS,
  462. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  463. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  464. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  465. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  466. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  467. chemm3m_oucopybTS, chemm3m_olcopybTS,
  468. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  469. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  470. #else
  471. 0, 0, 0,
  472. NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. NULL, NULL,
  485. NULL, NULL,
  486. NULL, NULL,
  487. NULL, NULL,
  488. NULL, NULL,
  489. NULL, NULL,
  490. NULL, NULL,
  491. #endif
  492. #endif
  493. #if (BUILD_COMPLEX)
  494. #ifndef NO_LAPACK
  495. cneg_tcopyTS,
  496. claswp_ncopyTS,
  497. #else
  498. NULL, NULL,
  499. #endif
  500. #endif
  /* ---- z* entries: one guard (BUILD_COMPLEX16 == 1) spans the whole group ---- */
  501. #if BUILD_COMPLEX16 == 1
  502. 0, 0, 0,
  503. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  504. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  505. ZGEMM_DEFAULT_UNROLL_MN,
  506. #else
  507. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  508. #endif
  509. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  510. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  511. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  512. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  513. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  514. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  515. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  516. zsymv_LTS, zsymv_UTS,
  517. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  518. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  519. zgemm_betaTS,
  520. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  521. zgemm_incopyTS, zgemm_itcopyTS,
  522. #else
  523. zgemm_oncopyTS, zgemm_otcopyTS,
  524. #endif
  525. zgemm_oncopyTS, zgemm_otcopyTS,
  526. #ifdef SMALL_MATRIX_OPT
  527. zgemm_small_matrix_permitTS,
  528. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  529. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  530. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  531. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  532. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  533. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  534. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  535. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  536. #endif
  537. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  538. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  539. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  540. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  541. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  542. #else
  543. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  544. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  545. #endif
  546. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  547. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  548. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  549. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  550. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  551. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  552. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  553. #else
  554. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  555. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  556. #endif
  557. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  558. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  559. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  560. zsymm_iutcopyTS, zsymm_iltcopyTS,
  561. #else
  562. zsymm_outcopyTS, zsymm_oltcopyTS,
  563. #endif
  564. zsymm_outcopyTS, zsymm_oltcopyTS,
  565. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  566. zhemm_iutcopyTS, zhemm_iltcopyTS,
  567. #else
  568. zhemm_outcopyTS, zhemm_oltcopyTS,
  569. #endif
  570. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* zgemm3m sub-group: same layout as the cgemm3m one (3 zeros, 3 unroll
     values, 37 pointers); the unroll fallback here is the DGEMM macros. */
  571. 0, 0, 0,
  572. #if (USE_GEMM3M)
  573. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  574. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  575. #else
  576. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  577. #endif
  578. zgemm3m_kernelTS,
  579. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  580. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  581. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  582. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  583. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  584. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  585. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  586. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  587. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  588. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  589. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  590. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  591. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  592. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  593. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  594. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  595. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  596. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  597. #else
  598. 0, 0, 0,
  599. NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. NULL, NULL,
  612. NULL, NULL,
  613. NULL, NULL,
  614. NULL, NULL,
  615. NULL, NULL,
  616. NULL, NULL,
  617. NULL, NULL,
  618. #endif
  619. #ifndef NO_LAPACK
  620. zneg_tcopyTS, zlaswp_ncopyTS,
  621. #else
  622. NULL, NULL,
  623. #endif
  624. #endif
  /* ---- x* entries: present only when EXPRECISION is defined ---- */
  625. #ifdef EXPRECISION
  626. 0, 0, 0,
  627. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  628. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  629. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  630. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  631. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  632. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  633. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  634. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  635. xsymv_LTS, xsymv_UTS,
  636. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  637. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  638. xgemm_betaTS,
  639. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  640. xgemm_incopyTS, xgemm_itcopyTS,
  641. #else
  642. xgemm_oncopyTS, xgemm_otcopyTS,
  643. #endif
  644. xgemm_oncopyTS, xgemm_otcopyTS,
  645. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  646. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  647. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  648. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  649. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  650. #else
  651. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  652. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  653. #endif
  654. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  655. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  656. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  657. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  658. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  659. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  660. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  661. #else
  662. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  663. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  664. #endif
  665. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  666. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  667. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  668. xsymm_iutcopyTS, xsymm_iltcopyTS,
  669. #else
  670. xsymm_outcopyTS, xsymm_oltcopyTS,
  671. #endif
  672. xsymm_outcopyTS, xsymm_oltcopyTS,
  673. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  674. xhemm_iutcopyTS, xhemm_iltcopyTS,
  675. #else
  676. xhemm_outcopyTS, xhemm_oltcopyTS,
  677. #endif
  678. xhemm_outcopyTS, xhemm_oltcopyTS,
  679. 0, 0, 0,
  680. #if (USE_GEMM3M)
  /* NOTE(review): the xgemm3m unroll values come from the QGEMM macros, with
     no XGEMM3M-specific override. This parallels the SGEMM/DGEMM fallbacks in
     the cgemm3m/zgemm3m sub-groups and looks deliberate -- confirm. */
  681. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  682. xgemm3m_kernelTS,
  683. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  684. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  685. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  686. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  687. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  688. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  689. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  690. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  691. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  692. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  693. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  694. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  695. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  696. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  697. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  698. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  699. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  700. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  701. #else
  702. 0, 0, 0,
  703. NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. NULL, NULL,
  716. NULL, NULL,
  717. NULL, NULL,
  718. NULL, NULL,
  719. NULL, NULL,
  720. NULL, NULL,
  721. NULL, NULL,
  722. #endif
  723. #ifndef NO_LAPACK
  724. xneg_tcopyTS, xlaswp_ncopyTS,
  725. #else
  726. NULL, NULL,
  727. #endif
  728. #endif
  /* Pointer to init_parameter, followed by the SNUMOPT/DNUMOPT/QNUMOPT values. */
  729. init_parameter,
  730. SNUMOPT, DNUMOPT, QNUMOPT,
  /* Trailing entries: axpby, omatcopy, imatcopy and geadd kernels, each
     emitted per precision under its own BUILD_* == 1 guard. */
  731. #if BUILD_SINGLE == 1
  732. saxpby_kTS,
  733. #endif
  734. #if BUILD_DOUBLE == 1
  735. daxpby_kTS,
  736. #endif
  737. #if BUILD_COMPLEX == 1
  738. caxpby_kTS,
  739. #endif
  740. #if BUILD_COMPLEX16== 1
  741. zaxpby_kTS,
  742. #endif
  743. #if BUILD_SINGLE == 1
  744. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  745. #endif
  746. #if BUILD_DOUBLE== 1
  747. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  748. #endif
  749. #if BUILD_COMPLEX == 1
  750. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  751. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  752. #endif
  753. #if BUILD_COMPLEX16 == 1
  754. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  755. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  756. #endif
  757. #if BUILD_SINGLE == 1
  758. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  759. #endif
  760. #if BUILD_DOUBLE== 1
  761. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  762. #endif
  763. #if BUILD_COMPLEX== 1
  764. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  765. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  766. #endif
  767. #if BUILD_COMPLEX16==1
  768. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  769. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  770. #endif
  771. #if BUILD_SINGLE == 1
  772. sgeadd_kTS,
  773. #endif
  774. #if BUILD_DOUBLE==1
  775. dgeadd_kTS,
  776. #endif
  777. #if BUILD_COMPLEX==1
  778. cgeadd_kTS,
  779. #endif
  780. #if BUILD_COMPLEX16==1
  781. zgeadd_kTS,
  782. #endif
  783. };
  784. #if (ARCH_ARM64)
  /*
   * ARM64 variant of init_parameter().
   *
   * Copies the compile-time *GEMM_DEFAULT_{P,Q,R} values into the matching
   * TABLE_NAME fields, one precision at a time.  Each precision is only
   * touched when the corresponding BUILD_* switch enables it.  The GEMM3M
   * fields are filled last because, when no dedicated *GEMM3M_DEFAULT_*
   * macro exists, they are copied from the real-precision fields set above.
   */
  static void init_parameter(void) {

    /* bfloat16 */
  #if BUILD_BFLOAT16
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* single precision real (also required by the complex build) */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* double precision real (also required by the complex16 build) */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

    /* single precision complex */
  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

    /* double precision complex */
  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* extended precision, real (q*) and complex (x*) */
  #if defined(EXPRECISION)
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if USE_GEMM3M
    /* cgemm3m: dedicated default if provided, otherwise reuse sgemm_* */
  #if defined(CGEMM3M_DEFAULT_P)
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #if defined(CGEMM3M_DEFAULT_Q)
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #if defined(CGEMM3M_DEFAULT_R)
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* zgemm3m: dedicated default if provided, otherwise reuse dgemm_* */
  #if defined(ZGEMM3M_DEFAULT_P)
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #if defined(ZGEMM3M_DEFAULT_Q)
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #if defined(ZGEMM3M_DEFAULT_R)
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* xgemm3m always mirrors the qgemm_* values */
  #if defined(EXPRECISION)
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */

  }
  877. #else // (ARCH_ARM64)
  878. #if defined(ARCH_MIPS64)
  879. static void init_parameter(void) {
  880. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  881. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  882. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  883. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  884. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  885. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  886. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  887. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  888. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  889. TABLE_NAME.dgemm_r = 640;
  890. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  891. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  892. #ifdef EXPRECISION
  893. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  894. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  895. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  896. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  897. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  898. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  899. #endif
  900. #if defined(USE_GEMM3M)
  901. #ifdef CGEMM3M_DEFAULT_P
  902. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  903. #else
  904. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  905. #endif
  906. #ifdef ZGEMM3M_DEFAULT_P
  907. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  908. #else
  909. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  910. #endif
  911. #ifdef CGEMM3M_DEFAULT_Q
  912. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  913. #else
  914. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  915. #endif
  916. #ifdef ZGEMM3M_DEFAULT_Q
  917. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  918. #else
  919. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  920. #endif
  921. #ifdef CGEMM3M_DEFAULT_R
  922. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  923. #else
  924. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  925. #endif
  926. #ifdef ZGEMM3M_DEFAULT_R
  927. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  928. #else
  929. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  930. #endif
  931. #ifdef EXPRECISION
  932. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  933. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  934. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  935. #endif
  936. #endif
  937. }
  938. #else // (ARCH_MIPS64)
  939. #if (ARCH_LOONGARCH64)
  /*
   * Query the L3 cache size on LoongArch64.
   *
   * Executes the `cpucfg` instruction with configuration word 0x14 and
   * decodes three fields from the returned 32-bit value:
   *   bits  0..15 : a count, stored minus one
   *   bits 16..23 : a power-of-two exponent
   *   bits 24..30 : a second power-of-two exponent
   * NOTE(review): these presumably are ways-1, log2(sets) and
   * log2(line size in bytes) -- confirm against the LoongArch manual.
   *
   * Returns the product of the three decoded quantities divided by
   * 1024 * 1024, truncated to int (i.e. the size expressed in MB).
   */
  static int get_L3_size(void) {
    /* Unsigned so that the right shifts below never act on a negative value. */
    unsigned int cfg = 0;
    int id = 0x14;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(cfg)
      : [id]"r"(id)
      : "memory"
    );

    unsigned int count     = (cfg & 0xffff) + 1;
    unsigned int exp_first = (cfg >> 16) & 0xff;
    unsigned int exp_second = (cfg >> 24) & 0x7f;

    /* Arithmetic is carried out in double (via pow) and truncated on return. */
    return count * pow(2, exp_first) * pow(2, exp_second) / 1024 / 1024; // MB
  }
  950. static void init_parameter(void) {
  951. #ifdef BUILD_BFLOAT16
  952. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  953. #endif
  954. #ifdef BUILD_BFLOAT16
  955. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  956. #endif
  957. #if defined(LA464)
  958. int L3_size = get_L3_size();
  959. #ifdef SMP
  960. if(blas_num_threads == 1){
  961. #endif
  962. //single thread
  963. if (L3_size == 32){ // 3C5000 and 3D5000
  964. TABLE_NAME.sgemm_p = 256;
  965. TABLE_NAME.sgemm_q = 384;
  966. TABLE_NAME.sgemm_r = 8192;
  967. TABLE_NAME.dgemm_p = 112;
  968. TABLE_NAME.dgemm_q = 289;
  969. TABLE_NAME.dgemm_r = 4096;
  970. TABLE_NAME.cgemm_p = 128;
  971. TABLE_NAME.cgemm_q = 256;
  972. TABLE_NAME.cgemm_r = 4096;
  973. TABLE_NAME.zgemm_p = 128;
  974. TABLE_NAME.zgemm_q = 128;
  975. TABLE_NAME.zgemm_r = 2048;
  976. } else { // 3A5000 and 3C5000L
  977. TABLE_NAME.sgemm_p = 256;
  978. TABLE_NAME.sgemm_q = 384;
  979. TABLE_NAME.sgemm_r = 4096;
  980. TABLE_NAME.dgemm_p = 112;
  981. TABLE_NAME.dgemm_q = 300;
  982. TABLE_NAME.dgemm_r = 3024;
  983. TABLE_NAME.cgemm_p = 128;
  984. TABLE_NAME.cgemm_q = 256;
  985. TABLE_NAME.cgemm_r = 2048;
  986. TABLE_NAME.zgemm_p = 128;
  987. TABLE_NAME.zgemm_q = 128;
  988. TABLE_NAME.zgemm_r = 1024;
  989. }
  990. #ifdef SMP
  991. }else{
  992. //multi thread
  993. if (L3_size == 32){ // 3C5000 and 3D5000
  994. TABLE_NAME.sgemm_p = 256;
  995. TABLE_NAME.sgemm_q = 384;
  996. TABLE_NAME.sgemm_r = 1024;
  997. TABLE_NAME.dgemm_p = 112;
  998. TABLE_NAME.dgemm_q = 289;
  999. TABLE_NAME.dgemm_r = 342;
  1000. TABLE_NAME.cgemm_p = 128;
  1001. TABLE_NAME.cgemm_q = 256;
  1002. TABLE_NAME.cgemm_r = 512;
  1003. TABLE_NAME.zgemm_p = 128;
  1004. TABLE_NAME.zgemm_q = 128;
  1005. TABLE_NAME.zgemm_r = 512;
  1006. } else { // 3A5000 and 3C5000L
  1007. TABLE_NAME.sgemm_p = 256;
  1008. TABLE_NAME.sgemm_q = 384;
  1009. TABLE_NAME.sgemm_r = 2048;
  1010. TABLE_NAME.dgemm_p = 112;
  1011. TABLE_NAME.dgemm_q = 300;
  1012. TABLE_NAME.dgemm_r = 738;
  1013. TABLE_NAME.cgemm_p = 128;
  1014. TABLE_NAME.cgemm_q = 256;
  1015. TABLE_NAME.cgemm_r = 1024;
  1016. TABLE_NAME.zgemm_p = 128;
  1017. TABLE_NAME.zgemm_q = 128;
  1018. TABLE_NAME.zgemm_r = 1024;
  1019. }
  1020. }
  1021. #endif
  1022. #else
  1023. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1024. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1025. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1026. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1027. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1028. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1029. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1030. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1031. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1032. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1033. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1034. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1035. #endif
  1036. #ifdef BUILD_BFLOAT16
  1037. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1038. #endif
  1039. }
  1040. #else // (ARCH_LOONGARCH64)
  1041. #if (ARCH_POWER)
  1042. static void init_parameter(void) {
  1043. #ifdef BUILD_BFLOAT16
  1044. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1045. #endif
  1046. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1047. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1048. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1049. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1050. #ifdef BUILD_BFLOAT16
  1051. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1052. #endif
  1053. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1054. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1055. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1056. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1057. #ifdef BUILD_BFLOAT16
  1058. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1059. #endif
  1060. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1061. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1062. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1063. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1064. }
  1065. #else //POWER
  1066. #if (ARCH_ZARCH)
  1067. static void init_parameter(void) {
  1068. #ifdef BUILD_BFLOAT16
  1069. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1070. #endif
  1071. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1072. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1073. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1074. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1075. #ifdef BUILD_BFLOAT16
  1076. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1077. #endif
  1078. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1079. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1080. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1081. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1082. #ifdef BUILD_BFLOAT16
  1083. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1084. #endif
  1085. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1086. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1087. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1088. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1089. }
  1090. #else //ZARCH
  1091. #if (ARCH_RISCV64)
  1092. static void init_parameter(void) {
  1093. #ifdef BUILD_BFLOAT16
  1094. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1095. #endif
  1096. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1097. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1098. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1099. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1100. #ifdef BUILD_BFLOAT16
  1101. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1102. #endif
  1103. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1104. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1105. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1106. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1107. #ifdef BUILD_BFLOAT16
  1108. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1109. #endif
  1110. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1111. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1112. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1113. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1114. }
  1115. #else //RISCV64
  1116. #ifdef ARCH_X86
  /*
   * Determine the L2 cache size from the descriptor bytes returned by
   * CPUID function 2.
   *
   * The fifteen descriptor bytes (every byte of EAX..EDX except the lowest
   * byte of EAX) are scanned in order, and the first one that appears in
   * the table below decides the result.
   *
   * Returns the size in KB.  If no descriptor is recognised, a warning is
   * printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /* The lowest byte of EAX is deliberately not treated as a descriptor. */
    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB = 3 * 1024 KB (the previous value, 3184, was a typo) */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;

      default :
        /* not an L2 descriptor known to this table; keep scanning */
        break;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1187. #endif
  /*
   * Return the L2 cache size used for tuning, in KB.
   *
   * Sources are tried in this order:
   *   1. a non-zero value from readenv_atoi("OPENBLAS_L2_SIZE");
   *   2. the BITMASK(ecx, 16, 0xffff) field of CPUID function 0x80000006,
   *      if it is positive;
   *   3. on ARCH_X86 builds, get_l2_size_old(); on all other builds a
   *      warning is written to stderr and 256 is assumed.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int size_kb;

    /* An explicit user override wins over any hardware query. */
    size_kb = readenv_atoi("OPENBLAS_L2_SIZE");
    if (size_kb != 0) {
      return size_kb;
    }

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    size_kb = BITMASK(ecx, 16, 0xffff);

    if (size_kb > 0) {
      return size_kb;
    }

    /* The extended CPUID function gave nothing usable. */
  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Return the L3 cache size derived from CPUID function 0x80000006:
   * the BITMASK(edx, 18, 0x3fff) field multiplied by 512.
   * NOTE(review): the field is presumably a count of 512 KB units, which
   * would make the result KB -- confirm against the CPUID documentation.
   */
  static __inline__ int get_l3_size(void){

    int eax, ebx, ecx, edx;
    int units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    units = BITMASK(edx, 18, 0x3fff);

    return units * 512;
  }
  1211. static void init_parameter(void) {
  1212. int l2 = get_l2_size();
  1213. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1214. /* where the GEMM unrolling parameters do not depend on l2 */
  1215. #ifdef BUILD_BFLOAT16
  1216. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1217. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1218. #endif
  1219. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1220. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1221. #endif
  1222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1223. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1224. #endif
  1225. #if BUILD_COMPLEX == 1
  1226. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1227. #endif
  1228. #if BUILD_COMPLEX16==1
  1229. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1230. #endif
  1231. #if BUILD_COMPLEX == 1
  1232. #ifdef CGEMM3M_DEFAULT_Q
  1233. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1234. #else
  1235. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1236. #endif
  1237. #endif
  1238. #if BUILD_COMPLEX16 == 1
  1239. #ifdef ZGEMM3M_DEFAULT_Q
  1240. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1241. #else
  1242. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1243. #endif
  1244. #endif
  1245. #ifdef EXPRECISION
  1246. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1247. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1248. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1249. #endif
  1250. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1251. #ifdef DEBUG
  1252. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1253. #endif
  1254. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1255. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1256. #endif
  1257. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1258. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1259. #endif
  1260. #if BUILD_COMPLEX==1
  1261. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1262. #endif
  1263. #if BUILD_COMPLEX16==1
  1264. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1265. #endif
  1266. #ifdef EXPRECISION
  1267. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1268. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1269. #endif
  1270. #endif
  1271. #ifdef CORE_NORTHWOOD
  1272. #ifdef DEBUG
  1273. fprintf(stderr, "Northwood\n");
  1274. #endif
  1275. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1276. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1277. #endif
  1278. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1279. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1280. #endif
  1281. #if BUILD_COMPLEX==1
  1282. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1283. #endif
  1284. #if BUILD_COMPLEX16==1
  1285. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1286. #endif
  1287. #ifdef EXPRECISION
  1288. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1289. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1290. #endif
  1291. #endif
  1292. #ifdef ATOM
  1293. #ifdef DEBUG
  1294. fprintf(stderr, "Atom\n");
  1295. #endif
  1296. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1297. TABLE_NAME.sgemm_p = 256;
  1298. #endif
  1299. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1300. TABLE_NAME.dgemm_p = 128;
  1301. #endif
  1302. #if BUILD_COMPLEX==1
  1303. TABLE_NAME.cgemm_p = 128;
  1304. #endif
  1305. #if BUILD_COMPLEX16==1
  1306. TABLE_NAME.zgemm_p = 64;
  1307. #endif
  1308. #ifdef EXPRECISION
  1309. TABLE_NAME.qgemm_p = 64;
  1310. TABLE_NAME.xgemm_p = 32;
  1311. #endif
  1312. #endif
  1313. #ifdef CORE_PRESCOTT
  1314. #ifdef DEBUG
  1315. fprintf(stderr, "Prescott\n");
  1316. #endif
  1317. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1318. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1319. #endif
  1320. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1321. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1322. #endif
  1323. #if BUILD_COMPLEX==1
  1324. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1325. #endif
  1326. #if BUILD_COMPLEX16 == 1
  1327. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1328. #endif
  1329. #ifdef EXPRECISION
  1330. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1331. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1332. #endif
  1333. #endif
  1334. #ifdef CORE2
  1335. #ifdef DEBUG
  1336. fprintf(stderr, "Core2\n");
  1337. #endif
  1338. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1339. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1340. #endif
  1341. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1342. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1343. #endif
  1344. #if BUILD_COMPLEX==1
  1345. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1346. #endif
  1347. #if BUILD_COMPLEX16==1
  1348. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1349. #endif
  1350. #ifdef EXPRECISION
  1351. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1352. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1353. #endif
  1354. #endif
  1355. #ifdef PENRYN
  1356. #ifdef DEBUG
  1357. fprintf(stderr, "Penryn\n");
  1358. #endif
  1359. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1360. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1361. #endif
  1362. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1363. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1364. #endif
  1365. #if BUILD_COMPLEX==1
  1366. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1367. #endif
  1368. #if BUILD_COMPLEX16==1
  1369. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1370. #endif
  1371. #ifdef EXPRECISION
  1372. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1373. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1374. #endif
  1375. #endif
  1376. #ifdef DUNNINGTON
  1377. #ifdef DEBUG
  1378. fprintf(stderr, "Dunnington\n");
  1379. #endif
  1380. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1381. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1382. #endif
  1383. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1384. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1385. #endif
  1386. #if BUILD_COMPLEX==1
  1387. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1388. #endif
  1389. #if BUILD_COMPLEX16==1
  1390. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1391. #endif
  1392. #ifdef EXPRECISION
  1393. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1394. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1395. #endif
  1396. #endif
  1397. #ifdef NEHALEM
  1398. #ifdef DEBUG
  1399. fprintf(stderr, "Nehalem\n");
  1400. #endif
  1401. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1402. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1403. #endif
  1404. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1405. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1406. #endif
  1407. #if BUILD_COMPLEX
  1408. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1409. #endif
  1410. #if BUILD_COMPLEX16
  1411. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1412. #endif
  1413. #ifdef EXPRECISION
  1414. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1415. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1416. #endif
  1417. #endif
  1418. #ifdef SANDYBRIDGE
  1419. #ifdef DEBUG
  1420. fprintf(stderr, "Sandybridge\n");
  1421. #endif
  1422. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1423. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1424. #endif
  1425. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1426. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1427. #endif
  1428. #if BUILD_COMPLEX
  1429. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1430. #endif
  1431. #if BUILD_COMPLEX16
  1432. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1433. #endif
  1434. #ifdef EXPRECISION
  1435. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1436. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1437. #endif
  1438. #endif
  1439. #ifdef HASWELL
  1440. #ifdef DEBUG
  1441. fprintf(stderr, "Haswell\n");
  1442. #endif
  1443. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1444. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1445. #endif
  1446. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1447. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1448. #endif
  1449. #if BUILD_COMPLEX
  1450. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1451. #endif
  1452. #if BUILD_COMPLEX16
  1453. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1454. #endif
  1455. #ifdef EXPRECISION
  1456. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1457. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1458. #endif
  1459. #endif
  1460. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1461. #ifdef DEBUG
  1462. fprintf(stderr, "SkylakeX\n");
  1463. #endif
  1464. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1465. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1466. #endif
  1467. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1468. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1469. #endif
  1470. #if BUILD_COMPLEX
  1471. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1472. #endif
  1473. #if BUILD_COMPLEX16
  1474. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1475. #endif
  1476. #ifdef EXPRECISION
  1477. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1478. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1479. #endif
  1480. #endif
  1481. #ifdef OPTERON
  1482. #ifdef DEBUG
  1483. fprintf(stderr, "Opteron\n");
  1484. #endif
  1485. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1486. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1487. #endif
  1488. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1489. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1490. #endif
  1491. #if BUILD_COMPLEX
  1492. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1493. #endif
  1494. #if BUILD_COMPLEX16
  1495. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1496. #endif
  1497. #ifdef EXPRECISION
  1498. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1499. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1500. #endif
  1501. #endif
  1502. #ifdef BARCELONA
  1503. #ifdef DEBUG
  1504. fprintf(stderr, "Barcelona\n");
  1505. #endif
  1506. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1507. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1508. #endif
  1509. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1510. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1511. #endif
  1512. #if BUILD_COMPLEX
  1513. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1514. #endif
  1515. #if BUILD_COMPLEX16
  1516. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1517. #endif
  1518. #ifdef EXPRECISION
  1519. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1520. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1521. #endif
  1522. #endif
  1523. #ifdef BOBCAT
  1524. #ifdef DEBUG
  1525. fprintf(stderr, "Bobcate\n");
  1526. #endif
  1527. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1528. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1529. #endif
  1530. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1531. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1532. #endif
  1533. #if BUILD_COMPLEX
  1534. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1535. #endif
  1536. #if BUILD_COMPLEX16
  1537. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1538. #endif
  1539. #ifdef EXPRECISION
  1540. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1541. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1542. #endif
  1543. #endif
  1544. #ifdef BULLDOZER
  1545. #ifdef DEBUG
  1546. fprintf(stderr, "Bulldozer\n");
  1547. #endif
  1548. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1549. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1550. #endif
  1551. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1552. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1553. #endif
  1554. #if BUILD_COMPLEX
  1555. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1556. #endif
  1557. #if BUILD_COMPLEX16
  1558. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1559. #endif
  1560. #ifdef EXPRECISION
  1561. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1562. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1563. #endif
  1564. #endif
  1565. #ifdef EXCAVATOR
  1566. #ifdef DEBUG
  1567. fprintf(stderr, "Excavator\n");
  1568. #endif
  1569. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1570. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1571. #endif
  1572. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1573. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1574. #endif
  1575. #if BUILD_COMPLEX
  1576. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1577. #endif
  1578. #if BUILD_COMPLEX16
  1579. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1580. #endif
  1581. #ifdef EXPRECISION
  1582. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1583. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1584. #endif
  1585. #endif
  1586. #ifdef PILEDRIVER
  1587. #ifdef DEBUG
  1588. fprintf(stderr, "Piledriver\n");
  1589. #endif
  1590. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1591. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1592. #endif
  1593. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1594. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1595. #endif
  1596. #if BUILD_COMPLEX
  1597. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1598. #endif
  1599. #if BUILD_COMPLEX16
  1600. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1601. #endif
  1602. #ifdef EXPRECISION
  1603. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1604. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1605. #endif
  1606. #endif
  1607. #ifdef STEAMROLLER
  1608. #ifdef DEBUG
  1609. fprintf(stderr, "Steamroller\n");
  1610. #endif
  1611. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1612. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1613. #endif
  1614. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1615. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1616. #endif
  1617. #if BUILD_COMPLEX
  1618. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1619. #endif
  1620. #if BUILD_COMPLEX16
  1621. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1622. #endif
  1623. #ifdef EXPRECISION
  1624. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1625. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1626. #endif
  1627. #endif
  1628. #ifdef ZEN
  1629. #ifdef DEBUG
  1630. fprintf(stderr, "Zen\n");
  1631. #endif
  1632. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1633. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1634. #endif
  1635. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1636. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1637. #endif
  1638. #if BUILD_COMPLEX
  1639. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1640. #endif
  1641. #if BUILD_COMPLEX16
  1642. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1643. #endif
  1644. #ifdef EXPRECISION
  1645. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1646. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1647. #endif
  1648. #endif
  1649. #ifdef NANO
  1650. #ifdef DEBUG
  1651. fprintf(stderr, "NANO\n");
  1652. #endif
  1653. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1654. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1655. #endif
  1656. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1657. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1658. #endif
  1659. #if (BUILD_COMPLEX==1)
  1660. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1661. #endif
  1662. #if (BUILD_COMPLEX16==1)
  1663. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1664. #endif
  1665. #ifdef EXPRECISION
  1666. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1667. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1668. #endif
  1669. #endif
  1670. #ifdef SAPPHIRERAPIDS
  1671. #if (BUILD_BFLOAT16 == 1)
  1672. TABLE_NAME.need_amxtile_permission = 1;
  1673. #endif
  1674. #endif
  1675. #if BUILD_COMPLEX==1
  1676. #ifdef CGEMM3M_DEFAULT_P
  1677. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1678. #else
  1679. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1680. #endif
  1681. #endif
  1682. #if BUILD_COMPLEX16==1
  1683. #ifdef ZGEMM3M_DEFAULT_P
  1684. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1685. #else
  1686. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1687. #endif
  1688. #endif
  1689. #ifdef EXPRECISION
  1690. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1691. #endif
  1692. #if BUILD_SINGLE == 1
  1693. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1694. #endif
  1695. #if BUILD_DOUBLE== 1
  1696. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1697. #endif
  1698. #if BUILD_COMPLEX==1
  1699. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1700. #endif
  1701. #if BUILD_COMPLEX16==1
  1702. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1703. #endif
  1704. #if BUILD_COMPLEX==1
  1705. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1706. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1707. #else
  1708. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1709. #endif
  1710. #endif
  1711. #if BUILD_COMPLEX16==1
  1712. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1713. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1714. #else
  1715. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1716. #endif
  1717. #endif
  1718. #ifdef QUAD_PRECISION
  1719. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1720. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1721. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1722. #endif
  1723. #ifdef DEBUG
  1724. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1725. #endif
  1726. #if BUILD_BFLOAT16==1
  1727. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1728. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1729. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1730. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1731. #endif
  1732. #if BUILD_SINGLE==1
  1733. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1734. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1735. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1736. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1737. #endif
  1738. #if BUILD_DOUBLE==1
  1739. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1740. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1741. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1742. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1743. #endif
  1744. #ifdef EXPRECISION
  1745. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1746. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1747. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1748. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1749. #endif
  1750. #if BUILD_COMPLEX ==1
  1751. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1752. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1753. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1754. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1755. #endif
  1756. #if BUILD_COMPLEX16 ==1
  1757. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1758. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1759. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1760. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1761. #endif
  1762. #if BUILD_COMPLEX == 1
  1763. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1764. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1765. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1766. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1767. #endif
  1768. #if BUILD_COMPLEX16 == 1
  1769. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1770. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1771. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1772. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1773. #endif
  1774. #ifdef EXPRECISION
  1775. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1776. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1777. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1778. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1779. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1780. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1781. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1782. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1783. #endif
  1784. }
  1785. #endif //RISCV64
  1786. #endif //POWER
  1787. #endif //ZARCH
  1788. #endif //(ARCH_LOONGARCH64)
  1789. #endif //(ARCH_MIPS64)
  1790. #endif //(ARCH_ARM64)