You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 58 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  /* Forward declaration: init_parameter is named inside the table
   * initializer below, ahead of its definition later in this file. */
  46. static void init_parameter(void);
  /*
   * TABLE_NAME: statically initialized instance of gotoblas_t.
   *
   * The initializer is purely positional (no designated initializers), so
   * the order of the entries -- and of the preprocessor guards wrapped
   * around them -- has to agree with the member order of gotoblas_t.
   * That type is declared outside this file (presumably reached through
   * common.h -- TODO confirm). Never reorder, add or remove an entry here
   * without making the matching change to that declaration.
   *
   * Layout of the initializer, in order:
   *   - a few leading scalar parameters,
   *   - one section per routine-name prefix (sb, s, d, q, c, z, x), each
   *     enabled by BUILD_BFLOAT16, the BUILD_SINGLE / BUILD_DOUBLE /
   *     BUILD_COMPLEX / BUILD_COMPLEX16 flags, or EXPRECISION,
   *   - the init_parameter pointer and the SNUMOPT, DNUMOPT, QNUMOPT values,
   *   - per-prefix axpby, omatcopy, imatcopy and geadd entries.
   *
   * Recurring patterns:
   *   - Each section starts with "0, 0, 0,". NOTE(review): these look like
   *     placeholders for the xgemm_p / xgemm_q / xgemm_r members that
   *     init_parameter assigns at run time -- confirm against the struct.
   *   - "#if xGEMM_DEFAULT_UNROLL_M != xGEMM_DEFAULT_UNROLL_N" selects the
   *     i-prefixed copy routines when the two unroll factors differ and the
   *     o-prefixed ones otherwise; the o-prefixed routines are then listed
   *     again unconditionally on the following line(s).
   *   - The "#else" branch of each USE_GEMM3M conditional supplies the same
   *     number of entries (three 0 values and 37 NULL pointers) as the
   *     "#if" branch, so later entries keep their position either way.
   *   - Every routine identifier ends in "TS", and kernelTS.h is included
   *     above. NOTE(review): "TS" looks like a placeholder replaced per
   *     target by the build -- confirm in the build scripts.
   */
  47. gotoblas_t TABLE_NAME = {
  /* Leading scalar parameters, emitted unconditionally. */
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* sb-prefixed (BUILD_BFLOAT16) section. Besides the sb routines it lists
   * a number of s-prefixed routines (samax, strsm, strmm, ssymm, ...). */
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  /* Third unroll value: explicit override if defined, else max(M, N). */
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  /* Note: the trsm/trmm/symm copy selection in this section is keyed on
   * the SGEMM unroll factors, not the SBGEMM ones. */
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* With NO_LAPACK defined the two slots are filled with NULL instead. */
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  /* s-prefixed section. The guards differ from entry to entry: several
   * s-prefixed entries are also emitted when only BUILD_DOUBLE,
   * BUILD_COMPLEX or BUILD_COMPLEX16 is enabled, so each guard has to be
   * kept exactly in step with the struct declaration. */
  113. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  114. 0, 0, 0,
  115. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SGEMM_DEFAULT_UNROLL_MN
  117. SGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. #endif
  /* One integer entry, emitted unconditionally: 1 when
   * HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  122. #ifdef HAVE_EXCLUSIVE_CACHE
  123. 1,
  124. #else
  125. 0,
  126. #endif
  127. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  128. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  131. isamax_kTS,
  132. #endif
  133. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  134. isamin_kTS, ismax_kTS, ismin_kTS,
  135. snrm2_kTS, sasum_kTS,
  136. #endif
  137. #if BUILD_SINGLE == 1
  138. ssum_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. scopy_kTS, sdot_kTS,
  /* dsdot_kTS is disabled here; it is listed further down, after
   * ddot_kTS, under its own guard. */
  142. // dsdot_kTS,
  143. srot_kTS, srotm_kTS, saxpy_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  146. sscal_kTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. sswap_kTS,
  150. sgemv_nTS, sgemv_tTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. sger_kTS,
  154. #endif
  155. #if BUILD_SINGLE == 1
  156. ssymv_LTS, ssymv_UTS,
  157. #endif
  158. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  /* The two sgemm_direct entries exist only when ARCH_X86_64 is defined. */
  159. #ifdef ARCH_X86_64
  160. sgemm_directTS,
  161. sgemm_direct_performantTS,
  162. #endif
  163. sgemm_kernelTS, sgemm_betaTS,
  164. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  165. sgemm_incopyTS, sgemm_itcopyTS,
  166. #else
  167. sgemm_oncopyTS, sgemm_otcopyTS,
  168. #endif
  169. sgemm_oncopyTS, sgemm_otcopyTS,
  170. #endif
  171. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  172. #ifdef SMALL_MATRIX_OPT
  173. sgemm_small_matrix_permitTS,
  174. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  175. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  176. #endif
  177. #endif
  178. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  179. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  180. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  181. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  182. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  183. #else
  184. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  185. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  186. #endif
  187. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  188. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  189. #endif
  /* strmm, ssymm and the sneg/slaswp pair are emitted for BUILD_SINGLE only. */
  190. #if (BUILD_SINGLE==1)
  191. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  192. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  193. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  194. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  195. #else
  196. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  197. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  198. #endif
  199. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  200. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  201. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  202. ssymm_iutcopyTS, ssymm_iltcopyTS,
  203. #else
  204. ssymm_outcopyTS, ssymm_oltcopyTS,
  205. #endif
  206. ssymm_outcopyTS, ssymm_oltcopyTS,
  207. #ifndef NO_LAPACK
  208. sneg_tcopyTS, slaswp_ncopyTS,
  209. #else
  210. NULL,NULL,
  211. #endif
  212. #endif
  /* d-prefixed section. Most entries are emitted for BUILD_DOUBLE or
   * BUILD_COMPLEX16; dsum, dger, dsymv and everything from dtrsm onward
   * are emitted for BUILD_DOUBLE only. */
  213. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  214. 0, 0, 0,
  215. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  216. #ifdef DGEMM_DEFAULT_UNROLL_MN
  217. DGEMM_DEFAULT_UNROLL_MN,
  218. #else
  219. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  220. #endif
  221. #endif
  222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  223. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  224. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  225. dnrm2_kTS, dasum_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1)
  228. dsum_kTS,
  229. #endif
  230. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  231. dcopy_kTS, ddot_kTS,
  232. #endif
  /* dsdot_kTS is emitted for BUILD_SINGLE or BUILD_DOUBLE. */
  233. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  234. dsdot_kTS,
  235. #endif
  236. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  237. drot_kTS,
  238. drotm_kTS,
  239. daxpy_kTS,
  240. dscal_kTS,
  241. dswap_kTS,
  242. dgemv_nTS, dgemv_tTS,
  243. #endif
  244. #if (BUILD_DOUBLE==1)
  245. dger_kTS,
  246. dsymv_LTS, dsymv_UTS,
  247. #endif
  248. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  249. dgemm_kernelTS, dgemm_betaTS,
  250. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  251. dgemm_incopyTS, dgemm_itcopyTS,
  252. #else
  253. dgemm_oncopyTS, dgemm_otcopyTS,
  254. #endif
  255. dgemm_oncopyTS, dgemm_otcopyTS,
  256. #endif
  257. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  258. #ifdef SMALL_MATRIX_OPT
  259. dgemm_small_matrix_permitTS,
  260. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  261. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  262. #endif
  263. #endif
  264. #if (BUILD_DOUBLE==1)
  265. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  266. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  267. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  268. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  269. #else
  270. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  271. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  272. #endif
  273. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  274. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  275. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  276. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  277. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  278. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  279. #else
  280. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  281. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  282. #endif
  283. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  284. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  285. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  286. dsymm_iutcopyTS, dsymm_iltcopyTS,
  287. #else
  288. dsymm_outcopyTS, dsymm_oltcopyTS,
  289. #endif
  290. dsymm_outcopyTS, dsymm_oltcopyTS,
  291. #ifndef NO_LAPACK
  292. dneg_tcopyTS, dlaswp_ncopyTS,
  293. #else
  294. NULL, NULL,
  295. #endif
  296. #endif
  /* q-prefixed section, present only when EXPRECISION is defined. */
  297. #ifdef EXPRECISION
  298. 0, 0, 0,
  299. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  300. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  301. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  302. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  303. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  304. qgemv_nTS, qgemv_tTS, qger_kTS,
  305. qsymv_LTS, qsymv_UTS,
  306. qgemm_kernelTS, qgemm_betaTS,
  307. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  308. qgemm_incopyTS, qgemm_itcopyTS,
  309. #else
  310. qgemm_oncopyTS, qgemm_otcopyTS,
  311. #endif
  312. qgemm_oncopyTS, qgemm_otcopyTS,
  313. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  314. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  315. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  316. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  317. #else
  318. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  319. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  320. #endif
  321. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  322. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  323. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  324. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  325. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  326. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  327. #else
  328. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  329. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  330. #endif
  331. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  332. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  333. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  334. qsymm_iutcopyTS, qsymm_iltcopyTS,
  335. #else
  336. qsymm_outcopyTS, qsymm_oltcopyTS,
  337. #endif
  338. qsymm_outcopyTS, qsymm_oltcopyTS,
  339. #ifndef NO_LAPACK
  340. qneg_tcopyTS, qlaswp_ncopyTS,
  341. #else
  342. NULL, NULL,
  343. #endif
  344. #endif
  /* c-prefixed section, guarded by BUILD_COMPLEX. The outer guard opened
   * on the next line stays open until the second "#endif" after the ctrsm
   * copy entries; the inner "#if (BUILD_COMPLEX)" groups are nested inside
   * it and therefore test the same flag again. */
  345. #if (BUILD_COMPLEX)
  346. 0, 0, 0,
  347. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  348. #ifdef CGEMM_DEFAULT_UNROLL_MN
  349. CGEMM_DEFAULT_UNROLL_MN,
  350. #else
  351. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  352. #endif
  353. #if (BUILD_COMPLEX)
  354. camax_kTS, camin_kTS,
  355. #endif
  356. #if (BUILD_COMPLEX)
  357. icamax_kTS,
  358. #endif
  359. #if (BUILD_COMPLEX)
  360. icamin_kTS,
  361. cnrm2_kTS, casum_kTS, csum_kTS,
  362. #endif
  363. #if (BUILD_COMPLEX)
  364. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  365. #endif
  366. #if (BUILD_COMPLEX)
  367. csrot_kTS,
  368. #endif
  369. #if (BUILD_COMPLEX)
  370. caxpy_kTS,
  371. caxpyc_kTS,
  372. cscal_kTS,
  373. cswap_kTS,
  374. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  375. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  376. #endif
  377. #if (BUILD_COMPLEX)
  378. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  379. csymv_LTS, csymv_UTS,
  380. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  381. #endif
  382. #if (BUILD_COMPLEX)
  383. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  384. cgemm_betaTS,
  385. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  386. cgemm_incopyTS, cgemm_itcopyTS,
  387. #else
  388. cgemm_oncopyTS, cgemm_otcopyTS,
  389. #endif
  390. cgemm_oncopyTS, cgemm_otcopyTS,
  391. #ifdef SMALL_MATRIX_OPT
  392. cgemm_small_matrix_permitTS,
  393. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  394. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  395. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  396. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  397. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  398. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  399. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  400. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  401. #endif
  402. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  403. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  404. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  405. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  406. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  407. #else
  408. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  409. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  410. #endif
  411. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  412. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  /* The first #endif closes the cgemm/ctrsm group, the second closes the
   * outer BUILD_COMPLEX guard that opened this section. */
  413. #endif
  414. #endif
  /* Second BUILD_COMPLEX group: ctrmm, csymm, chemm and the cgemm3m block. */
  415. #if (BUILD_COMPLEX)
  416. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  417. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  418. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  419. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  420. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  421. #else
  422. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  423. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  424. #endif
  425. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  426. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  427. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  428. csymm_iutcopyTS, csymm_iltcopyTS,
  429. #else
  430. csymm_outcopyTS, csymm_oltcopyTS,
  431. #endif
  432. csymm_outcopyTS, csymm_oltcopyTS,
  433. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  434. chemm_iutcopyTS, chemm_iltcopyTS,
  435. #else
  436. chemm_outcopyTS, chemm_oltcopyTS,
  437. #endif
  438. chemm_outcopyTS, chemm_oltcopyTS,
  /* cgemm3m block: three 0 entries, then either real values (USE_GEMM3M)
   * or an equal number of 0 / NULL placeholders. When
   * CGEMM3M_DEFAULT_UNROLL_M is not defined, the SGEMM unroll factors are
   * used for the unroll entries. */
  439. 0, 0, 0,
  440. #if (USE_GEMM3M)
  441. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  442. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  443. #else
  444. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  445. #endif
  446. cgemm3m_kernelTS,
  447. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  448. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  449. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  450. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  451. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  452. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  453. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  454. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  455. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  456. csymm3m_oucopybTS, csymm3m_olcopybTS,
  457. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  458. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  459. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  460. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  461. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  462. chemm3m_oucopybTS, chemm3m_olcopybTS,
  463. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  464. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  465. #else
  /* Placeholders: 3 zeros + 1 + 18 * 2 = 37 NULLs, same count as above. */
  466. 0, 0, 0,
  467. NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. NULL, NULL,
  483. NULL, NULL,
  484. NULL, NULL,
  485. NULL, NULL,
  486. #endif
  487. #endif
  488. #if (BUILD_COMPLEX)
  489. #ifndef NO_LAPACK
  490. cneg_tcopyTS,
  491. claswp_ncopyTS,
  492. #else
  493. NULL, NULL,
  494. #endif
  495. #endif
  /* z-prefixed section, guarded by BUILD_COMPLEX16; a single guard covers
   * the whole section. */
  496. #if BUILD_COMPLEX16 == 1
  497. 0, 0, 0,
  498. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  499. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  500. ZGEMM_DEFAULT_UNROLL_MN,
  501. #else
  502. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  503. #endif
  504. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  505. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  506. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  507. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  508. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  509. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  510. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  511. zsymv_LTS, zsymv_UTS,
  512. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  513. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  514. zgemm_betaTS,
  515. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  516. zgemm_incopyTS, zgemm_itcopyTS,
  517. #else
  518. zgemm_oncopyTS, zgemm_otcopyTS,
  519. #endif
  520. zgemm_oncopyTS, zgemm_otcopyTS,
  521. #ifdef SMALL_MATRIX_OPT
  522. zgemm_small_matrix_permitTS,
  523. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  524. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  525. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  526. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  527. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  528. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  529. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  530. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  531. #endif
  532. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  533. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  534. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  535. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  536. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  537. #else
  538. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  539. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  540. #endif
  541. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  542. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  543. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  544. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  545. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  546. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  547. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  548. #else
  549. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  550. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  551. #endif
  552. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  553. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  554. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  555. zsymm_iutcopyTS, zsymm_iltcopyTS,
  556. #else
  557. zsymm_outcopyTS, zsymm_oltcopyTS,
  558. #endif
  559. zsymm_outcopyTS, zsymm_oltcopyTS,
  560. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  561. zhemm_iutcopyTS, zhemm_iltcopyTS,
  562. #else
  563. zhemm_outcopyTS, zhemm_oltcopyTS,
  564. #endif
  565. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* zgemm3m block, same shape as the cgemm3m block. When
   * ZGEMM3M_DEFAULT_UNROLL_M is not defined, the DGEMM unroll factors are
   * used for the unroll entries. */
  566. 0, 0, 0,
  567. #if (USE_GEMM3M)
  568. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  569. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  570. #else
  571. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  572. #endif
  573. zgemm3m_kernelTS,
  574. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  575. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  576. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  577. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  578. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  579. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  580. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  581. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  582. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  583. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  584. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  585. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  586. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  587. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  588. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  589. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  590. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  591. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  592. #else
  /* Placeholders: 3 zeros + 37 NULLs, same count as the branch above. */
  593. 0, 0, 0,
  594. NULL,
  595. NULL, NULL,
  596. NULL, NULL,
  597. NULL, NULL,
  598. NULL, NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. NULL, NULL,
  610. NULL, NULL,
  611. NULL, NULL,
  612. NULL, NULL,
  613. #endif
  614. #ifndef NO_LAPACK
  615. zneg_tcopyTS, zlaswp_ncopyTS,
  616. #else
  617. NULL, NULL,
  618. #endif
  619. #endif
  /* x-prefixed section, present only when EXPRECISION is defined. */
  620. #ifdef EXPRECISION
  621. 0, 0, 0,
  622. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  623. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  624. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  625. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  626. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  627. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  628. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  629. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  630. xsymv_LTS, xsymv_UTS,
  631. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  632. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  633. xgemm_betaTS,
  634. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  635. xgemm_incopyTS, xgemm_itcopyTS,
  636. #else
  637. xgemm_oncopyTS, xgemm_otcopyTS,
  638. #endif
  639. xgemm_oncopyTS, xgemm_otcopyTS,
  640. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  641. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  642. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  643. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  644. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  645. #else
  646. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  647. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  648. #endif
  649. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  650. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  651. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  652. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  653. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  654. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  655. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  656. #else
  657. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  658. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  659. #endif
  660. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  661. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  662. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  663. xsymm_iutcopyTS, xsymm_iltcopyTS,
  664. #else
  665. xsymm_outcopyTS, xsymm_oltcopyTS,
  666. #endif
  667. xsymm_outcopyTS, xsymm_oltcopyTS,
  668. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  669. xhemm_iutcopyTS, xhemm_iltcopyTS,
  670. #else
  671. xhemm_outcopyTS, xhemm_oltcopyTS,
  672. #endif
  673. xhemm_outcopyTS, xhemm_oltcopyTS,
  /* xgemm3m block, same shape as the cgemm3m block; the unroll entries
   * always use the QGEMM unroll factors here. */
  674. 0, 0, 0,
  675. #if (USE_GEMM3M)
  676. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  677. xgemm3m_kernelTS,
  678. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  679. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  680. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  681. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  682. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  683. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  684. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  685. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  686. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  687. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  688. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  689. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  690. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  691. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  692. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  693. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  694. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  695. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  696. #else
  /* Placeholders: 3 zeros + 37 NULLs, same count as the branch above. */
  697. 0, 0, 0,
  698. NULL,
  699. NULL, NULL,
  700. NULL, NULL,
  701. NULL, NULL,
  702. NULL, NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. NULL, NULL,
  714. NULL, NULL,
  715. NULL, NULL,
  716. NULL, NULL,
  717. #endif
  718. #ifndef NO_LAPACK
  719. xneg_tcopyTS, xlaswp_ncopyTS,
  720. #else
  721. NULL, NULL,
  722. #endif
  723. #endif
  /* Tail of the table: the init_parameter pointer and three NUMOPT values,
   * followed by axpby, omatcopy, imatcopy and geadd entries. Within each of
   * those four groups the order is s, d, c, z, each under its BUILD_* flag. */
  724. init_parameter,
  725. SNUMOPT, DNUMOPT, QNUMOPT,
  726. #if BUILD_SINGLE == 1
  727. saxpby_kTS,
  728. #endif
  729. #if BUILD_DOUBLE == 1
  730. daxpby_kTS,
  731. #endif
  732. #if BUILD_COMPLEX == 1
  733. caxpby_kTS,
  734. #endif
  735. #if BUILD_COMPLEX16== 1
  736. zaxpby_kTS,
  737. #endif
  738. #if BUILD_SINGLE == 1
  739. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  740. #endif
  741. #if BUILD_DOUBLE== 1
  742. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  743. #endif
  744. #if BUILD_COMPLEX == 1
  745. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  746. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  747. #endif
  748. #if BUILD_COMPLEX16 == 1
  749. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  750. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  751. #endif
  752. #if BUILD_SINGLE == 1
  753. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  754. #endif
  755. #if BUILD_DOUBLE== 1
  756. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  757. #endif
  758. #if BUILD_COMPLEX== 1
  759. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  760. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  761. #endif
  762. #if BUILD_COMPLEX16==1
  763. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  764. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  765. #endif
  766. #if BUILD_SINGLE == 1
  767. sgeadd_kTS,
  768. #endif
  769. #if BUILD_DOUBLE==1
  770. dgeadd_kTS,
  771. #endif
  772. #if BUILD_COMPLEX==1
  773. cgeadd_kTS,
  774. #endif
  775. #if BUILD_COMPLEX16==1
  776. zgeadd_kTS,
  777. #endif
  778. };
  779. #if (ARCH_ARM64)
  /*
   * ARM64 variant: fill the *gemm_p / *gemm_q / *gemm_r fields of TABLE_NAME
   * from the compile-time *GEMM_DEFAULT_{P,Q,R} macros, one precision at a
   * time, honouring the BUILD_* switches.
   *
   * When USE_GEMM3M is enabled, the cgemm3m / zgemm3m fields take their own
   * CGEMM3M / ZGEMM3M defaults if those macros exist; otherwise they copy the
   * sgemm / dgemm fields, so that section must stay after the real-precision
   * assignments.
   */
  static void init_parameter(void) {

#if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
#endif

    /* The single-precision values are also needed by complex builds. */
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
#endif

    /* Likewise the double-precision values for complex16 builds. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
#endif

#if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
#endif

#if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
#endif

#ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
#endif

#if (USE_GEMM3M)

    /* cgemm3m: dedicated default if provided, else the sgemm value. */
#if defined(CGEMM3M_DEFAULT_P)
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif
#if defined(CGEMM3M_DEFAULT_Q)
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
#endif
#if defined(CGEMM3M_DEFAULT_R)
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
#else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
#endif

    /* zgemm3m: dedicated default if provided, else the dgemm value. */
#if defined(ZGEMM3M_DEFAULT_P)
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif
#if defined(ZGEMM3M_DEFAULT_Q)
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
#endif
#if defined(ZGEMM3M_DEFAULT_R)
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
#else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
#endif

    /* xgemm3m always mirrors the qgemm values. */
#ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
#endif

#endif /* USE_GEMM3M */
  }
  872. #else // (ARCH_ARM64)
  873. #if defined(ARCH_MIPS64)
  874. static void init_parameter(void) {
  875. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  876. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  877. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  878. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  879. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  880. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  881. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  882. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  883. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  884. TABLE_NAME.dgemm_r = 640;
  885. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  886. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  887. #ifdef EXPRECISION
  888. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  889. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  890. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  891. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  892. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  893. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  894. #endif
  895. #if defined(USE_GEMM3M)
  896. #ifdef CGEMM3M_DEFAULT_P
  897. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  898. #else
  899. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  900. #endif
  901. #ifdef ZGEMM3M_DEFAULT_P
  902. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  903. #else
  904. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  905. #endif
  906. #ifdef CGEMM3M_DEFAULT_Q
  907. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  908. #else
  909. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  910. #endif
  911. #ifdef ZGEMM3M_DEFAULT_Q
  912. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  913. #else
  914. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  915. #endif
  916. #ifdef CGEMM3M_DEFAULT_R
  917. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  918. #else
  919. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  920. #endif
  921. #ifdef ZGEMM3M_DEFAULT_R
  922. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  923. #else
  924. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  925. #endif
  926. #ifdef EXPRECISION
  927. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  928. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  929. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  930. #endif
  931. #endif
  932. }
  933. #else // (ARCH_MIPS64)
  934. #if (ARCH_LOONGARCH64)
  /*
   * Read CPUCFG word 0x14 and turn it into a cache size in MiB.
   *
   * The word is decoded as
   *     bits  0..15 : N - 1      (a count, stored minus one)
   *     bits 16..23 : log2 of a second factor
   *     bits 24..30 : log2 of a third factor
   * and the result is N * 2^(bits16..23) * 2^(bits24..30) / 2^20, rounded
   * down.  NOTE(review): presumably these are ways-1, log2(sets) and
   * log2(line bytes) of the L3 cache -- confirm against the LoongArch
   * reference manual.
   *
   * The scaling is done with integer shifts rather than pow(): every factor
   * is a power of two, so no floating point (and no libm) is needed, and the
   * result cannot hit the undefined double-to-int conversion that an
   * out-of-range product would cause.  Values too large for an int saturate
   * at 0x7fffffff.
   */
  static int get_L3_size(void) {
    int ret = 0, id = 0x14;
    unsigned int cfg, count, log2_scale, shift;
    unsigned long long mib;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(ret)
      : [id]"r"(id)
      : "memory"
    );

    cfg        = (unsigned int)ret;
    count      = (cfg & 0xffff) + 1;                           /* 1 .. 65536 */
    log2_scale = ((cfg >> 16) & 0xff) + ((cfg >> 24) & 0x7f);  /* 0 .. 382   */

    /* Dividing by 2^20 (bytes -> MiB) is folded into the shift amount. */
    if (log2_scale < 20)
      return (int)(count >> (20 - log2_scale));

    shift = log2_scale - 20;
    if (shift > 46)              /* count << shift would not fit in 64 bits */
      return 0x7fffffff;

    mib = (unsigned long long)count << shift;
    return (mib > 0x7fffffffULL) ? 0x7fffffff : (int)mib;
  }
  945. static void init_parameter(void) {
  946. #ifdef BUILD_BFLOAT16
  947. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  948. #endif
  949. #ifdef BUILD_BFLOAT16
  950. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  951. #endif
  952. #if defined(LA464)
  953. int L3_size = get_L3_size();
  954. #ifdef SMP
  955. if(blas_num_threads == 1){
  956. #endif
  957. //single thread
  958. if (L3_size == 32){ // 3C5000 and 3D5000
  959. TABLE_NAME.sgemm_p = 256;
  960. TABLE_NAME.sgemm_q = 384;
  961. TABLE_NAME.sgemm_r = 8192;
  962. TABLE_NAME.dgemm_p = 112;
  963. TABLE_NAME.dgemm_q = 289;
  964. TABLE_NAME.dgemm_r = 4096;
  965. TABLE_NAME.cgemm_p = 128;
  966. TABLE_NAME.cgemm_q = 256;
  967. TABLE_NAME.cgemm_r = 4096;
  968. TABLE_NAME.zgemm_p = 128;
  969. TABLE_NAME.zgemm_q = 128;
  970. TABLE_NAME.zgemm_r = 2048;
  971. } else { // 3A5000 and 3C5000L
  972. TABLE_NAME.sgemm_p = 256;
  973. TABLE_NAME.sgemm_q = 384;
  974. TABLE_NAME.sgemm_r = 4096;
  975. TABLE_NAME.dgemm_p = 112;
  976. TABLE_NAME.dgemm_q = 300;
  977. TABLE_NAME.dgemm_r = 3024;
  978. TABLE_NAME.cgemm_p = 128;
  979. TABLE_NAME.cgemm_q = 256;
  980. TABLE_NAME.cgemm_r = 2048;
  981. TABLE_NAME.zgemm_p = 128;
  982. TABLE_NAME.zgemm_q = 128;
  983. TABLE_NAME.zgemm_r = 1024;
  984. }
  985. #ifdef SMP
  986. }else{
  987. //multi thread
  988. if (L3_size == 32){ // 3C5000 and 3D5000
  989. TABLE_NAME.sgemm_p = 256;
  990. TABLE_NAME.sgemm_q = 384;
  991. TABLE_NAME.sgemm_r = 1024;
  992. TABLE_NAME.dgemm_p = 112;
  993. TABLE_NAME.dgemm_q = 289;
  994. TABLE_NAME.dgemm_r = 342;
  995. TABLE_NAME.cgemm_p = 128;
  996. TABLE_NAME.cgemm_q = 256;
  997. TABLE_NAME.cgemm_r = 512;
  998. TABLE_NAME.zgemm_p = 128;
  999. TABLE_NAME.zgemm_q = 128;
  1000. TABLE_NAME.zgemm_r = 512;
  1001. } else { // 3A5000 and 3C5000L
  1002. TABLE_NAME.sgemm_p = 256;
  1003. TABLE_NAME.sgemm_q = 384;
  1004. TABLE_NAME.sgemm_r = 2048;
  1005. TABLE_NAME.dgemm_p = 112;
  1006. TABLE_NAME.dgemm_q = 300;
  1007. TABLE_NAME.dgemm_r = 738;
  1008. TABLE_NAME.cgemm_p = 128;
  1009. TABLE_NAME.cgemm_q = 256;
  1010. TABLE_NAME.cgemm_r = 1024;
  1011. TABLE_NAME.zgemm_p = 128;
  1012. TABLE_NAME.zgemm_q = 128;
  1013. TABLE_NAME.zgemm_r = 1024;
  1014. }
  1015. }
  1016. #endif
  1017. #else
  1018. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1019. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1020. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1021. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1022. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1023. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1024. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1025. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1026. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1027. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1028. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1029. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1030. #endif
  1031. #ifdef BUILD_BFLOAT16
  1032. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1033. #endif
  1034. }
  1035. #else // (ARCH_LOONGARCH64)
  1036. #if (ARCH_POWER)
  1037. static void init_parameter(void) {
  1038. #ifdef BUILD_BFLOAT16
  1039. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1040. #endif
  1041. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1042. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1043. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1044. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1045. #ifdef BUILD_BFLOAT16
  1046. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1047. #endif
  1048. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1049. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1050. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1051. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1052. #ifdef BUILD_BFLOAT16
  1053. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1054. #endif
  1055. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1056. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1057. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1058. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1059. }
  1060. #else //POWER
  1061. #if (ARCH_ZARCH)
  1062. static void init_parameter(void) {
  1063. #ifdef BUILD_BFLOAT16
  1064. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1065. #endif
  1066. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1067. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1068. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1069. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1070. #ifdef BUILD_BFLOAT16
  1071. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1072. #endif
  1073. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1074. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1075. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1076. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1077. #ifdef BUILD_BFLOAT16
  1078. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1079. #endif
  1080. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1081. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1082. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1083. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1084. }
  1085. #else //ZARCH
  1086. #if (ARCH_RISCV64)
  1087. static void init_parameter(void) {
  1088. #ifdef BUILD_BFLOAT16
  1089. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1090. #endif
  1091. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1092. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1093. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1094. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1095. #ifdef BUILD_BFLOAT16
  1096. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1097. #endif
  1098. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1099. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1100. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1101. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1102. #ifdef BUILD_BFLOAT16
  1103. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1104. #endif
  1105. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1106. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1107. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1108. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1109. }
  1110. #else //RISCV64
  1111. #ifdef ARCH_X86
  /*
   * Map one CPUID leaf-2 cache descriptor byte to an L2 size in KB.
   * Returns 0 for any byte that is not one of the L2 descriptors listed here.
   * This table is from http://www.sandpile.org/ia32/cpuid.htm
   */
  static int l2_kb_from_descriptor(int descriptor) {
    switch (descriptor) {
    case 0x1a:
      return 96;
    case 0x39: case 0x3b: case 0x41: case 0x79: case 0x81:
      return 128;
    case 0x3a:
      return 192;
    case 0x21: case 0x3c: case 0x42: case 0x7a: case 0x7e: case 0x82:
      return 256;
    case 0x3d:
      return 384;
    case 0x3e: case 0x43: case 0x7b: case 0x7f: case 0x83: case 0x86:
      return 512;
    case 0x44: case 0x78: case 0x7c: case 0x84: case 0x87:
      return 1024;
    case 0x45: case 0x7d: case 0x85:
      return 2048;
    case 0x48:
      return 3072;   /* 3 MB; was 3184, which is not 3 * 1024 */
    case 0x49:
      return 4096;
    case 0x4e:
      return 6144;
    default:
      return 0;
    }
  }

  /*
   * Legacy L2 detection through the descriptor bytes returned by CPUID
   * leaf 2.
   *
   * The bytes of eax, ebx, ecx and edx are scanned in that order, lowest
   * byte first, skipping the low byte of eax, and the size (in KB) of the
   * first recognised L2 descriptor is returned.  A register whose bit 31 is
   * set is skipped entirely: the Intel SDM marks such a register as holding
   * no valid descriptors, so its bytes must not be interpreted.
   *
   * If no descriptor matches, a warning is printed to stderr and 256 is
   * returned.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int r, b;

    cpuid(2, &eax, &ebx, &ecx, &edx);
    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    for (r = 0; r < 4; r++) {
      if ((unsigned int)regs[r] & 0x80000000u)
        continue;                       /* register flagged as reserved */

      /* The low byte of eax is not a descriptor, so start at byte 1 there. */
      for (b = (r == 0) ? 1 : 0; b < 4; b++) {
        int shift = 8 * b;
        int kb = l2_kb_from_descriptor(BITMASK(regs[r], shift, 0xff));
        if (kb > 0)
          return kb;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1182. #endif
  /*
   * Return the L2 cache size in KB.
   *
   * Sources, in order of precedence:
   *   1. the OPENBLAS_L2_SIZE environment variable, read via readenv_atoi();
   *      only a positive value is accepted, because a negative override
   *      would otherwise be returned as-is and used as a cache size;
   *   2. bits 16..31 of ecx from CPUID leaf 0x80000006;
   *   3. on ARCH_X86 builds, the legacy descriptor scan in
   *      get_l2_size_old(); on all other builds a warning is printed and
   *      256 is assumed.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx, l2;

    l2 = readenv_atoi("OPENBLAS_L2_SIZE");
    if (l2 > 0) {
      return l2;
    }

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2 = BITMASK(ecx, 16, 0xffff);
    if (l2 > 0) {
      return l2;
    }

#ifdef ARCH_X86
    return get_l2_size_old();
#else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
#endif
  }
  /*
   * Query CPUID leaf 0x80000006 and return the 14-bit field at edx[31:18]
   * multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result an L3 size in KB -- confirm against the vendor CPUID reference.
   */
  static __inline__ int get_l3_size(void){
    int reg_a, reg_b, reg_c, reg_d;
    int field;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    field = BITMASK(reg_d, 18, 0x3fff);
    return field * 512;
  }
  1206. static void init_parameter(void) {
  1207. int l2 = get_l2_size();
  1208. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1209. /* where the GEMM unrolling parameters do not depend on l2 */
  1210. #ifdef BUILD_BFLOAT16
  1211. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1212. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1213. #endif
  1214. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1215. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1216. #endif
  1217. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1218. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1219. #endif
  1220. #if BUILD_COMPLEX == 1
  1221. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1222. #endif
  1223. #if BUILD_COMPLEX16==1
  1224. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1225. #endif
  1226. #if BUILD_COMPLEX == 1
  1227. #ifdef CGEMM3M_DEFAULT_Q
  1228. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1229. #else
  1230. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1231. #endif
  1232. #endif
  1233. #if BUILD_COMPLEX16 == 1
  1234. #ifdef ZGEMM3M_DEFAULT_Q
  1235. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1236. #else
  1237. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1238. #endif
  1239. #endif
  1240. #ifdef EXPRECISION
  1241. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1242. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1243. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1244. #endif
  1245. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1246. #ifdef DEBUG
  1247. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1248. #endif
  1249. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1250. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1251. #endif
  1252. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1253. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1254. #endif
  1255. #if BUILD_COMPLEX==1
  1256. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1257. #endif
  1258. #if BUILD_COMPLEX16==1
  1259. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1260. #endif
  1261. #ifdef EXPRECISION
  1262. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1263. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1264. #endif
  1265. #endif
  1266. #ifdef CORE_NORTHWOOD
  1267. #ifdef DEBUG
  1268. fprintf(stderr, "Northwood\n");
  1269. #endif
  1270. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1271. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1272. #endif
  1273. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1274. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1275. #endif
  1276. #if BUILD_COMPLEX==1
  1277. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1278. #endif
  1279. #if BUILD_COMPLEX16==1
  1280. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1281. #endif
  1282. #ifdef EXPRECISION
  1283. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1284. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1285. #endif
  1286. #endif
  1287. #ifdef ATOM
  1288. #ifdef DEBUG
  1289. fprintf(stderr, "Atom\n");
  1290. #endif
  1291. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1292. TABLE_NAME.sgemm_p = 256;
  1293. #endif
  1294. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1295. TABLE_NAME.dgemm_p = 128;
  1296. #endif
  1297. #if BUILD_COMPLEX==1
  1298. TABLE_NAME.cgemm_p = 128;
  1299. #endif
  1300. #if BUILD_COMPLEX16==1
  1301. TABLE_NAME.zgemm_p = 64;
  1302. #endif
  1303. #ifdef EXPRECISION
  1304. TABLE_NAME.qgemm_p = 64;
  1305. TABLE_NAME.xgemm_p = 32;
  1306. #endif
  1307. #endif
  1308. #ifdef CORE_PRESCOTT
  1309. #ifdef DEBUG
  1310. fprintf(stderr, "Prescott\n");
  1311. #endif
  1312. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1313. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1314. #endif
  1315. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1316. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1317. #endif
  1318. #if BUILD_COMPLEX==1
  1319. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1320. #endif
  1321. #if BUILD_COMPLEX16 == 1
  1322. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1323. #endif
  1324. #ifdef EXPRECISION
  1325. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1326. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1327. #endif
  1328. #endif
  1329. #ifdef CORE2
  1330. #ifdef DEBUG
  1331. fprintf(stderr, "Core2\n");
  1332. #endif
  1333. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1334. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1335. #endif
  1336. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1337. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1338. #endif
  1339. #if BUILD_COMPLEX==1
  1340. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1341. #endif
  1342. #if BUILD_COMPLEX16==1
  1343. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1344. #endif
  1345. #ifdef EXPRECISION
  1346. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1347. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1348. #endif
  1349. #endif
  1350. #ifdef PENRYN
  1351. #ifdef DEBUG
  1352. fprintf(stderr, "Penryn\n");
  1353. #endif
  1354. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1355. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1356. #endif
  1357. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1358. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1359. #endif
  1360. #if BUILD_COMPLEX==1
  1361. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1362. #endif
  1363. #if BUILD_COMPLEX16==1
  1364. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1365. #endif
  1366. #ifdef EXPRECISION
  1367. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1368. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1369. #endif
  1370. #endif
  1371. #ifdef DUNNINGTON
  1372. #ifdef DEBUG
  1373. fprintf(stderr, "Dunnington\n");
  1374. #endif
  1375. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1376. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1377. #endif
  1378. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1379. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1380. #endif
  1381. #if BUILD_COMPLEX==1
  1382. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1383. #endif
  1384. #if BUILD_COMPLEX16==1
  1385. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1386. #endif
  1387. #ifdef EXPRECISION
  1388. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1389. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1390. #endif
  1391. #endif
  1392. #ifdef NEHALEM
  1393. #ifdef DEBUG
  1394. fprintf(stderr, "Nehalem\n");
  1395. #endif
  1396. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1397. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1398. #endif
  1399. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1400. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1401. #endif
  1402. #if BUILD_COMPLEX
  1403. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1404. #endif
  1405. #if BUILD_COMPLEX16
  1406. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1407. #endif
  1408. #ifdef EXPRECISION
  1409. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1410. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1411. #endif
  1412. #endif
  1413. #ifdef SANDYBRIDGE
  1414. #ifdef DEBUG
  1415. fprintf(stderr, "Sandybridge\n");
  1416. #endif
  1417. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1418. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1419. #endif
  1420. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1421. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1422. #endif
  1423. #if BUILD_COMPLEX
  1424. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1425. #endif
  1426. #if BUILD_COMPLEX16
  1427. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1428. #endif
  1429. #ifdef EXPRECISION
  1430. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1431. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1432. #endif
  1433. #endif
  1434. #ifdef HASWELL
  1435. #ifdef DEBUG
  1436. fprintf(stderr, "Haswell\n");
  1437. #endif
  1438. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1439. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1440. #endif
  1441. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1442. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1443. #endif
  1444. #if BUILD_COMPLEX
  1445. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1446. #endif
  1447. #if BUILD_COMPLEX16
  1448. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1449. #endif
  1450. #ifdef EXPRECISION
  1451. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1452. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1453. #endif
  1454. #endif
  1455. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1456. #ifdef DEBUG
  1457. fprintf(stderr, "SkylakeX\n");
  1458. #endif
  1459. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1460. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1461. #endif
  1462. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1463. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1464. #endif
  1465. #if BUILD_COMPLEX
  1466. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1467. #endif
  1468. #if BUILD_COMPLEX16
  1469. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1470. #endif
  1471. #ifdef EXPRECISION
  1472. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1473. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1474. #endif
  1475. #endif
  1476. #ifdef OPTERON
  1477. #ifdef DEBUG
  1478. fprintf(stderr, "Opteron\n");
  1479. #endif
  1480. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1481. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1482. #endif
  1483. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1484. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1485. #endif
  1486. #if BUILD_COMPLEX
  1487. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1488. #endif
  1489. #if BUILD_COMPLEX16
  1490. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1491. #endif
  1492. #ifdef EXPRECISION
  1493. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1494. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1495. #endif
  1496. #endif
  1497. #ifdef BARCELONA
  1498. #ifdef DEBUG
  1499. fprintf(stderr, "Barcelona\n");
  1500. #endif
  1501. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1502. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1503. #endif
  1504. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1505. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1506. #endif
  1507. #if BUILD_COMPLEX
  1508. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1509. #endif
  1510. #if BUILD_COMPLEX16
  1511. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1512. #endif
  1513. #ifdef EXPRECISION
  1514. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1515. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1516. #endif
  1517. #endif
  1518. #ifdef BOBCAT
  1519. #ifdef DEBUG
  1520. fprintf(stderr, "Bobcate\n");
  1521. #endif
  1522. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1523. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1524. #endif
  1525. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1526. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1527. #endif
  1528. #if BUILD_COMPLEX
  1529. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1530. #endif
  1531. #if BUILD_COMPLEX16
  1532. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1533. #endif
  1534. #ifdef EXPRECISION
  1535. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1536. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1537. #endif
  1538. #endif
  1539. #ifdef BULLDOZER
  1540. #ifdef DEBUG
  1541. fprintf(stderr, "Bulldozer\n");
  1542. #endif
  1543. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1544. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1545. #endif
  1546. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1547. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1548. #endif
  1549. #if BUILD_COMPLEX
  1550. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1551. #endif
  1552. #if BUILD_COMPLEX16
  1553. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1554. #endif
  1555. #ifdef EXPRECISION
  1556. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1557. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1558. #endif
  1559. #endif
  1560. #ifdef EXCAVATOR
  1561. #ifdef DEBUG
  1562. fprintf(stderr, "Excavator\n");
  1563. #endif
  1564. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1565. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1566. #endif
  1567. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1568. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1569. #endif
  1570. #if BUILD_COMPLEX
  1571. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1572. #endif
  1573. #if BUILD_COMPLEX16
  1574. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1575. #endif
  1576. #ifdef EXPRECISION
  1577. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1578. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1579. #endif
  1580. #endif
  1581. #ifdef PILEDRIVER
  1582. #ifdef DEBUG
  1583. fprintf(stderr, "Piledriver\n");
  1584. #endif
  1585. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1586. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1587. #endif
  1588. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1589. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1590. #endif
  1591. #if BUILD_COMPLEX
  1592. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1593. #endif
  1594. #if BUILD_COMPLEX16
  1595. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1596. #endif
  1597. #ifdef EXPRECISION
  1598. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1599. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1600. #endif
  1601. #endif
  1602. #ifdef STEAMROLLER
  1603. #ifdef DEBUG
  1604. fprintf(stderr, "Steamroller\n");
  1605. #endif
  1606. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1607. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1608. #endif
  1609. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1610. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1611. #endif
  1612. #if BUILD_COMPLEX
  1613. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1614. #endif
  1615. #if BUILD_COMPLEX16
  1616. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1617. #endif
  1618. #ifdef EXPRECISION
  1619. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1620. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1621. #endif
  1622. #endif
  1623. #ifdef ZEN
  1624. #ifdef DEBUG
  1625. fprintf(stderr, "Zen\n");
  1626. #endif
  1627. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1628. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1629. #endif
  1630. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1631. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1632. #endif
  1633. #if BUILD_COMPLEX
  1634. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1635. #endif
  1636. #if BUILD_COMPLEX16
  1637. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1638. #endif
  1639. #ifdef EXPRECISION
  1640. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1641. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1642. #endif
  1643. #endif
  1644. #ifdef NANO
  1645. #ifdef DEBUG
  1646. fprintf(stderr, "NANO\n");
  1647. #endif
  1648. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1649. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1650. #endif
  1651. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1652. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1653. #endif
  1654. #if (BUILD_COMPLEX==1)
  1655. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1656. #endif
  1657. #if (BUILD_COMPLEX16==1)
  1658. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1659. #endif
  1660. #ifdef EXPRECISION
  1661. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1662. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1663. #endif
  1664. #endif
  1665. #ifdef SAPPHIRERAPIDS
  1666. #if (BUILD_BFLOAT16 == 1)
  1667. TABLE_NAME.need_amxtile_permission = 1;
  1668. #endif
  1669. #endif
  1670. #if BUILD_COMPLEX==1
  1671. #ifdef CGEMM3M_DEFAULT_P
  1672. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1673. #else
  1674. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1675. #endif
  1676. #endif
  1677. #if BUILD_COMPLEX16==1
  1678. #ifdef ZGEMM3M_DEFAULT_P
  1679. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1680. #else
  1681. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1682. #endif
  1683. #endif
  1684. #ifdef EXPRECISION
  1685. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1686. #endif
  1687. #if BUILD_SINGLE == 1
  1688. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1689. #endif
  1690. #if BUILD_DOUBLE== 1
  1691. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1692. #endif
  1693. #if BUILD_COMPLEX==1
  1694. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1695. #endif
  1696. #if BUILD_COMPLEX16==1
  1697. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1698. #endif
  1699. #if BUILD_COMPLEX==1
  1700. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1701. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1702. #else
  1703. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1704. #endif
  1705. #endif
  1706. #if BUILD_COMPLEX16==1
  1707. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1708. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1709. #else
  1710. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1711. #endif
  1712. #endif
  1713. #ifdef QUAD_PRECISION
  1714. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1715. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1716. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1717. #endif
  1718. #ifdef DEBUG
  1719. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1720. #endif
  1721. #if BUILD_BFLOAT16==1
  1722. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1723. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1724. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1725. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1726. #endif
  1727. #if BUILD_SINGLE==1
  1728. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1729. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1730. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1731. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1732. #endif
  1733. #if BUILD_DOUBLE==1
  1734. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1735. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1736. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1737. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1738. #endif
  1739. #ifdef EXPRECISION
  1740. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1741. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1742. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1743. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1744. #endif
  1745. #if BUILD_COMPLEX ==1
  1746. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1747. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1748. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1749. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1750. #endif
  1751. #if BUILD_COMPLEX16 ==1
  1752. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1753. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1754. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1755. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1756. #endif
  1757. #if BUILD_COMPLEX == 1
  1758. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1759. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1760. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1761. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1762. #endif
  1763. #if BUILD_COMPLEX16 == 1
  1764. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1765. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1766. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1767. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1768. #endif
  1769. #ifdef EXPRECISION
  1770. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1771. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1772. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1773. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1774. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1775. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1776. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1777. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1778. #endif
  1779. }
  1780. #endif //RISCV64
  1781. #endif //POWER
  1782. #endif //ZARCH
  1783. #endif //(ARCH_LOONGARCH64)
  1784. #endif //(ARCH_MIPS64)
  1785. #endif //(ARCH_ARM64)