You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 60 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023, 2025 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG /* remove any earlier definition of DEBUG from this point on */
  46. static void init_parameter(void); /* forward declaration: the TABLE_NAME initializer below stores a pointer to this function */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
  54. #ifdef BGEMM_DEFAULT_UNROLL_MN
  55. BGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. BGEMM_ALIGN_K,
  60. 0, 0, 0,
  61. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  62. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  63. SBGEMM_DEFAULT_UNROLL_MN,
  64. #else
  65. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  66. #endif
  67. SBGEMM_ALIGN_K,
  68. 0, // need_amxtile_permission
  69. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  70. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  71. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  72. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  73. dsdot_kTS,
  74. srot_kTS, srotm_kTS, bscal_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  75. bgemv_nTS, bgemv_tTS, sbgemv_nTS, sbgemv_tTS, sger_kTS,
  76. ssymv_LTS, ssymv_UTS,
  77. bgemm_kernelTS, bgemm_betaTS,
  78. #if BGEMM_DEFAULT_UNROLL_M != BGEMM_DEFAULT_UNROLL_N
  79. bgemm_incopyTS, bgemm_itcopyTS,
  80. #else
  81. bgemm_oncopyTS, bgemm_otcopyTS,
  82. #endif
  83. bgemm_oncopyTS, bgemm_otcopyTS,
  84. sbgemm_kernelTS, sbgemm_betaTS,
  85. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  86. sbgemm_incopyTS, sbgemm_itcopyTS,
  87. #else
  88. sbgemm_oncopyTS, sbgemm_otcopyTS,
  89. #endif
  90. sbgemm_oncopyTS, sbgemm_otcopyTS,
  91. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  94. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  95. #else
  96. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  97. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  98. #endif
  99. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  100. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  101. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  102. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  103. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  104. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  105. #else
  106. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  107. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  108. #endif
  109. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  110. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  111. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  112. ssymm_iutcopyTS, ssymm_iltcopyTS,
  113. #else
  114. ssymm_outcopyTS, ssymm_oltcopyTS,
  115. #endif
  116. ssymm_outcopyTS, ssymm_oltcopyTS,
  117. #ifndef NO_LAPACK
  118. sneg_tcopyTS, slaswp_ncopyTS,
  119. #else
  120. NULL,NULL,
  121. #endif
  122. #ifdef SMALL_MATRIX_OPT
  123. sbgemm_small_matrix_permitTS,
  124. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  125. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  126. #endif
  127. #endif
  128. #ifdef BUILD_HFLOAT16
  129. 0, 0, 0,
  130. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  131. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  132. SHGEMM_DEFAULT_UNROLL_MN,
  133. #else
  134. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  135. #endif
  136. shgemm_kernelTS, shgemm_betaTS,
  137. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  138. shgemm_incopyTS, shgemm_itcopyTS,
  139. #else
  140. shgemm_oncopyTS, shgemm_otcopyTS,
  141. #endif
  142. shgemm_oncopyTS, shgemm_otcopyTS,
  143. #endif
  144. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  145. 0, 0, 0,
  146. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  147. #ifdef SGEMM_DEFAULT_UNROLL_MN
  148. SGEMM_DEFAULT_UNROLL_MN,
  149. #else
  150. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  151. #endif
  152. #endif
  153. #ifdef HAVE_EXCLUSIVE_CACHE
  154. 1,
  155. #else
  156. 0,
  157. #endif
  158. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  159. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  160. #endif
  161. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  162. isamax_kTS,
  163. #endif
  164. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  165. isamin_kTS, ismax_kTS, ismin_kTS,
  166. snrm2_kTS, sasum_kTS,
  167. #endif
  168. #if BUILD_SINGLE == 1
  169. ssum_kTS,
  170. #endif
  171. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  172. scopy_kTS, sdot_kTS,
  173. // dsdot_kTS,
  174. srot_kTS, srotm_kTS, saxpy_kTS,
  175. #endif
  176. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  177. sscal_kTS,
  178. #endif
  179. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  180. sswap_kTS,
  181. sgemv_nTS, sgemv_tTS,
  182. #endif
  183. #if BUILD_SINGLE == 1
  184. sger_kTS,
  185. #endif
  186. #if BUILD_SINGLE == 1
  187. ssymv_LTS, ssymv_UTS,
  188. #endif
  189. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  190. #ifdef ARCH_X86_64
  191. sgemm_directTS,
  192. sgemm_direct_performantTS,
  193. #endif
  194. #ifdef ARCH_ARM64
  195. sgemm_directTS,
  196. sgemm_direct_alpha_betaTS,
  197. #endif
  198. sgemm_kernelTS, sgemm_betaTS,
  199. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  200. sgemm_incopyTS, sgemm_itcopyTS,
  201. #else
  202. sgemm_oncopyTS, sgemm_otcopyTS,
  203. #endif
  204. sgemm_oncopyTS, sgemm_otcopyTS,
  205. #endif
  206. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  207. #ifdef SMALL_MATRIX_OPT
  208. sgemm_small_matrix_permitTS,
  209. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  210. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  211. #endif
  212. #endif
  213. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  214. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  215. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  216. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  217. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  218. #else
  219. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  220. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  221. #endif
  222. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  223. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  224. #endif
  225. #if (BUILD_SINGLE==1)
  226. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  227. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  228. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  229. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  230. #else
  231. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  232. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  233. #endif
  234. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  235. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  236. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  237. ssymm_iutcopyTS, ssymm_iltcopyTS,
  238. #else
  239. ssymm_outcopyTS, ssymm_oltcopyTS,
  240. #endif
  241. ssymm_outcopyTS, ssymm_oltcopyTS,
  242. #ifndef NO_LAPACK
  243. sneg_tcopyTS, slaswp_ncopyTS,
  244. #else
  245. NULL,NULL,
  246. #endif
  247. #endif
  248. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  249. 0, 0, 0,
  250. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  251. #ifdef DGEMM_DEFAULT_UNROLL_MN
  252. DGEMM_DEFAULT_UNROLL_MN,
  253. #else
  254. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  255. #endif
  256. #endif
  257. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  258. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  259. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  260. dnrm2_kTS, dasum_kTS,
  261. #endif
  262. #if (BUILD_DOUBLE==1)
  263. dsum_kTS,
  264. #endif
  265. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  266. dcopy_kTS, ddot_kTS,
  267. #endif
  268. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  269. dsdot_kTS,
  270. #endif
  271. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  272. drot_kTS,
  273. drotm_kTS,
  274. daxpy_kTS,
  275. dscal_kTS,
  276. dswap_kTS,
  277. dgemv_nTS, dgemv_tTS,
  278. #endif
  279. #if (BUILD_DOUBLE==1)
  280. dger_kTS,
  281. dsymv_LTS, dsymv_UTS,
  282. #endif
  283. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  284. dgemm_kernelTS, dgemm_betaTS,
  285. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  286. dgemm_incopyTS, dgemm_itcopyTS,
  287. #else
  288. dgemm_oncopyTS, dgemm_otcopyTS,
  289. #endif
  290. dgemm_oncopyTS, dgemm_otcopyTS,
  291. #endif
  292. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  293. #ifdef SMALL_MATRIX_OPT
  294. dgemm_small_matrix_permitTS,
  295. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  296. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  297. #endif
  298. #endif
  299. #if (BUILD_DOUBLE==1)
  300. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  301. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  302. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  303. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  304. #else
  305. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  306. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  307. #endif
  308. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  309. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  310. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  311. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  312. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  313. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  314. #else
  315. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  316. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  317. #endif
  318. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  319. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  320. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  321. dsymm_iutcopyTS, dsymm_iltcopyTS,
  322. #else
  323. dsymm_outcopyTS, dsymm_oltcopyTS,
  324. #endif
  325. dsymm_outcopyTS, dsymm_oltcopyTS,
  326. #ifndef NO_LAPACK
  327. dneg_tcopyTS, dlaswp_ncopyTS,
  328. #else
  329. NULL, NULL,
  330. #endif
  331. #endif
  332. #ifdef EXPRECISION
  333. 0, 0, 0,
  334. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  335. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  336. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  337. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  338. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  339. qgemv_nTS, qgemv_tTS, qger_kTS,
  340. qsymv_LTS, qsymv_UTS,
  341. qgemm_kernelTS, qgemm_betaTS,
  342. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  343. qgemm_incopyTS, qgemm_itcopyTS,
  344. #else
  345. qgemm_oncopyTS, qgemm_otcopyTS,
  346. #endif
  347. qgemm_oncopyTS, qgemm_otcopyTS,
  348. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  349. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  350. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  351. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  352. #else
  353. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  354. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  355. #endif
  356. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  357. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  358. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  359. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  360. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  361. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  362. #else
  363. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  364. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  365. #endif
  366. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  367. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  368. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  369. qsymm_iutcopyTS, qsymm_iltcopyTS,
  370. #else
  371. qsymm_outcopyTS, qsymm_oltcopyTS,
  372. #endif
  373. qsymm_outcopyTS, qsymm_oltcopyTS,
  374. #ifndef NO_LAPACK
  375. qneg_tcopyTS, qlaswp_ncopyTS,
  376. #else
  377. NULL, NULL,
  378. #endif
  379. #endif
  380. #if (BUILD_COMPLEX)
  381. 0, 0, 0,
  382. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  383. #ifdef CGEMM_DEFAULT_UNROLL_MN
  384. CGEMM_DEFAULT_UNROLL_MN,
  385. #else
  386. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  387. #endif
  388. #if (BUILD_COMPLEX)
  389. camax_kTS, camin_kTS,
  390. #endif
  391. #if (BUILD_COMPLEX)
  392. icamax_kTS,
  393. #endif
  394. #if (BUILD_COMPLEX)
  395. icamin_kTS,
  396. cnrm2_kTS, casum_kTS, csum_kTS,
  397. #endif
  398. #if (BUILD_COMPLEX)
  399. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  400. #endif
  401. #if (BUILD_COMPLEX)
  402. csrot_kTS,
  403. #endif
  404. #if (BUILD_COMPLEX)
  405. caxpy_kTS,
  406. caxpyc_kTS,
  407. cscal_kTS,
  408. cswap_kTS,
  409. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  410. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  411. #endif
  412. #if (BUILD_COMPLEX)
  413. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  414. csymv_LTS, csymv_UTS,
  415. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  416. #endif
  417. #if (BUILD_COMPLEX)
  418. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  419. cgemm_betaTS,
  420. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  421. cgemm_incopyTS, cgemm_itcopyTS,
  422. #else
  423. cgemm_oncopyTS, cgemm_otcopyTS,
  424. #endif
  425. cgemm_oncopyTS, cgemm_otcopyTS,
  426. #ifdef SMALL_MATRIX_OPT
  427. cgemm_small_matrix_permitTS,
  428. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  429. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  430. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  431. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  432. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  433. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  434. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  435. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  436. #endif
  437. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  438. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  439. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  440. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  441. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  442. #else
  443. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  444. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  445. #endif
  446. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  447. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  448. #endif
  449. #endif
  450. #if (BUILD_COMPLEX)
  451. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  452. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  453. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  454. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  455. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  456. #else
  457. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  458. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  459. #endif
  460. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  461. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  462. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  463. csymm_iutcopyTS, csymm_iltcopyTS,
  464. #else
  465. csymm_outcopyTS, csymm_oltcopyTS,
  466. #endif
  467. csymm_outcopyTS, csymm_oltcopyTS,
  468. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  469. chemm_iutcopyTS, chemm_iltcopyTS,
  470. #else
  471. chemm_outcopyTS, chemm_oltcopyTS,
  472. #endif
  473. chemm_outcopyTS, chemm_oltcopyTS,
  474. 0, 0, 0,
  475. #if (USE_GEMM3M)
  476. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  477. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  478. #else
  479. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  480. #endif
  481. cgemm3m_kernelTS,
  482. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  483. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  484. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  485. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  486. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  487. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  488. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  489. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  490. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  491. csymm3m_oucopybTS, csymm3m_olcopybTS,
  492. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  493. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  494. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  495. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  496. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  497. chemm3m_oucopybTS, chemm3m_olcopybTS,
  498. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  499. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  500. #else
  501. 0, 0, 0,
  502. NULL,
  503. NULL, NULL,
  504. NULL, NULL,
  505. NULL, NULL,
  506. NULL, NULL,
  507. NULL, NULL,
  508. NULL, NULL,
  509. NULL, NULL,
  510. NULL, NULL,
  511. NULL, NULL,
  512. NULL, NULL,
  513. NULL, NULL,
  514. NULL, NULL,
  515. NULL, NULL,
  516. NULL, NULL,
  517. NULL, NULL,
  518. NULL, NULL,
  519. NULL, NULL,
  520. NULL, NULL,
  521. #endif
  522. #endif
  523. #if (BUILD_COMPLEX)
  524. #ifndef NO_LAPACK
  525. cneg_tcopyTS,
  526. claswp_ncopyTS,
  527. #else
  528. NULL, NULL,
  529. #endif
  530. #endif
  531. #if BUILD_COMPLEX16 == 1
  532. 0, 0, 0,
  533. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  534. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  535. ZGEMM_DEFAULT_UNROLL_MN,
  536. #else
  537. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  538. #endif
  539. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  540. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  541. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  542. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  543. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  544. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  545. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  546. zsymv_LTS, zsymv_UTS,
  547. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  548. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  549. zgemm_betaTS,
  550. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  551. zgemm_incopyTS, zgemm_itcopyTS,
  552. #else
  553. zgemm_oncopyTS, zgemm_otcopyTS,
  554. #endif
  555. zgemm_oncopyTS, zgemm_otcopyTS,
  556. #ifdef SMALL_MATRIX_OPT
  557. zgemm_small_matrix_permitTS,
  558. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  559. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  560. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  561. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  562. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  563. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  564. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  565. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  566. #endif
  567. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  568. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  569. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  570. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  571. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  572. #else
  573. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  574. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  575. #endif
  576. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  577. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  578. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  579. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  580. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  581. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  582. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  583. #else
  584. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  585. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  586. #endif
  587. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  588. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  589. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  590. zsymm_iutcopyTS, zsymm_iltcopyTS,
  591. #else
  592. zsymm_outcopyTS, zsymm_oltcopyTS,
  593. #endif
  594. zsymm_outcopyTS, zsymm_oltcopyTS,
  595. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  596. zhemm_iutcopyTS, zhemm_iltcopyTS,
  597. #else
  598. zhemm_outcopyTS, zhemm_oltcopyTS,
  599. #endif
  600. zhemm_outcopyTS, zhemm_oltcopyTS,
  601. 0, 0, 0,
  602. #if (USE_GEMM3M)
  603. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  604. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  605. #else
  606. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  607. #endif
  608. zgemm3m_kernelTS,
  609. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  610. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  611. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  612. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  613. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  614. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  615. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  616. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  617. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  618. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  619. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  620. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  621. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  622. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  623. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  624. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  625. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  626. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  627. #else
  628. 0, 0, 0,
  629. NULL,
  630. NULL, NULL,
  631. NULL, NULL,
  632. NULL, NULL,
  633. NULL, NULL,
  634. NULL, NULL,
  635. NULL, NULL,
  636. NULL, NULL,
  637. NULL, NULL,
  638. NULL, NULL,
  639. NULL, NULL,
  640. NULL, NULL,
  641. NULL, NULL,
  642. NULL, NULL,
  643. NULL, NULL,
  644. NULL, NULL,
  645. NULL, NULL,
  646. NULL, NULL,
  647. NULL, NULL,
  648. #endif
  649. #ifndef NO_LAPACK
  650. zneg_tcopyTS, zlaswp_ncopyTS,
  651. #else
  652. NULL, NULL,
  653. #endif
  654. #endif
  655. #ifdef EXPRECISION
  656. 0, 0, 0,
  657. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  658. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  659. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  660. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  661. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  662. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  663. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  664. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  665. xsymv_LTS, xsymv_UTS,
  666. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  667. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  668. xgemm_betaTS,
  669. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  670. xgemm_incopyTS, xgemm_itcopyTS,
  671. #else
  672. xgemm_oncopyTS, xgemm_otcopyTS,
  673. #endif
  674. xgemm_oncopyTS, xgemm_otcopyTS,
  675. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  676. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  677. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  678. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  679. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  680. #else
  681. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  682. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  683. #endif
  684. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  685. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  686. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  687. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  688. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  689. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  690. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  691. #else
  692. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  693. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  694. #endif
  695. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  696. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  697. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  698. xsymm_iutcopyTS, xsymm_iltcopyTS,
  699. #else
  700. xsymm_outcopyTS, xsymm_oltcopyTS,
  701. #endif
  702. xsymm_outcopyTS, xsymm_oltcopyTS,
  703. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  704. xhemm_iutcopyTS, xhemm_iltcopyTS,
  705. #else
  706. xhemm_outcopyTS, xhemm_oltcopyTS,
  707. #endif
  708. xhemm_outcopyTS, xhemm_oltcopyTS,
  709. 0, 0, 0,
  710. #if (USE_GEMM3M)
  711. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  712. xgemm3m_kernelTS,
  713. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  714. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  715. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  716. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  717. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  718. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  719. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  720. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  721. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  722. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  723. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  724. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  725. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  726. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  727. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  728. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  729. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  730. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  731. #else
  732. 0, 0, 0,
  733. NULL,
  734. NULL, NULL,
  735. NULL, NULL,
  736. NULL, NULL,
  737. NULL, NULL,
  738. NULL, NULL,
  739. NULL, NULL,
  740. NULL, NULL,
  741. NULL, NULL,
  742. NULL, NULL,
  743. NULL, NULL,
  744. NULL, NULL,
  745. NULL, NULL,
  746. NULL, NULL,
  747. NULL, NULL,
  748. NULL, NULL,
  749. NULL, NULL,
  750. NULL, NULL,
  751. NULL, NULL,
  752. #endif
  753. #ifndef NO_LAPACK
  754. xneg_tcopyTS, xlaswp_ncopyTS,
  755. #else
  756. NULL, NULL,
  757. #endif
  758. #endif
  759. init_parameter,
  760. SNUMOPT, DNUMOPT, QNUMOPT,
  761. #if BUILD_SINGLE == 1
  762. saxpby_kTS,
  763. #endif
  764. #if BUILD_DOUBLE == 1
  765. daxpby_kTS,
  766. #endif
  767. #if BUILD_COMPLEX == 1
  768. caxpby_kTS,
  769. #endif
  770. #if BUILD_COMPLEX16== 1
  771. zaxpby_kTS,
  772. #endif
  773. #if BUILD_SINGLE == 1
  774. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  775. #endif
  776. #if BUILD_DOUBLE== 1
  777. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  778. #endif
  779. #if BUILD_COMPLEX == 1
  780. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  781. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  782. #endif
  783. #if BUILD_COMPLEX16 == 1
  784. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  785. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  786. #endif
  787. #if BUILD_SINGLE == 1
  788. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  789. #endif
  790. #if BUILD_DOUBLE== 1
  791. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  792. #endif
  793. #if BUILD_COMPLEX== 1
  794. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  795. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  796. #endif
  797. #if BUILD_COMPLEX16==1
  798. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  799. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  800. #endif
  801. #if BUILD_SINGLE == 1
  802. sgeadd_kTS,
  803. #endif
  804. #if BUILD_DOUBLE==1
  805. dgeadd_kTS,
  806. #endif
  807. #if BUILD_COMPLEX==1
  808. cgeadd_kTS,
  809. #endif
  810. #if BUILD_COMPLEX16==1
  811. zgeadd_kTS,
  812. #endif
  813. };
  814. #if (ARCH_ARM64)
  /*
   * ARM64: load the GEMM blocking parameters (P, Q, R) of TABLE_NAME from
   * the compile-time *_DEFAULT_* constants.
   *
   * Each precision is only touched when the matching BUILD_* switch is on.
   * The real single/double values are also stored for complex-only builds;
   * the GEMM3M fallbacks at the end of this function read them back.
   */
  static void init_parameter(void) {

    /* ---- P ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.bgemm_p  = BGEMM_DEFAULT_P;
  #endif
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  #endif
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  #endif
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  #endif
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  #endif

    /* ---- Q ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.bgemm_q  = BGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

    /* ---- R ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
    TABLE_NAME.bgemm_r  = BGEMM_DEFAULT_R;
  #endif
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision (q = real, x = complex) ---- */
  #if defined(EXPRECISION)
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * ---- GEMM3M ----
     * A dedicated *GEMM3M_DEFAULT_* constant wins when it exists; otherwise
     * the value already stored for the corresponding real precision is reused.
     */
  #if (USE_GEMM3M)
  #ifndef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #else
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #endif
  #ifndef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #else
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #endif
  #ifndef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #else
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #endif
  #ifndef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #else
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #endif
  #ifndef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #else
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #endif
  #ifndef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #else
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #endif
  #if defined(EXPRECISION)
    /* the extended-precision 3M kernels share the qgemm blocking */
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  910. #else // (ARCH_ARM64)
  911. #if defined(ARCH_MIPS64)
  912. static void init_parameter(void) {
  913. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  914. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  915. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  916. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  917. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  918. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  919. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  920. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  921. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  922. TABLE_NAME.dgemm_r = 640;
  923. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  924. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  925. #ifdef EXPRECISION
  926. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  927. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  928. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  929. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  930. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  931. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  932. #endif
  933. #if defined(USE_GEMM3M)
  934. #ifdef CGEMM3M_DEFAULT_P
  935. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  936. #else
  937. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  938. #endif
  939. #ifdef ZGEMM3M_DEFAULT_P
  940. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  941. #else
  942. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  943. #endif
  944. #ifdef CGEMM3M_DEFAULT_Q
  945. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  946. #else
  947. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  948. #endif
  949. #ifdef ZGEMM3M_DEFAULT_Q
  950. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  951. #else
  952. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  953. #endif
  954. #ifdef CGEMM3M_DEFAULT_R
  955. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  956. #else
  957. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  958. #endif
  959. #ifdef ZGEMM3M_DEFAULT_R
  960. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  961. #else
  962. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  963. #endif
  964. #ifdef EXPRECISION
  965. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  966. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  967. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  968. #endif
  969. #endif
  970. }
  971. #else // (ARCH_MIPS64)
  972. #if (ARCH_LOONGARCH64)
  /*
   * Read CPUCFG configuration word 0x14 and derive the L3 cache size from it.
   *
   * The word is decoded as three fields:
   *   bits 15..0   n   -> factor (n + 1)
   *   bits 23..16  a   -> factor 2^a
   *   bits 30..24  b   -> factor 2^b
   * NOTE(review): per the LoongArch manual these are presumably
   * (ways - 1), log2(sets per way) and log2(line size) - confirm.
   *
   * Returns (n + 1) * 2^a * 2^b bytes expressed in MB, truncated toward zero.
   */
  static int get_L3_size(void) {
    int ret = 0, id = 0x14;
    int log2_scale;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(ret)
      : [id]"r"(id)
      : "memory"
    );

    /* combined power-of-two exponent; the -20 converts bytes to MB */
    log2_scale = ((ret >> 16) & 0xff) + ((ret >> 24) & 0x7f) - 20;

    /* ldexp scales by an exact power of two, so no pow() rounding concerns */
    return (int) ldexp((double) ((ret & 0xffff) + 1), log2_scale);
  }
  983. static void init_parameter(void) {
  984. #ifdef BUILD_BFLOAT16
  985. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  986. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  987. #endif
  988. #ifdef BUILD_BFLOAT16
  989. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  990. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  991. #endif
  992. #if defined(LA464)
  993. int L3_size = get_L3_size();
  994. #ifdef SMP
  995. if(blas_num_threads == 1){
  996. #endif
  997. //single thread
  998. if (L3_size == 32){ // 3C5000 and 3D5000
  999. TABLE_NAME.sgemm_p = 256;
  1000. TABLE_NAME.sgemm_q = 384;
  1001. TABLE_NAME.sgemm_r = 8192;
  1002. TABLE_NAME.dgemm_p = 112;
  1003. TABLE_NAME.dgemm_q = 289;
  1004. TABLE_NAME.dgemm_r = 4096;
  1005. TABLE_NAME.cgemm_p = 128;
  1006. TABLE_NAME.cgemm_q = 256;
  1007. TABLE_NAME.cgemm_r = 4096;
  1008. TABLE_NAME.zgemm_p = 128;
  1009. TABLE_NAME.zgemm_q = 128;
  1010. TABLE_NAME.zgemm_r = 2048;
  1011. } else { // 3A5000 and 3C5000L
  1012. TABLE_NAME.sgemm_p = 256;
  1013. TABLE_NAME.sgemm_q = 384;
  1014. TABLE_NAME.sgemm_r = 4096;
  1015. TABLE_NAME.dgemm_p = 112;
  1016. TABLE_NAME.dgemm_q = 300;
  1017. TABLE_NAME.dgemm_r = 3024;
  1018. TABLE_NAME.cgemm_p = 128;
  1019. TABLE_NAME.cgemm_q = 256;
  1020. TABLE_NAME.cgemm_r = 2048;
  1021. TABLE_NAME.zgemm_p = 128;
  1022. TABLE_NAME.zgemm_q = 128;
  1023. TABLE_NAME.zgemm_r = 1024;
  1024. }
  1025. #ifdef SMP
  1026. }else{
  1027. //multi thread
  1028. if (L3_size == 32){ // 3C5000 and 3D5000
  1029. TABLE_NAME.sgemm_p = 256;
  1030. TABLE_NAME.sgemm_q = 384;
  1031. TABLE_NAME.sgemm_r = 1024;
  1032. TABLE_NAME.dgemm_p = 112;
  1033. TABLE_NAME.dgemm_q = 289;
  1034. TABLE_NAME.dgemm_r = 342;
  1035. TABLE_NAME.cgemm_p = 128;
  1036. TABLE_NAME.cgemm_q = 256;
  1037. TABLE_NAME.cgemm_r = 512;
  1038. TABLE_NAME.zgemm_p = 128;
  1039. TABLE_NAME.zgemm_q = 128;
  1040. TABLE_NAME.zgemm_r = 512;
  1041. } else { // 3A5000 and 3C5000L
  1042. TABLE_NAME.sgemm_p = 256;
  1043. TABLE_NAME.sgemm_q = 384;
  1044. TABLE_NAME.sgemm_r = 2048;
  1045. TABLE_NAME.dgemm_p = 112;
  1046. TABLE_NAME.dgemm_q = 300;
  1047. TABLE_NAME.dgemm_r = 738;
  1048. TABLE_NAME.cgemm_p = 128;
  1049. TABLE_NAME.cgemm_q = 256;
  1050. TABLE_NAME.cgemm_r = 1024;
  1051. TABLE_NAME.zgemm_p = 128;
  1052. TABLE_NAME.zgemm_q = 128;
  1053. TABLE_NAME.zgemm_r = 1024;
  1054. }
  1055. }
  1056. #endif
  1057. #else
  1058. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1059. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1060. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1061. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1062. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1063. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1064. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1065. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1066. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1067. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1068. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1069. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1070. #endif
  1071. #ifdef BUILD_BFLOAT16
  1072. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1073. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1074. #endif
  1075. }
  1076. #else // (ARCH_LOONGARCH64)
  1077. #if (ARCH_POWER)
  1078. static void init_parameter(void) {
  1079. #ifdef BUILD_BFLOAT16
  1080. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1081. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1082. #endif
  1083. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1084. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1085. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1086. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1087. #ifdef BUILD_BFLOAT16
  1088. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1089. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1090. #endif
  1091. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1092. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1093. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1094. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1095. #ifdef BUILD_BFLOAT16
  1096. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1097. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1098. #endif
  1099. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1100. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1101. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1102. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1103. }
  1104. #else //POWER
  1105. #if (ARCH_ZARCH)
  1106. static void init_parameter(void) {
  1107. #ifdef BUILD_BFLOAT16
  1108. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1109. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1110. #endif
  1111. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1112. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1113. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1114. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1115. #ifdef BUILD_BFLOAT16
  1116. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1117. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1118. #endif
  1119. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1120. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1121. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1122. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1123. #ifdef BUILD_BFLOAT16
  1124. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1125. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1126. #endif
  1127. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1128. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1129. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1130. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1131. }
  1132. #else //ZARCH
  1133. #if (ARCH_RISCV64)
  1134. static void init_parameter(void) {
  1135. #ifdef BUILD_BFLOAT16
  1136. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1137. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1138. #endif
  1139. #ifdef BUILD_HFLOAT16
  1140. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1141. #endif
  1142. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1143. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1144. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1145. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1146. #ifdef BUILD_BFLOAT16
  1147. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1148. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1149. #endif
  1150. #ifdef BUILD_HFLOAT16
  1151. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1152. #endif
  1153. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1154. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1155. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1156. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1157. #ifdef BUILD_BFLOAT16
  1158. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1159. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1160. #endif
  1161. #ifdef BUILD_HFLOAT16
  1162. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1163. #endif
  1164. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1165. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1166. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1167. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1168. }
  1169. #else //RISCV64
  1170. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
   *
   * Leaf 2 returns sixteen bytes in EAX/EBX/ECX/EDX. The low byte of EAX is
   * skipped; the remaining fifteen bytes are scanned in register order
   * (EAX, EBX, ECX, EDX; low byte first) and the first one found in the
   * table below decides the result.
   *
   * Returns the L2 size in KB. If no descriptor matches, a warning is
   * printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int regs[4];   /* eax, ebx, ecx, edx */
    int idx;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    /* idx counts bytes across the four registers; byte 0 (EAX low) is skipped */
    for (idx = 1; idx < 16; idx++){
      switch (BITMASK(regs[idx / 4], 8 * (idx % 4), 0xff)){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB L2 = 3 * 1024 KB */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;

      default :
        /* not an L2 descriptor known to this table: keep scanning */
        break;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1241. #endif
  /*
   * Determine the L2 cache size (in KB) used for tuning the GEMM blocking.
   *
   * Sources, in order of precedence:
   *   1. the OPENBLAS_L2_SIZE environment variable, when it parses to a
   *      non-zero value (returned as-is);
   *   2. bits 31..16 of ECX from CPUID leaf 0x80000006, when positive;
   *   3. on ARCH_X86 builds, the CPUID leaf 2 lookup in get_l2_size_old();
   *      on all other builds, a warning on stderr and a default of 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int l2_kb;

    l2_kb = readenv_atoi("OPENBLAS_L2_SIZE");
    if (l2_kb != 0) {
      return l2_kb;
    }

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2_kb = BITMASK(ecx, 16, 0xffff);
    if (l2_kb > 0) {
      return l2_kb;
    }

    /* the extended leaf reported nothing usable */
  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Report the L3 cache size from CPUID leaf 0x80000006:
   * the 14-bit field in EDX bits 31..18, multiplied by 512.
   * NOTE(review): the result is presumably in KB (field counted in
   * 512 KB units) - confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_units;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l3_units = BITMASK(edx, 18, 0x3fff);

    return l3_units * 512;
  }
  1265. static void init_parameter(void) {
  1266. int l2 = get_l2_size();
  1267. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1268. /* where the GEMM unrolling parameters do not depend on l2 */
  1269. #ifdef BUILD_BFLOAT16
  1270. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1271. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1272. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1273. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1274. #endif
  1275. #ifdef BUILD_HFLOAT16
  1276. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1277. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1278. #endif
  1279. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1280. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1281. #endif
  1282. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1283. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1284. #endif
  1285. #if BUILD_COMPLEX == 1
  1286. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1287. #endif
  1288. #if BUILD_COMPLEX16==1
  1289. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1290. #endif
  1291. #if BUILD_COMPLEX == 1
  1292. #ifdef CGEMM3M_DEFAULT_Q
  1293. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1294. #else
  1295. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1296. #endif
  1297. #endif
  1298. #if BUILD_COMPLEX16 == 1
  1299. #ifdef ZGEMM3M_DEFAULT_Q
  1300. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1301. #else
  1302. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1303. #endif
  1304. #endif
  1305. #ifdef EXPRECISION
  1306. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1307. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1308. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1309. #endif
  1310. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1311. #ifdef DEBUG
  1312. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1313. #endif
  1314. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1315. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1316. #endif
  1317. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1318. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1319. #endif
  1320. #if BUILD_COMPLEX==1
  1321. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1322. #endif
  1323. #if BUILD_COMPLEX16==1
  1324. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1325. #endif
  1326. #ifdef EXPRECISION
  1327. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1328. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1329. #endif
  1330. #endif
  1331. #ifdef CORE_NORTHWOOD
  1332. #ifdef DEBUG
  1333. fprintf(stderr, "Northwood\n");
  1334. #endif
  1335. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1336. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1337. #endif
  1338. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1339. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1340. #endif
  1341. #if BUILD_COMPLEX==1
  1342. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1343. #endif
  1344. #if BUILD_COMPLEX16==1
  1345. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1346. #endif
  1347. #ifdef EXPRECISION
  1348. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1349. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1350. #endif
  1351. #endif
  1352. #ifdef ATOM
  1353. #ifdef DEBUG
  1354. fprintf(stderr, "Atom\n");
  1355. #endif
  1356. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1357. TABLE_NAME.sgemm_p = 256;
  1358. #endif
  1359. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1360. TABLE_NAME.dgemm_p = 128;
  1361. #endif
  1362. #if BUILD_COMPLEX==1
  1363. TABLE_NAME.cgemm_p = 128;
  1364. #endif
  1365. #if BUILD_COMPLEX16==1
  1366. TABLE_NAME.zgemm_p = 64;
  1367. #endif
  1368. #ifdef EXPRECISION
  1369. TABLE_NAME.qgemm_p = 64;
  1370. TABLE_NAME.xgemm_p = 32;
  1371. #endif
  1372. #endif
  1373. #ifdef CORE_PRESCOTT
  1374. #ifdef DEBUG
  1375. fprintf(stderr, "Prescott\n");
  1376. #endif
  1377. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1378. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1379. #endif
  1380. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1381. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1382. #endif
  1383. #if BUILD_COMPLEX==1
  1384. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1385. #endif
  1386. #if BUILD_COMPLEX16 == 1
  1387. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1388. #endif
  1389. #ifdef EXPRECISION
  1390. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1391. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1392. #endif
  1393. #endif
  1394. #ifdef CORE2
  1395. #ifdef DEBUG
  1396. fprintf(stderr, "Core2\n");
  1397. #endif
  1398. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1399. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1400. #endif
  1401. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1402. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1403. #endif
  1404. #if BUILD_COMPLEX==1
  1405. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1406. #endif
  1407. #if BUILD_COMPLEX16==1
  1408. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1409. #endif
  1410. #ifdef EXPRECISION
  1411. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1412. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1413. #endif
  1414. #endif
  1415. #ifdef PENRYN
  1416. #ifdef DEBUG
  1417. fprintf(stderr, "Penryn\n");
  1418. #endif
  1419. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1420. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1421. #endif
  1422. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1423. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1424. #endif
  1425. #if BUILD_COMPLEX==1
  1426. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1427. #endif
  1428. #if BUILD_COMPLEX16==1
  1429. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1430. #endif
  1431. #ifdef EXPRECISION
  1432. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1433. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1434. #endif
  1435. #endif
  1436. #ifdef DUNNINGTON
  1437. #ifdef DEBUG
  1438. fprintf(stderr, "Dunnington\n");
  1439. #endif
  1440. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1441. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1442. #endif
  1443. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1444. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1445. #endif
  1446. #if BUILD_COMPLEX==1
  1447. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1448. #endif
  1449. #if BUILD_COMPLEX16==1
  1450. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1451. #endif
  1452. #ifdef EXPRECISION
  1453. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1454. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1455. #endif
  1456. #endif
  1457. #ifdef NEHALEM
  1458. #ifdef DEBUG
  1459. fprintf(stderr, "Nehalem\n");
  1460. #endif
  1461. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1462. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1463. #endif
  1464. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1465. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1466. #endif
  1467. #if BUILD_COMPLEX
  1468. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1469. #endif
  1470. #if BUILD_COMPLEX16
  1471. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1472. #endif
  1473. #ifdef EXPRECISION
  1474. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1475. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1476. #endif
  1477. #endif
  1478. #ifdef SANDYBRIDGE
  1479. #ifdef DEBUG
  1480. fprintf(stderr, "Sandybridge\n");
  1481. #endif
  1482. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1483. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1484. #endif
  1485. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1486. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1487. #endif
  1488. #if BUILD_COMPLEX
  1489. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1490. #endif
  1491. #if BUILD_COMPLEX16
  1492. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1493. #endif
  1494. #ifdef EXPRECISION
  1495. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1496. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1497. #endif
  1498. #endif
  1499. #ifdef HASWELL
  1500. #ifdef DEBUG
  1501. fprintf(stderr, "Haswell\n");
  1502. #endif
  1503. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1504. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1505. #endif
  1506. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1507. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1508. #endif
  1509. #if BUILD_COMPLEX
  1510. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1511. #endif
  1512. #if BUILD_COMPLEX16
  1513. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1514. #endif
  1515. #ifdef EXPRECISION
  1516. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1517. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1518. #endif
  1519. #endif
  1520. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1521. #ifdef DEBUG
  1522. fprintf(stderr, "SkylakeX\n");
  1523. #endif
  1524. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1525. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1526. #endif
  1527. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1528. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1529. #endif
  1530. #if BUILD_COMPLEX
  1531. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1532. #endif
  1533. #if BUILD_COMPLEX16
  1534. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1535. #endif
  1536. #ifdef EXPRECISION
  1537. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1538. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1539. #endif
  1540. #endif
  1541. #ifdef OPTERON
  1542. #ifdef DEBUG
  1543. fprintf(stderr, "Opteron\n");
  1544. #endif
  1545. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1546. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1547. #endif
  1548. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1549. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1550. #endif
  1551. #if BUILD_COMPLEX
  1552. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1553. #endif
  1554. #if BUILD_COMPLEX16
  1555. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1556. #endif
  1557. #ifdef EXPRECISION
  1558. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1559. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1560. #endif
  1561. #endif
  1562. #ifdef BARCELONA
  1563. #ifdef DEBUG
  1564. fprintf(stderr, "Barcelona\n");
  1565. #endif
  1566. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1567. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1568. #endif
  1569. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1570. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1571. #endif
  1572. #if BUILD_COMPLEX
  1573. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1574. #endif
  1575. #if BUILD_COMPLEX16
  1576. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1577. #endif
  1578. #ifdef EXPRECISION
  1579. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1580. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1581. #endif
  1582. #endif
  1583. #ifdef BOBCAT
  1584. #ifdef DEBUG
  1585. fprintf(stderr, "Bobcate\n");
  1586. #endif
  1587. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1588. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1589. #endif
  1590. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1591. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1592. #endif
  1593. #if BUILD_COMPLEX
  1594. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1595. #endif
  1596. #if BUILD_COMPLEX16
  1597. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1598. #endif
  1599. #ifdef EXPRECISION
  1600. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1601. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1602. #endif
  1603. #endif
  1604. #ifdef BULLDOZER
  1605. #ifdef DEBUG
  1606. fprintf(stderr, "Bulldozer\n");
  1607. #endif
  1608. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1609. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1610. #endif
  1611. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1612. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1613. #endif
  1614. #if BUILD_COMPLEX
  1615. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1616. #endif
  1617. #if BUILD_COMPLEX16
  1618. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1619. #endif
  1620. #ifdef EXPRECISION
  1621. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1622. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1623. #endif
  1624. #endif
  1625. #ifdef EXCAVATOR
  1626. #ifdef DEBUG
  1627. fprintf(stderr, "Excavator\n");
  1628. #endif
  1629. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1630. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1631. #endif
  1632. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1633. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1634. #endif
  1635. #if BUILD_COMPLEX
  1636. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1637. #endif
  1638. #if BUILD_COMPLEX16
  1639. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1640. #endif
  1641. #ifdef EXPRECISION
  1642. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1643. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1644. #endif
  1645. #endif
  1646. #ifdef PILEDRIVER
  1647. #ifdef DEBUG
  1648. fprintf(stderr, "Piledriver\n");
  1649. #endif
  1650. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1651. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1652. #endif
  1653. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1654. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1655. #endif
  1656. #if BUILD_COMPLEX
  1657. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1658. #endif
  1659. #if BUILD_COMPLEX16
  1660. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1661. #endif
  1662. #ifdef EXPRECISION
  1663. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1664. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1665. #endif
  1666. #endif
  1667. #ifdef STEAMROLLER
  1668. #ifdef DEBUG
  1669. fprintf(stderr, "Steamroller\n");
  1670. #endif
  1671. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1672. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1673. #endif
  1674. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1675. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1676. #endif
  1677. #if BUILD_COMPLEX
  1678. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1679. #endif
  1680. #if BUILD_COMPLEX16
  1681. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1682. #endif
  1683. #ifdef EXPRECISION
  1684. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1685. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1686. #endif
  1687. #endif
  1688. #ifdef ZEN
  1689. #ifdef DEBUG
  1690. fprintf(stderr, "Zen\n");
  1691. #endif
  1692. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1693. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1694. #endif
  1695. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1696. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1697. #endif
  1698. #if BUILD_COMPLEX
  1699. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1700. #endif
  1701. #if BUILD_COMPLEX16
  1702. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1703. #endif
  1704. #ifdef EXPRECISION
  1705. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1706. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1707. #endif
  1708. #endif
  1709. #ifdef NANO
  1710. #ifdef DEBUG
  1711. fprintf(stderr, "NANO\n");
  1712. #endif
  1713. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1714. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1715. #endif
  1716. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1717. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1718. #endif
  1719. #if (BUILD_COMPLEX==1)
  1720. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1721. #endif
  1722. #if (BUILD_COMPLEX16==1)
  1723. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1724. #endif
  1725. #ifdef EXPRECISION
  1726. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1727. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1728. #endif
  1729. #endif
  1730. #ifdef SAPPHIRERAPIDS
  1731. #if (BUILD_BFLOAT16 == 1)
  1732. TABLE_NAME.need_amxtile_permission = 1;
  1733. #endif
  1734. #endif
  1735. #if BUILD_COMPLEX==1
  1736. #ifdef CGEMM3M_DEFAULT_P
  1737. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1738. #else
  1739. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1740. #endif
  1741. #endif
  1742. #if BUILD_COMPLEX16==1
  1743. #ifdef ZGEMM3M_DEFAULT_P
  1744. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1745. #else
  1746. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1747. #endif
  1748. #endif
  1749. #ifdef EXPRECISION
  1750. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1751. #endif
  1752. #if BUILD_SINGLE == 1
  1753. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1754. #endif
  1755. #if BUILD_DOUBLE== 1
  1756. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1757. #endif
  1758. #if BUILD_COMPLEX==1
  1759. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1760. #endif
  1761. #if BUILD_COMPLEX16==1
  1762. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1763. #endif
  1764. #if BUILD_COMPLEX==1
  1765. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1766. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1767. #else
  1768. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1769. #endif
  1770. #endif
  1771. #if BUILD_COMPLEX16==1
  1772. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1773. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1774. #else
  1775. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1776. #endif
  1777. #endif
  1778. #ifdef QUAD_PRECISION
  1779. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1780. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1781. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1782. #endif
  1783. #ifdef DEBUG
  1784. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1785. #endif
  1786. #if BUILD_BFLOAT16==1
  1787. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1788. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1789. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1790. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1791. TABLE_NAME.bgemm_r = (((BUFFER_SIZE -
  1792. ((TABLE_NAME.bgemm_p * TABLE_NAME.bgemm_q * 4 + TABLE_NAME.offsetA
  1793. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1794. ) / (TABLE_NAME.bgemm_q * 4) - 15) & ~15);
  1795. #endif
  1796. #if BUILD_HFLOAT16==1
  1797. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1798. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1799. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1800. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1801. #endif
  1802. #if BUILD_SINGLE==1
  1803. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1804. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1805. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1806. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1807. #endif
  1808. #if BUILD_DOUBLE==1
  1809. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1810. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1811. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1812. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1813. #endif
  1814. #ifdef EXPRECISION
  1815. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1816. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1817. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1818. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1819. #endif
  1820. #if BUILD_COMPLEX ==1
  1821. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1822. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1823. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1824. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1825. #endif
  1826. #if BUILD_COMPLEX16 ==1
  1827. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1828. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1829. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1830. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1831. #endif
  1832. #if BUILD_COMPLEX == 1
  1833. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1834. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1835. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1836. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1837. #endif
  1838. #if BUILD_COMPLEX16 == 1
  1839. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1840. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1841. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1842. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1843. #endif
  1844. #ifdef EXPRECISION
  1845. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1846. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1847. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1848. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1849. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1850. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1851. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1852. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1853. #endif
  1854. }
  1855. #endif //RISCV64
  1856. #endif //POWER
  1857. #endif //ZARCH
  1858. #endif //(ARCH_LOONGARCH64)
  1859. #endif //(ARCH_MIPS64)
  1860. #endif //(ARCH_ARM64)