You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 42 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * Definition of the gotoblas_t table TABLE_NAME, written as a purely
   * positional initializer: every value and kernel name below lands in the
   * struct field at the same position, so the order of the lines (including
   * each #if/#else arm) is significant and must not be rearranged.
   *
   * Recurring patterns:
   *  - Each per-type section starts with "0, 0, 0,". These are presumably the
   *    *gemm_p/q/r slots that init_parameter() assigns at run time --
   *    TODO confirm against the gotoblas_t declaration.
   *  - "#if xGEMM_DEFAULT_UNROLL_M != xGEMM_DEFAULT_UNROLL_N" selects the
   *    i* copy routines when the two unroll factors differ; otherwise the
   *    o* copy routines are listed there, and the o* routines always follow
   *    unconditionally as well.
   *  - With NO_LAPACK defined, the *neg_tcopy / *laswp_ncopy slots are NULL.
   *  - The "TS" suffix on the kernel names is resolved outside this view
   *    (presumably via kernelTS.h when BUILD_KERNEL is set -- confirm).
   */
  46. gotoblas_t TABLE_NAME = {
  /* Leading scalar defaults: DTB entries, GEMM offsets for A/B, GEMM alignment. */
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- BUILD_HALF only: shgemm section ---- */
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  /* Level 1/2 slots of this section are filled with the s* kernels. */
  57. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  58. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  59. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  60. dsdot_kTS,
  61. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  62. sgemv_nTS, sgemv_tTS, sger_kTS,
  63. ssymv_LTS, ssymv_UTS,
  /* shgemm kernel, beta routine and copy routines. */
  64. shgemm_kernelTS, shgemm_betaTS,
  65. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  66. shgemm_incopyTS, shgemm_itcopyTS,
  67. #else
  68. shgemm_oncopyTS, shgemm_otcopyTS,
  69. #endif
  70. shgemm_oncopyTS, shgemm_otcopyTS,
  /* TRSM/TRMM/SYMM slots of this section use the s* routines and are
     selected on the SGEMM (not SHGEMM) unroll factors. */
  71. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  72. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  73. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  74. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  75. #else
  76. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  77. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  78. #endif
  79. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  80. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  81. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  82. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  83. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  84. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  85. #else
  86. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  87. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  88. #endif
  89. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  90. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  91. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  92. ssymm_iutcopyTS, ssymm_iltcopyTS,
  93. #else
  94. ssymm_outcopyTS, ssymm_oltcopyTS,
  95. #endif
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  97. #ifndef NO_LAPACK
  98. sneg_tcopyTS, slaswp_ncopyTS,
  99. #else
  100. NULL,NULL,
  101. #endif
  102. #endif
  /* ---- s* section (always present) ---- */
  103. 0, 0, 0,
  104. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  105. #ifdef SGEMM_DEFAULT_UNROLL_MN
  106. SGEMM_DEFAULT_UNROLL_MN,
  107. #else
  108. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  109. #endif
  /* Single flag slot: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  110. #ifdef HAVE_EXCLUSIVE_CACHE
  111. 1,
  112. #else
  113. 0,
  114. #endif
  115. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  116. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  117. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  118. dsdot_kTS,
  119. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  120. sgemv_nTS, sgemv_tTS, sger_kTS,
  121. ssymv_LTS, ssymv_UTS,
  122. sgemm_kernelTS, sgemm_betaTS,
  123. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  124. sgemm_incopyTS, sgemm_itcopyTS,
  125. #else
  126. sgemm_oncopyTS, sgemm_otcopyTS,
  127. #endif
  128. sgemm_oncopyTS, sgemm_otcopyTS,
  129. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  130. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  131. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  132. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  133. #else
  134. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  135. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  136. #endif
  137. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  138. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  139. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  140. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  141. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  142. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  143. #else
  144. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  145. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  146. #endif
  147. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  148. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  149. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  150. ssymm_iutcopyTS, ssymm_iltcopyTS,
  151. #else
  152. ssymm_outcopyTS, ssymm_oltcopyTS,
  153. #endif
  154. ssymm_outcopyTS, ssymm_oltcopyTS,
  155. #ifndef NO_LAPACK
  156. sneg_tcopyTS, slaswp_ncopyTS,
  157. #else
  158. NULL,NULL,
  159. #endif
  /* ---- d* section ---- */
  160. 0, 0, 0,
  161. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  162. #ifdef DGEMM_DEFAULT_UNROLL_MN
  163. DGEMM_DEFAULT_UNROLL_MN,
  164. #else
  165. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  166. #endif
  167. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  168. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  169. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  170. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  171. dgemv_nTS, dgemv_tTS, dger_kTS,
  172. dsymv_LTS, dsymv_UTS,
  173. dgemm_kernelTS, dgemm_betaTS,
  174. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  175. dgemm_incopyTS, dgemm_itcopyTS,
  176. #else
  177. dgemm_oncopyTS, dgemm_otcopyTS,
  178. #endif
  179. dgemm_oncopyTS, dgemm_otcopyTS,
  180. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  181. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  182. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  183. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  184. #else
  185. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  186. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  187. #endif
  188. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  189. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  190. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  191. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  192. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  193. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  194. #else
  195. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  196. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  197. #endif
  198. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  199. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  200. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  201. dsymm_iutcopyTS, dsymm_iltcopyTS,
  202. #else
  203. dsymm_outcopyTS, dsymm_oltcopyTS,
  204. #endif
  205. dsymm_outcopyTS, dsymm_oltcopyTS,
  206. #ifndef NO_LAPACK
  207. dneg_tcopyTS, dlaswp_ncopyTS,
  208. #else
  209. NULL, NULL,
  210. #endif
  /* ---- q* section, only when EXPRECISION is defined ---- */
  211. #ifdef EXPRECISION
  212. 0, 0, 0,
  213. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  214. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  215. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  216. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  217. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  218. qgemv_nTS, qgemv_tTS, qger_kTS,
  219. qsymv_LTS, qsymv_UTS,
  220. qgemm_kernelTS, qgemm_betaTS,
  221. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  222. qgemm_incopyTS, qgemm_itcopyTS,
  223. #else
  224. qgemm_oncopyTS, qgemm_otcopyTS,
  225. #endif
  226. qgemm_oncopyTS, qgemm_otcopyTS,
  227. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  228. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  229. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  230. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  231. #else
  232. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  233. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  234. #endif
  235. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  236. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  237. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  238. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  239. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  240. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  241. #else
  242. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  243. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  244. #endif
  245. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  246. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  247. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  248. qsymm_iutcopyTS, qsymm_iltcopyTS,
  249. #else
  250. qsymm_outcopyTS, qsymm_oltcopyTS,
  251. #endif
  252. qsymm_outcopyTS, qsymm_oltcopyTS,
  253. #ifndef NO_LAPACK
  254. qneg_tcopyTS, qlaswp_ncopyTS,
  255. #else
  256. NULL, NULL,
  257. #endif
  258. #endif
  /* ---- c* section ---- */
  259. 0, 0, 0,
  260. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  261. #ifdef CGEMM_DEFAULT_UNROLL_MN
  262. CGEMM_DEFAULT_UNROLL_MN,
  263. #else
  264. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  265. #endif
  266. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  267. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  268. cdotu_kTS, cdotc_kTS, csrot_kTS,
  269. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  270. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  271. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  272. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  273. csymv_LTS, csymv_UTS,
  274. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  275. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  276. cgemm_betaTS,
  277. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  278. cgemm_incopyTS, cgemm_itcopyTS,
  279. #else
  280. cgemm_oncopyTS, cgemm_otcopyTS,
  281. #endif
  282. cgemm_oncopyTS, cgemm_otcopyTS,
  283. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  284. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  285. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  286. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  287. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  288. #else
  289. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  290. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  291. #endif
  292. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  293. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  294. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  295. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  296. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  297. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  298. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  299. #else
  300. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  301. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  302. #endif
  303. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  304. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  305. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  306. csymm_iutcopyTS, csymm_iltcopyTS,
  307. #else
  308. csymm_outcopyTS, csymm_oltcopyTS,
  309. #endif
  310. csymm_outcopyTS, csymm_oltcopyTS,
  311. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  312. chemm_iutcopyTS, chemm_iltcopyTS,
  313. #else
  314. chemm_outcopyTS, chemm_oltcopyTS,
  315. #endif
  316. chemm_outcopyTS, chemm_oltcopyTS,
  /* ---- cgemm3m sub-section: real routines with USE_GEMM3M, otherwise
     zeros and NULLs occupying the same number of slots. Without
     CGEMM3M_DEFAULT_UNROLL_M the SGEMM unroll factors are used. ---- */
  317. 0, 0, 0,
  318. #if defined(USE_GEMM3M)
  319. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  320. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  321. #else
  322. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  323. #endif
  324. cgemm3m_kernelTS,
  325. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  326. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  327. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  328. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  329. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  330. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  331. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  332. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  333. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  334. csymm3m_oucopybTS, csymm3m_olcopybTS,
  335. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  336. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  337. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  338. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  339. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  340. chemm3m_oucopybTS, chemm3m_olcopybTS,
  341. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  342. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  343. #else
  344. 0, 0, 0,
  345. NULL,
  346. NULL, NULL,
  347. NULL, NULL,
  348. NULL, NULL,
  349. NULL, NULL,
  350. NULL, NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. NULL, NULL,
  363. NULL, NULL,
  364. #endif
  365. #ifndef NO_LAPACK
  366. cneg_tcopyTS, claswp_ncopyTS,
  367. #else
  368. NULL, NULL,
  369. #endif
  /* ---- z* section ---- */
  370. 0, 0, 0,
  371. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  372. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  373. ZGEMM_DEFAULT_UNROLL_MN,
  374. #else
  375. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  376. #endif
  377. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  378. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  379. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  380. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  381. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  382. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  383. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  384. zsymv_LTS, zsymv_UTS,
  385. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  386. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  387. zgemm_betaTS,
  388. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  389. zgemm_incopyTS, zgemm_itcopyTS,
  390. #else
  391. zgemm_oncopyTS, zgemm_otcopyTS,
  392. #endif
  393. zgemm_oncopyTS, zgemm_otcopyTS,
  394. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  395. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  396. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  397. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  398. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  399. #else
  400. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  401. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  402. #endif
  403. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  404. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  405. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  406. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  407. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  408. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  409. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  410. #else
  411. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  412. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  413. #endif
  414. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  415. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  416. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  417. zsymm_iutcopyTS, zsymm_iltcopyTS,
  418. #else
  419. zsymm_outcopyTS, zsymm_oltcopyTS,
  420. #endif
  421. zsymm_outcopyTS, zsymm_oltcopyTS,
  422. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  423. zhemm_iutcopyTS, zhemm_iltcopyTS,
  424. #else
  425. zhemm_outcopyTS, zhemm_oltcopyTS,
  426. #endif
  427. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- zgemm3m sub-section: same layout as the cgemm3m one; without
     ZGEMM3M_DEFAULT_UNROLL_M the DGEMM unroll factors are used. ---- */
  428. 0, 0, 0,
  429. #if defined(USE_GEMM3M)
  430. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  431. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  432. #else
  433. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  434. #endif
  435. zgemm3m_kernelTS,
  436. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  437. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  438. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  439. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  440. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  441. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  442. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  443. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  444. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  445. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  446. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  447. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  448. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  449. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  450. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  451. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  452. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  453. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  454. #else
  455. 0, 0, 0,
  456. NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. #endif
  476. #ifndef NO_LAPACK
  477. zneg_tcopyTS, zlaswp_ncopyTS,
  478. #else
  479. NULL, NULL,
  480. #endif
  /* ---- x* section, only when EXPRECISION is defined; its gemm3m
     sub-section uses the QGEMM unroll factors. ---- */
  481. #ifdef EXPRECISION
  482. 0, 0, 0,
  483. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  484. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  485. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  486. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  487. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  488. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  489. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  490. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  491. xsymv_LTS, xsymv_UTS,
  492. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  493. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  494. xgemm_betaTS,
  495. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  496. xgemm_incopyTS, xgemm_itcopyTS,
  497. #else
  498. xgemm_oncopyTS, xgemm_otcopyTS,
  499. #endif
  500. xgemm_oncopyTS, xgemm_otcopyTS,
  501. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  502. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  503. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  504. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  505. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  506. #else
  507. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  508. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  509. #endif
  510. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  511. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  512. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  513. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  514. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  515. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  516. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  517. #else
  518. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  519. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  520. #endif
  521. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  522. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  523. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  524. xsymm_iutcopyTS, xsymm_iltcopyTS,
  525. #else
  526. xsymm_outcopyTS, xsymm_oltcopyTS,
  527. #endif
  528. xsymm_outcopyTS, xsymm_oltcopyTS,
  529. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  530. xhemm_iutcopyTS, xhemm_iltcopyTS,
  531. #else
  532. xhemm_outcopyTS, xhemm_oltcopyTS,
  533. #endif
  534. xhemm_outcopyTS, xhemm_oltcopyTS,
  535. 0, 0, 0,
  536. #if defined(USE_GEMM3M)
  537. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  538. xgemm3m_kernelTS,
  539. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  540. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  541. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  542. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  543. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  544. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  545. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  546. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  547. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  548. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  549. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  550. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  551. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  552. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  553. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  554. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  555. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  556. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  557. #else
  558. 0, 0, 0,
  559. NULL,
  560. NULL, NULL,
  561. NULL, NULL,
  562. NULL, NULL,
  563. NULL, NULL,
  564. NULL, NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. #endif
  579. #ifndef NO_LAPACK
  580. xneg_tcopyTS, xlaswp_ncopyTS,
  581. #else
  582. NULL, NULL,
  583. #endif
  584. #endif
  /* ---- Trailing entries: the init_parameter callback declared above,
     the *NUMOPT constants, then axpby, omatcopy, imatcopy and geadd
     routines for s/d/c/z. ---- */
  585. init_parameter,
  586. SNUMOPT, DNUMOPT, QNUMOPT,
  587. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  588. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  589. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  590. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  591. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  592. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  593. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  594. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  595. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  596. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  597. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  598. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  599. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  600. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  601. };
  602. #if defined(ARCH_ARM64)
  603. static void init_parameter(void) {
  604. #if defined(BUILD_HALF)
  605. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  606. #endif
  607. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  608. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  609. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  610. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  611. #if defined(BUILD_HALF)
  612. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  613. #endif
  614. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  615. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  616. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  617. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  618. #if defined(BUILD_HALF)
  619. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  620. #endif
  621. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  622. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  623. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  624. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  625. #ifdef EXPRECISION
  626. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  627. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  628. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  629. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  630. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  631. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  632. #endif
  633. #if defined(USE_GEMM3M)
  634. #ifdef CGEMM3M_DEFAULT_P
  635. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  636. #else
  637. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  638. #endif
  639. #ifdef ZGEMM3M_DEFAULT_P
  640. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  641. #else
  642. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  643. #endif
  644. #ifdef CGEMM3M_DEFAULT_Q
  645. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  646. #else
  647. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  648. #endif
  649. #ifdef ZGEMM3M_DEFAULT_Q
  650. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  651. #else
  652. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  653. #endif
  654. #ifdef CGEMM3M_DEFAULT_R
  655. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  656. #else
  657. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  658. #endif
  659. #ifdef ZGEMM3M_DEFAULT_R
  660. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  661. #else
  662. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  663. #endif
  664. #ifdef EXPRECISION
  665. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  666. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  667. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  668. #endif
  669. #endif
  670. }
  671. #else // defined(ARCH_ARM64)
  672. #if defined(ARCH_POWER)
  673. static void init_parameter(void) {
  674. #ifdef BUILD_HALF
  675. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  676. #endif
  677. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  678. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  679. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  680. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  681. #ifdef BUILD_HALF
  682. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  683. #endif
  684. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  685. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  686. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  687. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  688. #ifdef BUILD_HALF
  689. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  690. #endif
  691. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  692. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  693. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  694. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  695. }
  696. #else //POWER
  697. #if defined(ARCH_ZARCH)
  698. static void init_parameter(void) {
  699. #ifdef BUILD_HALF
  700. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  701. #endif
  702. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  703. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  704. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  705. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  706. #ifdef BUILD_HALF
  707. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  708. #endif
  709. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  710. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  711. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  712. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  713. #ifdef BUILD_HALF
  714. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  715. #endif
  716. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  717. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  718. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  719. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  720. }
  721. #else //ZARCH
  722. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
   *
   * Executes cpuid(2, ...) and collects 15 descriptor bytes: the upper three
   * bytes of EAX (its low byte is not examined) and all four bytes of EBX,
   * ECX and EDX. The bytes are scanned in that order and the first one that
   * matches a known L2 descriptor decides the result.
   *
   * Returns the L2 size in KB. If no descriptor matches, a warning is
   * printed to stderr and 256 is returned.
   *
   * cpuid() and BITMASK() are provided by headers outside this file.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){
      switch (info[i]){
        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          /* Descriptor 0x48 is a 3 MB L2 cache: 3 * 1024 KB = 3072 KB
             (the previous value 3184 was a typo). */
          return 3072;

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;

        default :
          /* Not an L2 descriptor known to this table; try the next byte. */
          break;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  793. #endif
  794. static __inline__ int get_l2_size(void){
  795. int eax, ebx, ecx, edx, l2;
  796. cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
  797. l2 = BITMASK(ecx, 16, 0xffff);
  798. #ifndef ARCH_X86
  799. if (l2 <= 0) {
  800. fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
  801. return 256;
  802. }
  803. return l2;
  804. #else
  805. if (l2 > 0) return l2;
  806. return get_l2_size_old();
  807. #endif
  808. }
  809. static __inline__ int get_l3_size(void){
  810. int eax, ebx, ecx, edx;
  811. cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
  812. return BITMASK(edx, 18, 0x3fff) * 512;
  813. }
  814. static void init_parameter(void) {
  815. int l2 = get_l2_size();
  816. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  817. /* where the GEMM unrolling parameters do not depend on l2 */
  818. #ifdef BUILD_HALF
  819. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  820. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  821. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  822. #endif
  823. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  824. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  825. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  826. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  827. #ifdef CGEMM3M_DEFAULT_Q
  828. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  829. #else
  830. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  831. #endif
  832. #ifdef ZGEMM3M_DEFAULT_Q
  833. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  834. #else
  835. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  836. #endif
  837. #ifdef EXPRECISION
  838. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  839. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  840. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  841. #endif
  842. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  843. #ifdef DEBUG
  844. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  845. #endif
  846. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  847. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  848. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  849. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  850. #ifdef EXPRECISION
  851. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  852. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  853. #endif
  854. #endif
  855. #ifdef CORE_NORTHWOOD
  856. #ifdef DEBUG
  857. fprintf(stderr, "Northwood\n");
  858. #endif
  859. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  860. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  861. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  862. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  863. #ifdef EXPRECISION
  864. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  865. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  866. #endif
  867. #endif
  868. #ifdef ATOM
  869. #ifdef DEBUG
  870. fprintf(stderr, "Atom\n");
  871. #endif
  872. TABLE_NAME.sgemm_p = 256;
  873. TABLE_NAME.dgemm_p = 128;
  874. TABLE_NAME.cgemm_p = 128;
  875. TABLE_NAME.zgemm_p = 64;
  876. #ifdef EXPRECISION
  877. TABLE_NAME.qgemm_p = 64;
  878. TABLE_NAME.xgemm_p = 32;
  879. #endif
  880. #endif
  881. #ifdef CORE_PRESCOTT
  882. #ifdef DEBUG
  883. fprintf(stderr, "Prescott\n");
  884. #endif
  885. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  886. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  887. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  888. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  889. #ifdef EXPRECISION
  890. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  891. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  892. #endif
  893. #endif
  894. #ifdef CORE2
  895. #ifdef DEBUG
  896. fprintf(stderr, "Core2\n");
  897. #endif
  898. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  899. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  900. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  901. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  902. #ifdef EXPRECISION
  903. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  904. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  905. #endif
  906. #endif
  907. #ifdef PENRYN
  908. #ifdef DEBUG
  909. fprintf(stderr, "Penryn\n");
  910. #endif
  911. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  912. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  913. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  914. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  915. #ifdef EXPRECISION
  916. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  917. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  918. #endif
  919. #endif
  920. #ifdef DUNNINGTON
  921. #ifdef DEBUG
  922. fprintf(stderr, "Dunnington\n");
  923. #endif
  924. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  925. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  926. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  927. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  928. #ifdef EXPRECISION
  929. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  930. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  931. #endif
  932. #endif
  933. #ifdef NEHALEM
  934. #ifdef DEBUG
  935. fprintf(stderr, "Nehalem\n");
  936. #endif
  937. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  938. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  939. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  940. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  941. #ifdef EXPRECISION
  942. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  943. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  944. #endif
  945. #endif
  946. #ifdef SANDYBRIDGE
  947. #ifdef DEBUG
  948. fprintf(stderr, "Sandybridge\n");
  949. #endif
  950. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  951. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  952. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  953. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  954. #ifdef EXPRECISION
  955. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  956. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  957. #endif
  958. #endif
  959. #ifdef HASWELL
  960. #ifdef DEBUG
  961. fprintf(stderr, "Haswell\n");
  962. #endif
  963. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  964. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  965. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  966. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  967. #ifdef EXPRECISION
  968. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  969. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  970. #endif
  971. #endif
  972. #if defined (SKYLAKEX) || defined (COOPERLAKE)
  973. #ifdef DEBUG
  974. fprintf(stderr, "SkylakeX\n");
  975. #endif
  976. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  977. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  978. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  979. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  980. #ifdef EXPRECISION
  981. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  982. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  983. #endif
  984. #endif
  985. #ifdef OPTERON
  986. #ifdef DEBUG
  987. fprintf(stderr, "Opteron\n");
  988. #endif
  989. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  990. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  991. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  992. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  993. #ifdef EXPRECISION
  994. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  995. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  996. #endif
  997. #endif
  998. #ifdef BARCELONA
  999. #ifdef DEBUG
  1000. fprintf(stderr, "Barcelona\n");
  1001. #endif
  1002. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1003. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1004. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1005. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1006. #ifdef EXPRECISION
  1007. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1008. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1009. #endif
  1010. #endif
  1011. #ifdef BOBCAT
  1012. #ifdef DEBUG
  1013. fprintf(stderr, "Bobcate\n");
  1014. #endif
  1015. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1016. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1017. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1018. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1019. #ifdef EXPRECISION
  1020. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1021. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1022. #endif
  1023. #endif
  1024. #ifdef BULLDOZER
  1025. #ifdef DEBUG
  1026. fprintf(stderr, "Bulldozer\n");
  1027. #endif
  1028. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1029. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1030. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1031. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1032. #ifdef EXPRECISION
  1033. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1034. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1035. #endif
  1036. #endif
  1037. #ifdef EXCAVATOR
  1038. #ifdef DEBUG
  1039. fprintf(stderr, "Excavator\n");
  1040. #endif
  1041. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1042. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1043. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1044. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1045. #ifdef EXPRECISION
  1046. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1047. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1048. #endif
  1049. #endif
  1050. #ifdef PILEDRIVER
  1051. #ifdef DEBUG
  1052. fprintf(stderr, "Piledriver\n");
  1053. #endif
  1054. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1055. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1056. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1057. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1058. #ifdef EXPRECISION
  1059. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1060. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1061. #endif
  1062. #endif
  1063. #ifdef STEAMROLLER
  1064. #ifdef DEBUG
  1065. fprintf(stderr, "Steamroller\n");
  1066. #endif
  1067. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1068. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1069. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1070. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1071. #ifdef EXPRECISION
  1072. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1073. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1074. #endif
  1075. #endif
  1076. #ifdef ZEN
  1077. #ifdef DEBUG
  1078. fprintf(stderr, "Zen\n");
  1079. #endif
  1080. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1081. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1082. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1083. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1084. #ifdef EXPRECISION
  1085. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1086. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1087. #endif
  1088. #endif
  1089. #ifdef NANO
  1090. #ifdef DEBUG
  1091. fprintf(stderr, "NANO\n");
  1092. #endif
  1093. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1094. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1095. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1096. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1097. #ifdef EXPRECISION
  1098. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1099. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1100. #endif
  1101. #endif
  1102. #ifdef CGEMM3M_DEFAULT_P
  1103. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1104. #else
  1105. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1106. #endif
  1107. #ifdef ZGEMM3M_DEFAULT_P
  1108. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1109. #else
  1110. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1111. #endif
  1112. #ifdef EXPRECISION
  1113. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1114. #endif
  1115. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1116. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1117. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1118. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1119. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1120. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1121. #else
  1122. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1123. #endif
  1124. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1125. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1126. #else
  1127. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1128. #endif
  1129. #ifdef QUAD_PRECISION
  1130. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1131. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1132. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1133. #endif
  1134. #ifdef DEBUG
  1135. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1136. #endif
  1137. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1138. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1139. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1140. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1141. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1142. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1143. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1144. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1145. #ifdef EXPRECISION
  1146. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1147. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1148. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1149. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1150. #endif
  1151. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1152. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1153. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1154. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1155. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1156. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1157. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1158. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1159. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1160. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1161. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1162. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1163. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1164. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1165. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1166. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1167. #ifdef EXPRECISION
  1168. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1169. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1170. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1171. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1172. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1173. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1174. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1175. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1176. #endif
  1177. }
#endif //ZARCH
#endif //POWER
  1180. #endif //defined(ARCH_ARM64)