You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 42 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  /* Forward declaration: init_parameter is stored in the table below and is
     defined further down in this file, once per architecture branch. */
  45. static void init_parameter(void);
  /*
   * Kernel dispatch table for one build target.
   *
   * TABLE_NAME and the TS suffix appended to every kernel symbol are not
   * defined in this file (presumably macros from common.h / kernelTS.h that
   * select the per-target names -- confirm in the build headers).
   *
   * The initializer is positional: every entry is matched to a member of
   * gotoblas_t purely by its position, so the order, the number of entries and
   * the preprocessor conditionals below must stay in step with the structure
   * declaration.
   *
   * Each "0, 0, 0," triple is a run-time placeholder.  NOTE(review): these look
   * like the *_p / *_q / *_r fields that init_parameter assigns later
   * (TABLE_NAME.sgemm_p, TABLE_NAME.sgemm_q, ...); verify against gotoblas_t.
   *
   * Recurring pattern: when a precision's DEFAULT_UNROLL_M differs from its
   * DEFAULT_UNROLL_N the "i...copy" routines are listed, otherwise the
   * "o...copy" routines are listed in their place; the "o...copy" routines then
   * always follow.
   */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- shgemm section: only the gemm kernel, beta and gemm copy routines are
     shgemm-specific; every other entry is the single precision routine, and
     the trsm/trmm/symm copy selection is keyed on the SGEMM unroll values. */
  49. 0, 0, 0,
  50. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  51. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  52. SHGEMM_DEFAULT_UNROLL_MN,
  53. #else
  54. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  55. #endif
  56. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  57. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  58. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  59. dsdot_kTS,
  60. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  61. sgemv_nTS, sgemv_tTS, sger_kTS,
  62. ssymv_LTS, ssymv_UTS,
  63. shgemm_kernelTS, shgemm_betaTS,
  64. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  65. shgemm_incopyTS, shgemm_itcopyTS,
  66. #else
  67. shgemm_oncopyTS, shgemm_otcopyTS,
  68. #endif
  69. shgemm_oncopyTS, shgemm_otcopyTS,
  70. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  71. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  72. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  73. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  74. #else
  75. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  76. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  77. #endif
  78. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  79. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  80. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  81. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  82. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  83. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  84. #else
  85. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  86. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  87. #endif
  88. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  89. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  90. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  91. ssymm_iutcopyTS, ssymm_iltcopyTS,
  92. #else
  93. ssymm_outcopyTS, ssymm_oltcopyTS,
  94. #endif
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #ifndef NO_LAPACK
  97. sneg_tcopyTS, slaswp_ncopyTS,
  98. #else
  99. NULL,NULL,
  100. #endif
  /* ---- single precision real section; the extra 1/0 entry after the unroll
     values records whether HAVE_EXCLUSIVE_CACHE is defined. */
  101. 0, 0, 0,
  102. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  103. #ifdef SGEMM_DEFAULT_UNROLL_MN
  104. SGEMM_DEFAULT_UNROLL_MN,
  105. #else
  106. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  107. #endif
  108. #ifdef HAVE_EXCLUSIVE_CACHE
  109. 1,
  110. #else
  111. 0,
  112. #endif
  113. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  114. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  115. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  116. dsdot_kTS,
  117. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  118. sgemv_nTS, sgemv_tTS, sger_kTS,
  119. ssymv_LTS, ssymv_UTS,
  120. sgemm_kernelTS, sgemm_betaTS,
  121. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  122. sgemm_incopyTS, sgemm_itcopyTS,
  123. #else
  124. sgemm_oncopyTS, sgemm_otcopyTS,
  125. #endif
  126. sgemm_oncopyTS, sgemm_otcopyTS,
  127. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  128. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  129. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  130. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  131. #else
  132. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  133. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  134. #endif
  135. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  136. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  137. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  138. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  139. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  140. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  141. #else
  142. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  143. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  144. #endif
  145. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  146. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  147. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  148. ssymm_iutcopyTS, ssymm_iltcopyTS,
  149. #else
  150. ssymm_outcopyTS, ssymm_oltcopyTS,
  151. #endif
  152. ssymm_outcopyTS, ssymm_oltcopyTS,
  153. #ifndef NO_LAPACK
  154. sneg_tcopyTS, slaswp_ncopyTS,
  155. #else
  156. NULL,NULL,
  157. #endif
  /* ---- double precision real section */
  158. 0, 0, 0,
  159. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  160. #ifdef DGEMM_DEFAULT_UNROLL_MN
  161. DGEMM_DEFAULT_UNROLL_MN,
  162. #else
  163. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  164. #endif
  165. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  166. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  167. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  168. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  169. dgemv_nTS, dgemv_tTS, dger_kTS,
  170. dsymv_LTS, dsymv_UTS,
  171. dgemm_kernelTS, dgemm_betaTS,
  172. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  173. dgemm_incopyTS, dgemm_itcopyTS,
  174. #else
  175. dgemm_oncopyTS, dgemm_otcopyTS,
  176. #endif
  177. dgemm_oncopyTS, dgemm_otcopyTS,
  178. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  179. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  180. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  181. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  182. #else
  183. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  184. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  185. #endif
  186. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  187. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  188. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  189. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  190. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  191. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  192. #else
  193. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  194. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  195. #endif
  196. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  197. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  198. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  199. dsymm_iutcopyTS, dsymm_iltcopyTS,
  200. #else
  201. dsymm_outcopyTS, dsymm_oltcopyTS,
  202. #endif
  203. dsymm_outcopyTS, dsymm_oltcopyTS,
  204. #ifndef NO_LAPACK
  205. dneg_tcopyTS, dlaswp_ncopyTS,
  206. #else
  207. NULL, NULL,
  208. #endif
  /* ---- extended precision real section, present only when EXPRECISION is
     defined; unlike the sections above no QGEMM_DEFAULT_UNROLL_MN override is
     consulted, the third unroll value is always the MAX of M and N. */
  209. #ifdef EXPRECISION
  210. 0, 0, 0,
  211. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  212. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  213. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  214. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  215. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  216. qgemv_nTS, qgemv_tTS, qger_kTS,
  217. qsymv_LTS, qsymv_UTS,
  218. qgemm_kernelTS, qgemm_betaTS,
  219. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  220. qgemm_incopyTS, qgemm_itcopyTS,
  221. #else
  222. qgemm_oncopyTS, qgemm_otcopyTS,
  223. #endif
  224. qgemm_oncopyTS, qgemm_otcopyTS,
  225. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  226. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  227. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  228. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  229. #else
  230. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  231. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  232. #endif
  233. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  234. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  235. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  236. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  237. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  238. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  239. #else
  240. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  241. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  242. #endif
  243. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  244. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  245. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  246. qsymm_iutcopyTS, qsymm_iltcopyTS,
  247. #else
  248. qsymm_outcopyTS, qsymm_oltcopyTS,
  249. #endif
  250. qsymm_outcopyTS, qsymm_oltcopyTS,
  251. #ifndef NO_LAPACK
  252. qneg_tcopyTS, qlaswp_ncopyTS,
  253. #else
  254. NULL, NULL,
  255. #endif
  256. #endif
  /* ---- single precision complex section */
  257. 0, 0, 0,
  258. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  259. #ifdef CGEMM_DEFAULT_UNROLL_MN
  260. CGEMM_DEFAULT_UNROLL_MN,
  261. #else
  262. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  263. #endif
  264. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  265. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  266. cdotu_kTS, cdotc_kTS, csrot_kTS,
  267. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  268. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  269. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  270. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  271. csymv_LTS, csymv_UTS,
  272. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  273. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  274. cgemm_betaTS,
  275. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  276. cgemm_incopyTS, cgemm_itcopyTS,
  277. #else
  278. cgemm_oncopyTS, cgemm_otcopyTS,
  279. #endif
  280. cgemm_oncopyTS, cgemm_otcopyTS,
  281. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  282. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  283. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  284. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  285. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  286. #else
  287. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  288. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  289. #endif
  290. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  291. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  292. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  293. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  294. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  295. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  296. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  297. #else
  298. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  299. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  300. #endif
  301. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  302. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  303. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  304. csymm_iutcopyTS, csymm_iltcopyTS,
  305. #else
  306. csymm_outcopyTS, csymm_oltcopyTS,
  307. #endif
  308. csymm_outcopyTS, csymm_oltcopyTS,
  309. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  310. chemm_iutcopyTS, chemm_iltcopyTS,
  311. #else
  312. chemm_outcopyTS, chemm_oltcopyTS,
  313. #endif
  314. chemm_outcopyTS, chemm_oltcopyTS,
  /* ---- cgemm3m section: real entries only with USE_GEMM3M, otherwise the
     same number of slots is filled with 0 / NULL so later entries keep their
     positions.  Unroll values fall back to the SGEMM ones when
     CGEMM3M_DEFAULT_UNROLL_M is not defined. */
  315. 0, 0, 0,
  316. #if defined(USE_GEMM3M)
  317. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  318. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  319. #else
  320. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  321. #endif
  322. cgemm3m_kernelTS,
  323. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  324. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  325. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  326. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  327. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  328. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  329. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  330. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  331. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  332. csymm3m_oucopybTS, csymm3m_olcopybTS,
  333. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  334. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  335. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  336. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  337. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  338. chemm3m_oucopybTS, chemm3m_olcopybTS,
  339. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  340. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  341. #else
  342. 0, 0, 0,
  343. NULL,
  344. NULL, NULL,
  345. NULL, NULL,
  346. NULL, NULL,
  347. NULL, NULL,
  348. NULL, NULL,
  349. NULL, NULL,
  350. NULL, NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. #endif
  363. #ifndef NO_LAPACK
  364. cneg_tcopyTS, claswp_ncopyTS,
  365. #else
  366. NULL, NULL,
  367. #endif
  /* ---- double precision complex section */
  368. 0, 0, 0,
  369. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  370. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  371. ZGEMM_DEFAULT_UNROLL_MN,
  372. #else
  373. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  374. #endif
  375. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  376. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  377. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  378. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  379. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  380. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  381. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  382. zsymv_LTS, zsymv_UTS,
  383. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  384. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  385. zgemm_betaTS,
  386. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  387. zgemm_incopyTS, zgemm_itcopyTS,
  388. #else
  389. zgemm_oncopyTS, zgemm_otcopyTS,
  390. #endif
  391. zgemm_oncopyTS, zgemm_otcopyTS,
  392. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  393. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  394. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  395. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  396. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  397. #else
  398. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  399. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  400. #endif
  401. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  402. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  403. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  404. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  405. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  406. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  407. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  408. #else
  409. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  410. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  411. #endif
  412. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  413. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  414. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  415. zsymm_iutcopyTS, zsymm_iltcopyTS,
  416. #else
  417. zsymm_outcopyTS, zsymm_oltcopyTS,
  418. #endif
  419. zsymm_outcopyTS, zsymm_oltcopyTS,
  420. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  421. zhemm_iutcopyTS, zhemm_iltcopyTS,
  422. #else
  423. zhemm_outcopyTS, zhemm_oltcopyTS,
  424. #endif
  425. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- zgemm3m section: same layout as the cgemm3m section; unroll values
     fall back to the DGEMM ones when ZGEMM3M_DEFAULT_UNROLL_M is not defined. */
  426. 0, 0, 0,
  427. #if defined(USE_GEMM3M)
  428. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  429. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  430. #else
  431. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  432. #endif
  433. zgemm3m_kernelTS,
  434. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  435. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  436. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  437. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  438. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  439. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  440. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  441. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  442. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  443. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  444. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  445. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  446. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  447. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  448. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  449. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  450. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  451. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  452. #else
  453. 0, 0, 0,
  454. NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. #endif
  474. #ifndef NO_LAPACK
  475. zneg_tcopyTS, zlaswp_ncopyTS,
  476. #else
  477. NULL, NULL,
  478. #endif
  /* ---- extended precision complex section plus xgemm3m, present only when
     EXPRECISION is defined.  Note that the xgemm3m unroll values are taken
     from the QGEMM_DEFAULT_UNROLL_* macros. */
  479. #ifdef EXPRECISION
  480. 0, 0, 0,
  481. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  482. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  483. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  484. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  485. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  486. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  487. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  488. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  489. xsymv_LTS, xsymv_UTS,
  490. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  491. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  492. xgemm_betaTS,
  493. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  494. xgemm_incopyTS, xgemm_itcopyTS,
  495. #else
  496. xgemm_oncopyTS, xgemm_otcopyTS,
  497. #endif
  498. xgemm_oncopyTS, xgemm_otcopyTS,
  499. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  500. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  501. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  502. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  503. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  504. #else
  505. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  506. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  507. #endif
  508. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  509. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  510. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  511. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  512. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  513. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  514. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  515. #else
  516. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  517. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  518. #endif
  519. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  520. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  521. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  522. xsymm_iutcopyTS, xsymm_iltcopyTS,
  523. #else
  524. xsymm_outcopyTS, xsymm_oltcopyTS,
  525. #endif
  526. xsymm_outcopyTS, xsymm_oltcopyTS,
  527. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  528. xhemm_iutcopyTS, xhemm_iltcopyTS,
  529. #else
  530. xhemm_outcopyTS, xhemm_oltcopyTS,
  531. #endif
  532. xhemm_outcopyTS, xhemm_oltcopyTS,
  533. 0, 0, 0,
  534. #if defined(USE_GEMM3M)
  535. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  536. xgemm3m_kernelTS,
  537. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  538. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  539. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  540. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  541. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  542. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  543. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  544. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  545. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  546. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  547. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  548. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  549. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  550. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  551. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  552. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  553. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  554. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  555. #else
  556. 0, 0, 0,
  557. NULL,
  558. NULL, NULL,
  559. NULL, NULL,
  560. NULL, NULL,
  561. NULL, NULL,
  562. NULL, NULL,
  563. NULL, NULL,
  564. NULL, NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. #endif
  577. #ifndef NO_LAPACK
  578. xneg_tcopyTS, xlaswp_ncopyTS,
  579. #else
  580. NULL, NULL,
  581. #endif
  582. #endif
  /* ---- trailing entries: the parameter initialisation hook (the
     init_parameter function of this file), the *NUMOPT values, and the
     axpby / omatcopy / imatcopy / geadd kernels for the s, d, c and z types. */
  583. init_parameter,
  584. SNUMOPT, DNUMOPT, QNUMOPT,
  585. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  586. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  587. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  588. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  589. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  590. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  591. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  592. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  593. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  594. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  595. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  596. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  597. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  598. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  599. };
  600. #if defined(ARCH_ARM64)
  601. static void init_parameter(void) {
  602. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  603. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  604. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  605. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  606. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  607. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  608. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  609. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  610. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  611. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  612. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  613. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  614. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  615. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  616. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  617. #ifdef EXPRECISION
  618. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  619. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  620. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  621. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  622. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  623. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  624. #endif
  625. #if defined(USE_GEMM3M)
  626. #ifdef CGEMM3M_DEFAULT_P
  627. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  628. #else
  629. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  630. #endif
  631. #ifdef ZGEMM3M_DEFAULT_P
  632. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  633. #else
  634. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  635. #endif
  636. #ifdef CGEMM3M_DEFAULT_Q
  637. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  638. #else
  639. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  640. #endif
  641. #ifdef ZGEMM3M_DEFAULT_Q
  642. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  643. #else
  644. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  645. #endif
  646. #ifdef CGEMM3M_DEFAULT_R
  647. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  648. #else
  649. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  650. #endif
  651. #ifdef ZGEMM3M_DEFAULT_R
  652. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  653. #else
  654. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  655. #endif
  656. #ifdef EXPRECISION
  657. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  658. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  659. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  660. #endif
  661. #endif
  662. }
  663. #else // defined(ARCH_ARM64)
  664. #if defined(ARCH_POWER)
  665. static void init_parameter(void) {
  666. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  667. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  668. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  669. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  670. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  671. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  672. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  673. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  674. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  675. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  676. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  677. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  678. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  679. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  680. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  681. }
  682. #else //POWER
  683. #if defined(ARCH_ZARCH)
  684. static void init_parameter(void) {
  685. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  686. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  687. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  688. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  689. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  690. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  691. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  692. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  693. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  694. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  695. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  696. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  697. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  698. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  699. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  700. }
  701. #else //ZARCH
  702. #ifdef ARCH_X86
  /*
   * Fallback L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
   *
   * Executes cpuid(2), splits EAX/EBX/ECX/EDX into 15 bytes (bits 7:0 of EAX
   * are deliberately left out; per the Intel CPUID documentation that byte is
   * not a cache descriptor) and returns the size associated with the first
   * byte that matches a known L2 descriptor.
   *
   * Returns the L2 cache size in KB.  When no byte matches, a warning is
   * written to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){
      switch (info[i]){
        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* Descriptor 0x48 is a 3 MB L2 cache, i.e. 3 * 1024 KB; the value
           3184 used here before was a typo. */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;

      default :
        /* Not an L2 descriptor known to this table; keep scanning. */
        break;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  773. #endif
  /*
   * Report the L2 cache size used to scale the GEMM blocking parameters.
   *
   * The value is taken from the 16-bit field at bit 16 of ECX as returned by
   * cpuid(0x80000006, ...).  A positive value is returned unchanged.
   * Otherwise:
   *   - with ARCH_X86 defined, the result of get_l2_size_old() is returned;
   *   - without it, a warning is written to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){
    int reg_a, reg_b, reg_c, reg_d;
    int size;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);
    size = BITMASK(reg_c, 16, 0xffff);

    /* Common case for both build flavours: the extended leaf had an answer. */
    if (size > 0) {
      return size;
    }

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Report the L3 cache size derived from cpuid(0x80000006, ...).
   *
   * The 14-bit field at bit 18 of EDX is multiplied by 512 and returned.
   * NOTE(review): presumably the result is in the same unit as
   * get_l2_size() -- confirm against the CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int reg_a, reg_b, reg_c, reg_d;
    int blocks;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);
    blocks = BITMASK(reg_d, 18, 0x3fff);

    return blocks * 512;
  }
  794. static void init_parameter(void) {
  795. int l2 = get_l2_size();
  796. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  797. /* where the GEMM unrolling parameters do not depend on l2 */
  798. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  799. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  800. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  801. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  802. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  803. #ifdef CGEMM3M_DEFAULT_Q
  804. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  805. #else
  806. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  807. #endif
  808. #ifdef ZGEMM3M_DEFAULT_Q
  809. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  810. #else
  811. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  812. #endif
  813. #ifdef EXPRECISION
  814. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  815. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  816. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  817. #endif
  818. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  819. #ifdef DEBUG
  820. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  821. #endif
  822. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  823. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  824. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  825. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  826. #ifdef EXPRECISION
  827. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  828. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  829. #endif
  830. #endif
  831. #ifdef CORE_NORTHWOOD
  832. #ifdef DEBUG
  833. fprintf(stderr, "Northwood\n");
  834. #endif
  835. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  836. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  837. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  838. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  839. #ifdef EXPRECISION
  840. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  841. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  842. #endif
  843. #endif
  844. #ifdef ATOM
  845. #ifdef DEBUG
  846. fprintf(stderr, "Atom\n");
  847. #endif
  848. TABLE_NAME.sgemm_p = 256;
  849. TABLE_NAME.dgemm_p = 128;
  850. TABLE_NAME.cgemm_p = 128;
  851. TABLE_NAME.zgemm_p = 64;
  852. #ifdef EXPRECISION
  853. TABLE_NAME.qgemm_p = 64;
  854. TABLE_NAME.xgemm_p = 32;
  855. #endif
  856. #endif
  857. #ifdef CORE_PRESCOTT
  858. #ifdef DEBUG
  859. fprintf(stderr, "Prescott\n");
  860. #endif
  861. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  862. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  863. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  864. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  865. #ifdef EXPRECISION
  866. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  867. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  868. #endif
  869. #endif
  870. #ifdef CORE2
  871. #ifdef DEBUG
  872. fprintf(stderr, "Core2\n");
  873. #endif
  874. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  875. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  876. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  877. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  878. #ifdef EXPRECISION
  879. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  880. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  881. #endif
  882. #endif
  883. #ifdef PENRYN
  884. #ifdef DEBUG
  885. fprintf(stderr, "Penryn\n");
  886. #endif
  887. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  888. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  889. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  890. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  891. #ifdef EXPRECISION
  892. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  893. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  894. #endif
  895. #endif
  896. #ifdef DUNNINGTON
  897. #ifdef DEBUG
  898. fprintf(stderr, "Dunnington\n");
  899. #endif
  900. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  901. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  902. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  903. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  904. #ifdef EXPRECISION
  905. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  906. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  907. #endif
  908. #endif
  909. #ifdef NEHALEM
  910. #ifdef DEBUG
  911. fprintf(stderr, "Nehalem\n");
  912. #endif
  913. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  914. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  915. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  916. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  917. #ifdef EXPRECISION
  918. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  919. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  920. #endif
  921. #endif
  922. #ifdef SANDYBRIDGE
  923. #ifdef DEBUG
  924. fprintf(stderr, "Sandybridge\n");
  925. #endif
  926. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  927. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  928. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  929. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  930. #ifdef EXPRECISION
  931. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  932. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  933. #endif
  934. #endif
  935. #ifdef HASWELL
  936. #ifdef DEBUG
  937. fprintf(stderr, "Haswell\n");
  938. #endif
  939. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  940. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  941. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  942. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  943. #ifdef EXPRECISION
  944. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  945. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  946. #endif
  947. #endif
  948. #ifdef SKYLAKEX
  949. #ifdef DEBUG
  950. fprintf(stderr, "SkylakeX\n");
  951. #endif
  952. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  953. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  954. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  955. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  956. #ifdef EXPRECISION
  957. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  958. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  959. #endif
  960. #endif
  961. #ifdef OPTERON
  962. #ifdef DEBUG
  963. fprintf(stderr, "Opteron\n");
  964. #endif
  965. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  966. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  967. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  968. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  969. #ifdef EXPRECISION
  970. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  971. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  972. #endif
  973. #endif
  974. #ifdef BARCELONA
  975. #ifdef DEBUG
  976. fprintf(stderr, "Barcelona\n");
  977. #endif
  978. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  979. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  980. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  981. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  982. #ifdef EXPRECISION
  983. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  984. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  985. #endif
  986. #endif
  987. #ifdef BOBCAT
  988. #ifdef DEBUG
  989. fprintf(stderr, "Bobcate\n");
  990. #endif
  991. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  992. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  993. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  994. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  995. #ifdef EXPRECISION
  996. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  997. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  998. #endif
  999. #endif
  1000. #ifdef BULLDOZER
  1001. #ifdef DEBUG
  1002. fprintf(stderr, "Bulldozer\n");
  1003. #endif
  1004. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1005. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1006. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1007. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1008. #ifdef EXPRECISION
  1009. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1010. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1011. #endif
  1012. #endif
  1013. #ifdef EXCAVATOR
  1014. #ifdef DEBUG
  1015. fprintf(stderr, "Excavator\n");
  1016. #endif
  1017. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1018. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1019. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1020. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1021. #ifdef EXPRECISION
  1022. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1023. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1024. #endif
  1025. #endif
  1026. #ifdef PILEDRIVER
  1027. #ifdef DEBUG
  1028. fprintf(stderr, "Piledriver\n");
  1029. #endif
  1030. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1031. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1032. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1033. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1034. #ifdef EXPRECISION
  1035. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1036. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1037. #endif
  1038. #endif
  1039. #ifdef STEAMROLLER
  1040. #ifdef DEBUG
  1041. fprintf(stderr, "Steamroller\n");
  1042. #endif
  1043. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1044. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1045. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1046. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1047. #ifdef EXPRECISION
  1048. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1049. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1050. #endif
  1051. #endif
  1052. #ifdef ZEN
  1053. #ifdef DEBUG
  1054. fprintf(stderr, "Zen\n");
  1055. #endif
  1056. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1057. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1058. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1059. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1060. #ifdef EXPRECISION
  1061. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1062. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1063. #endif
  1064. #endif
  1065. #ifdef NANO
  1066. #ifdef DEBUG
  1067. fprintf(stderr, "NANO\n");
  1068. #endif
  1069. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1070. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1071. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1072. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1073. #ifdef EXPRECISION
  1074. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1075. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1076. #endif
  1077. #endif
  1078. #ifdef CGEMM3M_DEFAULT_P
  1079. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1080. #else
  1081. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1082. #endif
  1083. #ifdef ZGEMM3M_DEFAULT_P
  1084. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1085. #else
  1086. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1087. #endif
  1088. #ifdef EXPRECISION
  1089. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1090. #endif
  1091. TABLE_NAME.shgemm_p = ((TABLE_NAME.shgemm_p + SHGEMM_DEFAULT_UNROLL_M - 1)/SHGEMM_DEFAULT_UNROLL_M) * SHGEMM_DEFAULT_UNROLL_M;
  1092. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1093. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1094. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1095. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1096. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1097. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1098. #else
  1099. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1100. #endif
  1101. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1102. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1103. #else
  1104. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1105. #endif
  1106. #ifdef QUAD_PRECISION
  1107. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1108. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1109. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1110. #endif
  1111. #ifdef DEBUG
  1112. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1113. #endif
  1114. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1115. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1116. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1117. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1118. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1119. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1120. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1121. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1122. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1123. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1124. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1125. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1126. #ifdef EXPRECISION
  1127. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1128. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1129. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1130. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1131. #endif
  1132. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1133. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1134. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1135. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1136. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1137. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1138. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1139. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1140. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1141. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1142. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1143. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1144. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1145. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1146. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1147. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1148. #ifdef EXPRECISION
  1149. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1150. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1151. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1152. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1153. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1154. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1155. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1156. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1157. #endif
  1158. }
  1159. #endif //POWER
  1160. #endif //ZARCH
  1161. #endif //defined(ARCH_ARM64)