You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 42 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  /* Forward declaration: the table initializer below names init_parameter as
     one of its entries, and the architecture-specific definitions of that
     function only appear after the table. */
  45. static void init_parameter(void);
  /*
   * Positional initializer for the kernel dispatch table TABLE_NAME.
   *
   * No designated initializers are used, so the order of the entries (and of
   * every #if/#else alternative) has to line up with the member order of
   * gotoblas_t, which is declared elsewhere (not shown here).
   *
   * The "TS" suffix on the routine names and the *_DEFAULT_* / *NUMOPT
   * constants are not defined in this part of the file either; presumably
   * they come from common.h / kernelTS.h -- verify against those headers.
   *
   * Recurring patterns:
   *   - "0, 0, 0," opens each section; presumably these are the <x>gemm_p/q/r
   *     fields that init_parameter assigns later -- confirm against the
   *     struct declaration.
   *   - "#if <X>GEMM_DEFAULT_UNROLL_M != <X>GEMM_DEFAULT_UNROLL_N": when the
   *     two unroll factors differ, the i*copy routines fill these slots;
   *     otherwise the o*copy routines are listed there as well as in the
   *     slots that follow unconditionally.
   *   - "#ifndef NO_LAPACK": the neg_tcopy / laswp_ncopy pair is replaced by
   *     NULL, NULL when NO_LAPACK is defined.
   */
  46. gotoblas_t TABLE_NAME = {
  /* Leading scalars: DTB entry count, GEMM buffer offsets and alignment. */
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- sh-prefixed (SHGEMM) section ----
   * NOTE(review): apart from the shgemm_* kernel/beta/copy entries, this
   * section lists the same s-prefixed routines as the SGEMM section that
   * follows, and its TRSM/TRMM/SYMM conditionals test the SGEMM (not SHGEMM)
   * unroll factors. */
  49. 0, 0, 0,
  50. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  /* Use the explicit MN unroll constant if one is defined, else MAX(M, N). */
  51. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  52. SHGEMM_DEFAULT_UNROLL_MN,
  53. #else
  54. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  55. #endif
  56. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  57. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  58. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  59. dsdot_kTS,
  60. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  61. sgemv_nTS, sgemv_tTS, sger_kTS,
  62. ssymv_LTS, ssymv_UTS,
  63. shgemm_kernelTS, shgemm_betaTS,
  64. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  65. shgemm_incopyTS, shgemm_itcopyTS,
  66. #else
  67. shgemm_oncopyTS, shgemm_otcopyTS,
  68. #endif
  69. shgemm_oncopyTS, shgemm_otcopyTS,
  70. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  71. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  72. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  73. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  74. #else
  75. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  76. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  77. #endif
  78. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  79. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  80. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  81. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  82. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  83. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  84. #else
  85. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  86. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  87. #endif
  88. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  89. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  90. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  91. ssymm_iutcopyTS, ssymm_iltcopyTS,
  92. #else
  93. ssymm_outcopyTS, ssymm_oltcopyTS,
  94. #endif
  95. ssymm_outcopyTS, ssymm_oltcopyTS,
  96. #ifndef NO_LAPACK
  97. sneg_tcopyTS, slaswp_ncopyTS,
  98. #else
  99. NULL,NULL,
  100. #endif
  /* ---- s-prefixed (SGEMM) section ---- */
  101. 0, 0, 0,
  102. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  103. #ifdef SGEMM_DEFAULT_UNROLL_MN
  104. SGEMM_DEFAULT_UNROLL_MN,
  105. #else
  106. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  107. #endif
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  108. #ifdef HAVE_EXCLUSIVE_CACHE
  109. 1,
  110. #else
  111. 0,
  112. #endif
  113. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  114. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  115. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  116. dsdot_kTS,
  117. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  118. sgemv_nTS, sgemv_tTS, sger_kTS,
  119. ssymv_LTS, ssymv_UTS,
  120. sgemm_kernelTS, sgemm_betaTS,
  121. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  122. sgemm_incopyTS, sgemm_itcopyTS,
  123. #else
  124. sgemm_oncopyTS, sgemm_otcopyTS,
  125. #endif
  126. sgemm_oncopyTS, sgemm_otcopyTS,
  127. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  128. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  129. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  130. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  131. #else
  132. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  133. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  134. #endif
  135. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  136. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  137. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  138. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  139. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  140. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  141. #else
  142. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  143. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  144. #endif
  145. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  146. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  147. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  148. ssymm_iutcopyTS, ssymm_iltcopyTS,
  149. #else
  150. ssymm_outcopyTS, ssymm_oltcopyTS,
  151. #endif
  152. ssymm_outcopyTS, ssymm_oltcopyTS,
  153. #ifndef NO_LAPACK
  154. sneg_tcopyTS, slaswp_ncopyTS,
  155. #else
  156. NULL,NULL,
  157. #endif
  /* ---- d-prefixed (DGEMM) section ---- */
  158. 0, 0, 0,
  159. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  160. #ifdef DGEMM_DEFAULT_UNROLL_MN
  161. DGEMM_DEFAULT_UNROLL_MN,
  162. #else
  163. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  164. #endif
  165. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  166. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  167. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  168. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  169. dgemv_nTS, dgemv_tTS, dger_kTS,
  170. dsymv_LTS, dsymv_UTS,
  171. dgemm_kernelTS, dgemm_betaTS,
  172. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  173. dgemm_incopyTS, dgemm_itcopyTS,
  174. #else
  175. dgemm_oncopyTS, dgemm_otcopyTS,
  176. #endif
  177. dgemm_oncopyTS, dgemm_otcopyTS,
  178. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  179. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  180. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  181. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  182. #else
  183. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  184. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  185. #endif
  186. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  187. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  188. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  189. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  190. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  191. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  192. #else
  193. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  194. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  195. #endif
  196. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  197. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  198. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  199. dsymm_iutcopyTS, dsymm_iltcopyTS,
  200. #else
  201. dsymm_outcopyTS, dsymm_oltcopyTS,
  202. #endif
  203. dsymm_outcopyTS, dsymm_oltcopyTS,
  204. #ifndef NO_LAPACK
  205. dneg_tcopyTS, dlaswp_ncopyTS,
  206. #else
  207. NULL, NULL,
  208. #endif
  /* ---- q-prefixed (QGEMM) section: present only when EXPRECISION is defined ---- */
  209. #ifdef EXPRECISION
  210. 0, 0, 0,
  211. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  212. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  213. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  214. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  215. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  216. qgemv_nTS, qgemv_tTS, qger_kTS,
  217. qsymv_LTS, qsymv_UTS,
  218. qgemm_kernelTS, qgemm_betaTS,
  219. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  220. qgemm_incopyTS, qgemm_itcopyTS,
  221. #else
  222. qgemm_oncopyTS, qgemm_otcopyTS,
  223. #endif
  224. qgemm_oncopyTS, qgemm_otcopyTS,
  225. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  226. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  227. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  228. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  229. #else
  230. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  231. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  232. #endif
  233. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  234. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  235. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  236. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  237. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  238. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  239. #else
  240. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  241. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  242. #endif
  243. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  244. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  245. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  246. qsymm_iutcopyTS, qsymm_iltcopyTS,
  247. #else
  248. qsymm_outcopyTS, qsymm_oltcopyTS,
  249. #endif
  250. qsymm_outcopyTS, qsymm_oltcopyTS,
  251. #ifndef NO_LAPACK
  252. qneg_tcopyTS, qlaswp_ncopyTS,
  253. #else
  254. NULL, NULL,
  255. #endif
  256. #endif
  /* ---- c-prefixed (CGEMM) section ---- */
  257. 0, 0, 0,
  258. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  259. #ifdef CGEMM_DEFAULT_UNROLL_MN
  260. CGEMM_DEFAULT_UNROLL_MN,
  261. #else
  262. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  263. #endif
  264. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  265. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  266. cdotu_kTS, cdotc_kTS, csrot_kTS,
  267. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  268. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  269. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  270. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  271. csymv_LTS, csymv_UTS,
  272. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  273. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  274. cgemm_betaTS,
  275. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  276. cgemm_incopyTS, cgemm_itcopyTS,
  277. #else
  278. cgemm_oncopyTS, cgemm_otcopyTS,
  279. #endif
  280. cgemm_oncopyTS, cgemm_otcopyTS,
  281. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  282. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  283. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  284. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  285. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  286. #else
  287. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  288. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  289. #endif
  290. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  291. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  292. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  293. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  294. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  295. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  296. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  297. #else
  298. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  299. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  300. #endif
  301. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  302. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  303. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  304. csymm_iutcopyTS, csymm_iltcopyTS,
  305. #else
  306. csymm_outcopyTS, csymm_oltcopyTS,
  307. #endif
  308. csymm_outcopyTS, csymm_oltcopyTS,
  309. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  310. chemm_iutcopyTS, chemm_iltcopyTS,
  311. #else
  312. chemm_outcopyTS, chemm_oltcopyTS,
  313. #endif
  314. chemm_outcopyTS, chemm_oltcopyTS,
  /* ---- cgemm3m entries ----
   * With USE_GEMM3M the unroll factors come from CGEMM3M_DEFAULT_UNROLL_* if
   * defined, else from the SGEMM ones; without USE_GEMM3M every slot is
   * filled with a 0 / NULL placeholder. */
  315. 0, 0, 0,
  316. #if defined(USE_GEMM3M)
  317. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  318. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  319. #else
  320. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  321. #endif
  322. cgemm3m_kernelTS,
  323. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  324. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  325. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  326. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  327. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  328. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  329. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  330. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  331. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  332. csymm3m_oucopybTS, csymm3m_olcopybTS,
  333. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  334. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  335. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  336. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  337. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  338. chemm3m_oucopybTS, chemm3m_olcopybTS,
  339. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  340. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  341. #else
  342. 0, 0, 0,
  343. NULL,
  344. NULL, NULL,
  345. NULL, NULL,
  346. NULL, NULL,
  347. NULL, NULL,
  348. NULL, NULL,
  349. NULL, NULL,
  350. NULL, NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. #endif
  363. #ifndef NO_LAPACK
  364. cneg_tcopyTS, claswp_ncopyTS,
  365. #else
  366. NULL, NULL,
  367. #endif
  /* ---- z-prefixed (ZGEMM) section ---- */
  368. 0, 0, 0,
  369. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  370. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  371. ZGEMM_DEFAULT_UNROLL_MN,
  372. #else
  373. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  374. #endif
  375. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  376. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  377. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  378. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  379. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  380. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  381. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  382. zsymv_LTS, zsymv_UTS,
  383. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  384. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  385. zgemm_betaTS,
  386. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  387. zgemm_incopyTS, zgemm_itcopyTS,
  388. #else
  389. zgemm_oncopyTS, zgemm_otcopyTS,
  390. #endif
  391. zgemm_oncopyTS, zgemm_otcopyTS,
  392. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  393. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  394. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  395. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  396. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  397. #else
  398. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  399. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  400. #endif
  401. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  402. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  403. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  404. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  405. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  406. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  407. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  408. #else
  409. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  410. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  411. #endif
  412. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  413. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  414. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  415. zsymm_iutcopyTS, zsymm_iltcopyTS,
  416. #else
  417. zsymm_outcopyTS, zsymm_oltcopyTS,
  418. #endif
  419. zsymm_outcopyTS, zsymm_oltcopyTS,
  420. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  421. zhemm_iutcopyTS, zhemm_iltcopyTS,
  422. #else
  423. zhemm_outcopyTS, zhemm_oltcopyTS,
  424. #endif
  425. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- zgemm3m entries ----
   * Same layout as the cgemm3m entries; the fallback unroll factors here are
   * the DGEMM ones. */
  426. 0, 0, 0,
  427. #if defined(USE_GEMM3M)
  428. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  429. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  430. #else
  431. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  432. #endif
  433. zgemm3m_kernelTS,
  434. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  435. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  436. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  437. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  438. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  439. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  440. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  441. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  442. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  443. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  444. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  445. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  446. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  447. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  448. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  449. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  450. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  451. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  452. #else
  453. 0, 0, 0,
  454. NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. #endif
  474. #ifndef NO_LAPACK
  475. zneg_tcopyTS, zlaswp_ncopyTS,
  476. #else
  477. NULL, NULL,
  478. #endif
  /* ---- x-prefixed (XGEMM) section: present only when EXPRECISION is defined ----
   * Its gemm3m unroll factors are taken from the QGEMM constants. */
  479. #ifdef EXPRECISION
  480. 0, 0, 0,
  481. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  482. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  483. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  484. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  485. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  486. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  487. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  488. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  489. xsymv_LTS, xsymv_UTS,
  490. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  491. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  492. xgemm_betaTS,
  493. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  494. xgemm_incopyTS, xgemm_itcopyTS,
  495. #else
  496. xgemm_oncopyTS, xgemm_otcopyTS,
  497. #endif
  498. xgemm_oncopyTS, xgemm_otcopyTS,
  499. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  500. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  501. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  502. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  503. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  504. #else
  505. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  506. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  507. #endif
  508. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  509. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  510. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  511. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  512. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  513. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  514. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  515. #else
  516. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  517. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  518. #endif
  519. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  520. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  521. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  522. xsymm_iutcopyTS, xsymm_iltcopyTS,
  523. #else
  524. xsymm_outcopyTS, xsymm_oltcopyTS,
  525. #endif
  526. xsymm_outcopyTS, xsymm_oltcopyTS,
  527. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  528. xhemm_iutcopyTS, xhemm_iltcopyTS,
  529. #else
  530. xhemm_outcopyTS, xhemm_oltcopyTS,
  531. #endif
  532. xhemm_outcopyTS, xhemm_oltcopyTS,
  533. 0, 0, 0,
  534. #if defined(USE_GEMM3M)
  535. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  536. xgemm3m_kernelTS,
  537. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  538. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  539. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  540. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  541. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  542. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  543. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  544. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  545. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  546. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  547. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  548. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  549. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  550. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  551. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  552. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  553. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  554. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  555. #else
  556. 0, 0, 0,
  557. NULL,
  558. NULL, NULL,
  559. NULL, NULL,
  560. NULL, NULL,
  561. NULL, NULL,
  562. NULL, NULL,
  563. NULL, NULL,
  564. NULL, NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. #endif
  577. #ifndef NO_LAPACK
  578. xneg_tcopyTS, xlaswp_ncopyTS,
  579. #else
  580. NULL, NULL,
  581. #endif
  582. #endif
  /* ---- trailing entries ----
   * The parameter-setup hook, the SNUMOPT/DNUMOPT/QNUMOPT constants, and then
   * the axpby, omatcopy, imatcopy and geadd routines for the s/d/c/z prefixes. */
  583. init_parameter,
  584. SNUMOPT, DNUMOPT, QNUMOPT,
  585. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  586. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  587. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  588. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  589. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  590. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  591. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  592. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  593. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  594. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  595. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  596. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  597. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  598. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  599. };
  600. #if defined(ARCH_ARM64)
  601. static void init_parameter(void) {
  602. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  603. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  604. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  605. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  606. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  607. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  608. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  609. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  610. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  611. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  612. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  613. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  614. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  615. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  616. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  617. #ifdef EXPRECISION
  618. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  619. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  620. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  621. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  622. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  623. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  624. #endif
  625. #if defined(USE_GEMM3M)
  626. #ifdef CGEMM3M_DEFAULT_P
  627. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  628. #else
  629. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  630. #endif
  631. #ifdef ZGEMM3M_DEFAULT_P
  632. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  633. #else
  634. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  635. #endif
  636. #ifdef CGEMM3M_DEFAULT_Q
  637. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  638. #else
  639. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  640. #endif
  641. #ifdef ZGEMM3M_DEFAULT_Q
  642. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  643. #else
  644. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  645. #endif
  646. #ifdef CGEMM3M_DEFAULT_R
  647. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  648. #else
  649. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  650. #endif
  651. #ifdef ZGEMM3M_DEFAULT_R
  652. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  653. #else
  654. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  655. #endif
  656. #ifdef EXPRECISION
  657. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  658. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  659. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  660. #endif
  661. #endif
  662. }
  663. #else // defined(ARCH_ARM64)
  664. #if defined(ARCH_POWER)
  665. static void init_parameter(void) {
  666. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  667. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  668. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  669. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  670. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  671. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  672. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  673. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  674. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  675. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  676. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  677. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  678. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  679. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  680. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  681. }
  682. #else //POWER
  683. #if defined(ARCH_ZARCH)
  684. static void init_parameter(void) {
  685. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  686. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  687. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  688. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  689. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  690. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  691. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  692. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  693. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  694. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  695. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  696. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  697. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  698. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  699. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  700. }
  701. #else //ZARCH
  702. #ifdef ARCH_X86
  /*
   * Fallback L2 cache size lookup based on the CPUID leaf-2 descriptor bytes
   * (this function is only compiled when ARCH_X86 is defined).
   *
   * The 15 descriptor bytes are taken from EAX[31:8], EBX, ECX and EDX, in
   * that order, and the first byte that matches a known L2 descriptor decides
   * the result.  EAX[7:0] is deliberately not examined (Intel documents it as
   * an iteration count rather than a descriptor).
   *
   * Assumes BITMASK(value, shift, mask) extracts (value >> shift) & mask and
   * cpuid(leaf, &eax, &ebx, &ecx, &edx) fills the four registers -- both are
   * defined outside this block; confirm against their definitions.
   *
   * Returns the L2 size in KB.  When no descriptor is recognised, a warning is
   * written to stderr and 256 is returned.
   *
   * NOTE(review): Intel specifies that a register whose bit 31 is set carries
   * no valid descriptors; that flag is not checked here.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){
      switch (info[i]){
        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* Descriptor 0x48 is a 3 MB L2: 3 * 1024 KB, not 3184. */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;

      default :
        /* Not an L2 descriptor known to this table; try the next byte. */
        break;
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  773. #endif
  /*
   * Query the L2 cache size (in KB) from CPUID leaf 0x80000006, taking the
   * 16-bit field that BITMASK extracts from ECX at bit offset 16.
   *
   * When that field is zero:
   *   - on ARCH_X86 builds the legacy descriptor lookup get_l2_size_old()
   *     is used instead;
   *   - on all other builds a warning is printed and 256 is assumed.
   */
  static __inline__ int get_l2_size(void){
    int reg_a, reg_b, reg_c, reg_d;
    int size_kb;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);
    size_kb = BITMASK(reg_c, 16, 0xffff);

    /* A positive value is trusted on every architecture. */
    if (size_kb > 0) {
      return size_kb;
    }

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Read CPUID leaf 0x80000006 and return the 14-bit field that BITMASK
   * extracts from EDX at bit offset 18, multiplied by 512.
   * NOTE(review): presumably this is the L3 size in KB (field counted in
   * 512 KB units) -- confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int reg_a, reg_b, reg_c, reg_d;
    int units;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);
    units = BITMASK(reg_d, 18, 0x3fff);

    return units * 512;
  }
  794. static void init_parameter(void) {
  795. int l2 = get_l2_size();
  796. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  797. /* where the GEMM unrolling parameters do not depend on l2 */
  798. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  799. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  800. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  801. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  802. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  803. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  804. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  805. #ifdef CGEMM3M_DEFAULT_Q
  806. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  807. #else
  808. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  809. #endif
  810. #ifdef ZGEMM3M_DEFAULT_Q
  811. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  812. #else
  813. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  814. #endif
  815. #ifdef EXPRECISION
  816. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  817. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  818. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  819. #endif
  820. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  821. #ifdef DEBUG
  822. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  823. #endif
  824. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  825. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  826. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  827. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  828. #ifdef EXPRECISION
  829. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  830. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  831. #endif
  832. #endif
  833. #ifdef CORE_NORTHWOOD
  834. #ifdef DEBUG
  835. fprintf(stderr, "Northwood\n");
  836. #endif
  837. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  838. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  839. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  840. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  841. #ifdef EXPRECISION
  842. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  843. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  844. #endif
  845. #endif
  846. #ifdef ATOM
  847. #ifdef DEBUG
  848. fprintf(stderr, "Atom\n");
  849. #endif
  850. TABLE_NAME.sgemm_p = 256;
  851. TABLE_NAME.dgemm_p = 128;
  852. TABLE_NAME.cgemm_p = 128;
  853. TABLE_NAME.zgemm_p = 64;
  854. #ifdef EXPRECISION
  855. TABLE_NAME.qgemm_p = 64;
  856. TABLE_NAME.xgemm_p = 32;
  857. #endif
  858. #endif
  859. #ifdef CORE_PRESCOTT
  860. #ifdef DEBUG
  861. fprintf(stderr, "Prescott\n");
  862. #endif
  863. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  864. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  865. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  866. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  867. #ifdef EXPRECISION
  868. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  869. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  870. #endif
  871. #endif
  872. #ifdef CORE2
  873. #ifdef DEBUG
  874. fprintf(stderr, "Core2\n");
  875. #endif
  876. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  877. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  878. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  879. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  880. #ifdef EXPRECISION
  881. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  882. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  883. #endif
  884. #endif
  885. #ifdef PENRYN
  886. #ifdef DEBUG
  887. fprintf(stderr, "Penryn\n");
  888. #endif
  889. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  890. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  891. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  892. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  893. #ifdef EXPRECISION
  894. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  895. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  896. #endif
  897. #endif
  898. #ifdef DUNNINGTON
  899. #ifdef DEBUG
  900. fprintf(stderr, "Dunnington\n");
  901. #endif
  902. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  903. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  904. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  905. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  906. #ifdef EXPRECISION
  907. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  908. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  909. #endif
  910. #endif
  911. #ifdef NEHALEM
  912. #ifdef DEBUG
  913. fprintf(stderr, "Nehalem\n");
  914. #endif
  915. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  916. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  917. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  918. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  919. #ifdef EXPRECISION
  920. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  921. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  922. #endif
  923. #endif
  924. #ifdef SANDYBRIDGE
  925. #ifdef DEBUG
  926. fprintf(stderr, "Sandybridge\n");
  927. #endif
  928. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  929. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  930. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  931. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  932. #ifdef EXPRECISION
  933. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  934. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  935. #endif
  936. #endif
  937. #ifdef HASWELL
  938. #ifdef DEBUG
  939. fprintf(stderr, "Haswell\n");
  940. #endif
  941. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  942. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  943. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  944. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  945. #ifdef EXPRECISION
  946. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  947. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  948. #endif
  949. #endif
  950. #ifdef SKYLAKEX
  951. #ifdef DEBUG
  952. fprintf(stderr, "SkylakeX\n");
  953. #endif
  954. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  955. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  956. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  957. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  958. #ifdef EXPRECISION
  959. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  960. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  961. #endif
  962. #endif
  963. #ifdef OPTERON
  964. #ifdef DEBUG
  965. fprintf(stderr, "Opteron\n");
  966. #endif
  967. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  968. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  969. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  970. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  971. #ifdef EXPRECISION
  972. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  973. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  974. #endif
  975. #endif
  976. #ifdef BARCELONA
  977. #ifdef DEBUG
  978. fprintf(stderr, "Barcelona\n");
  979. #endif
  980. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  981. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  982. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  983. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  984. #ifdef EXPRECISION
  985. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  986. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  987. #endif
  988. #endif
  989. #ifdef BOBCAT
  990. #ifdef DEBUG
  991. fprintf(stderr, "Bobcate\n");
  992. #endif
  993. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  994. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  995. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  996. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  997. #ifdef EXPRECISION
  998. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  999. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1000. #endif
  1001. #endif
  1002. #ifdef BULLDOZER
  1003. #ifdef DEBUG
  1004. fprintf(stderr, "Bulldozer\n");
  1005. #endif
  1006. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1007. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1008. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1009. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1010. #ifdef EXPRECISION
  1011. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1012. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1013. #endif
  1014. #endif
  1015. #ifdef EXCAVATOR
  1016. #ifdef DEBUG
  1017. fprintf(stderr, "Excavator\n");
  1018. #endif
  1019. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1020. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1021. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1022. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1023. #ifdef EXPRECISION
  1024. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1025. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1026. #endif
  1027. #endif
  1028. #ifdef PILEDRIVER
  1029. #ifdef DEBUG
  1030. fprintf(stderr, "Piledriver\n");
  1031. #endif
  1032. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1033. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1034. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1035. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1036. #ifdef EXPRECISION
  1037. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1038. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1039. #endif
  1040. #endif
  1041. #ifdef STEAMROLLER
  1042. #ifdef DEBUG
  1043. fprintf(stderr, "Steamroller\n");
  1044. #endif
  1045. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1046. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1047. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1048. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1049. #ifdef EXPRECISION
  1050. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1051. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1052. #endif
  1053. #endif
  1054. #ifdef ZEN
  1055. #ifdef DEBUG
  1056. fprintf(stderr, "Zen\n");
  1057. #endif
  1058. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1059. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1060. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1061. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1062. #ifdef EXPRECISION
  1063. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1064. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1065. #endif
  1066. #endif
  1067. #ifdef NANO
  1068. #ifdef DEBUG
  1069. fprintf(stderr, "NANO\n");
  1070. #endif
  1071. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1072. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1073. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1074. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1075. #ifdef EXPRECISION
  1076. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1077. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1078. #endif
  1079. #endif
  1080. #ifdef CGEMM3M_DEFAULT_P
  1081. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1082. #else
  1083. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1084. #endif
  1085. #ifdef ZGEMM3M_DEFAULT_P
  1086. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1087. #else
  1088. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1089. #endif
  1090. #ifdef EXPRECISION
  1091. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1092. #endif
  1093. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1094. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1095. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1096. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1097. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1098. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1099. #else
  1100. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1101. #endif
  1102. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1103. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1104. #else
  1105. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1106. #endif
  1107. #ifdef QUAD_PRECISION
  1108. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1109. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1110. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1111. #endif
  1112. #ifdef DEBUG
  1113. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1114. #endif
  1115. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1116. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1117. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1118. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1119. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1120. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1121. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1122. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1123. #ifdef EXPRECISION
  1124. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1125. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1126. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1127. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1128. #endif
  1129. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1130. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1131. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1132. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1133. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1134. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1135. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1136. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1137. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1138. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1139. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1140. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1141. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1142. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1143. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1144. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1145. #ifdef EXPRECISION
  1146. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1147. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1148. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1149. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1150. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1151. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1152. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1153. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1154. #endif
  1155. }
  1156. #endif //POWER
  1157. #endif //ZARCH
  1158. #endif //defined(ARCH_ARM64)