You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 40 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago

  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * Kernel/parameter table for this build target.
   *
   * The initializer is purely positional: every entry must appear in the
   * same order as the corresponding member of gotoblas_t (declared outside
   * this file), and the preprocessor conditionals below must select the same
   * number of entries on every path.  Names carrying the TS suffix are the
   * target-specific kernel symbols; init_parameter (forward-declared above)
   * is itself stored as one of the entries near the end.
   */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /*
   * ---- shgemm section ----
   * Each precision section opens with three zeros; presumably these are the
   * *_p/*_q/*_r blocking fields that init_parameter() assigns at run time --
   * confirm against gotoblas_t.
   */
  49. 0, 0, 0,
  50. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  51. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  52. SHGEMM_DEFAULT_UNROLL_MN,
  53. #else
  54. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  55. #endif
  56. shgemm_kernelTS, shgemm_betaTS,
  /* When the M and N unroll factors are equal, the o*copy routines also fill
     the first pair of copy slots; the second pair is always o*copy. */
  57. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  58. shgemm_incopyTS, shgemm_itcopyTS,
  59. #else
  60. shgemm_oncopyTS, shgemm_otcopyTS,
  61. #endif
  62. shgemm_oncopyTS, shgemm_otcopyTS,
  /*
   * ---- single precision real section ----
   * NOTE(review): every other precision section in this initializer opens
   * with `0, 0, 0,` ahead of its UNROLL_M/UNROLL_N entries, whereas here
   * sgemm_kernelTS, sgemm_betaTS appear instead (and appear again further
   * down, after ssymv).  Confirm against the field order of gotoblas_t.
   */
  63. sgemm_kernelTS, sgemm_betaTS,
  64. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  65. #ifdef SGEMM_DEFAULT_UNROLL_MN
  66. SGEMM_DEFAULT_UNROLL_MN,
  67. #else
  68. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  69. #endif
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  70. #ifdef HAVE_EXCLUSIVE_CACHE
  71. 1,
  72. #else
  73. 0,
  74. #endif
  /* Single precision level-1/level-2 kernels, then GEMM/TRSM/TRMM/SYMM. */
  75. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  76. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  77. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  78. dsdot_kTS,
  79. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  80. sgemv_nTS, sgemv_tTS, sger_kTS,
  81. ssymv_LTS, ssymv_UTS,
  82. sgemm_kernelTS, sgemm_betaTS,
  83. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  84. sgemm_incopyTS, sgemm_itcopyTS,
  85. #else
  86. sgemm_oncopyTS, sgemm_otcopyTS,
  87. #endif
  88. sgemm_oncopyTS, sgemm_otcopyTS,
  89. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  90. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  91. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  92. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  93. #else
  94. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  95. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  96. #endif
  97. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  98. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  99. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  100. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  101. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  102. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  103. #else
  104. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  105. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  106. #endif
  107. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  108. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  109. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  110. ssymm_iutcopyTS, ssymm_iltcopyTS,
  111. #else
  112. ssymm_outcopyTS, ssymm_oltcopyTS,
  113. #endif
  114. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* The two LAPACK helper slots are NULL when NO_LAPACK is defined. */
  115. #ifndef NO_LAPACK
  116. sneg_tcopyTS, slaswp_ncopyTS,
  117. #else
  118. NULL,NULL,
  119. #endif
  /* ---- double precision real section ---- */
  120. 0, 0, 0,
  121. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  122. #ifdef DGEMM_DEFAULT_UNROLL_MN
  123. DGEMM_DEFAULT_UNROLL_MN,
  124. #else
  125. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  126. #endif
  127. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  128. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  129. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  130. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  131. dgemv_nTS, dgemv_tTS, dger_kTS,
  132. dsymv_LTS, dsymv_UTS,
  133. dgemm_kernelTS, dgemm_betaTS,
  134. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  135. dgemm_incopyTS, dgemm_itcopyTS,
  136. #else
  137. dgemm_oncopyTS, dgemm_otcopyTS,
  138. #endif
  139. dgemm_oncopyTS, dgemm_otcopyTS,
  140. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  141. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  142. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  143. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  144. #else
  145. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  146. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  147. #endif
  148. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  149. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  150. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  151. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  152. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  153. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  154. #else
  155. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  156. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  157. #endif
  158. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  159. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  160. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  161. dsymm_iutcopyTS, dsymm_iltcopyTS,
  162. #else
  163. dsymm_outcopyTS, dsymm_oltcopyTS,
  164. #endif
  165. dsymm_outcopyTS, dsymm_oltcopyTS,
  166. #ifndef NO_LAPACK
  167. dneg_tcopyTS, dlaswp_ncopyTS,
  168. #else
  169. NULL, NULL,
  170. #endif
  /* ---- q* (extended precision real) section: present only when
     EXPRECISION is defined ---- */
  171. #ifdef EXPRECISION
  172. 0, 0, 0,
  173. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  174. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  175. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  176. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  177. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  178. qgemv_nTS, qgemv_tTS, qger_kTS,
  179. qsymv_LTS, qsymv_UTS,
  180. qgemm_kernelTS, qgemm_betaTS,
  181. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  182. qgemm_incopyTS, qgemm_itcopyTS,
  183. #else
  184. qgemm_oncopyTS, qgemm_otcopyTS,
  185. #endif
  186. qgemm_oncopyTS, qgemm_otcopyTS,
  187. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  188. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  189. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  190. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  191. #else
  192. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  193. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  194. #endif
  195. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  196. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  197. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  198. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  199. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  200. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  201. #else
  202. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  203. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  204. #endif
  205. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  206. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  207. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  208. qsymm_iutcopyTS, qsymm_iltcopyTS,
  209. #else
  210. qsymm_outcopyTS, qsymm_oltcopyTS,
  211. #endif
  212. qsymm_outcopyTS, qsymm_oltcopyTS,
  213. #ifndef NO_LAPACK
  214. qneg_tcopyTS, qlaswp_ncopyTS,
  215. #else
  216. NULL, NULL,
  217. #endif
  218. #endif
  /* ---- single precision complex section ---- */
  219. 0, 0, 0,
  220. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  221. #ifdef CGEMM_DEFAULT_UNROLL_MN
  222. CGEMM_DEFAULT_UNROLL_MN,
  223. #else
  224. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  225. #endif
  226. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  227. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  228. cdotu_kTS, cdotc_kTS, csrot_kTS,
  229. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  230. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  231. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  232. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  233. csymv_LTS, csymv_UTS,
  234. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  235. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  236. cgemm_betaTS,
  237. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  238. cgemm_incopyTS, cgemm_itcopyTS,
  239. #else
  240. cgemm_oncopyTS, cgemm_otcopyTS,
  241. #endif
  242. cgemm_oncopyTS, cgemm_otcopyTS,
  243. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  244. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  245. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  246. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  247. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  248. #else
  249. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  250. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  251. #endif
  252. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  253. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  254. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  255. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  256. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  257. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  258. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  259. #else
  260. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  261. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  262. #endif
  263. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  264. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  265. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  266. csymm_iutcopyTS, csymm_iltcopyTS,
  267. #else
  268. csymm_outcopyTS, csymm_oltcopyTS,
  269. #endif
  270. csymm_outcopyTS, csymm_oltcopyTS,
  271. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  272. chemm_iutcopyTS, chemm_iltcopyTS,
  273. #else
  274. chemm_outcopyTS, chemm_oltcopyTS,
  275. #endif
  276. chemm_outcopyTS, chemm_oltcopyTS,
  /*
   * ---- cgemm3m section ----
   * With USE_GEMM3M the real kernels are listed (unroll factors fall back to
   * the SGEMM ones when CGEMM3M_DEFAULT_UNROLL_M is not defined); without it
   * the same number of slots is filled with 0/NULL placeholders.
   */
  277. 0, 0, 0,
  278. #if defined(USE_GEMM3M)
  279. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  280. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  281. #else
  282. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  283. #endif
  284. cgemm3m_kernelTS,
  285. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  286. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  287. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  288. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  289. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  290. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  291. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  292. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  293. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  294. csymm3m_oucopybTS, csymm3m_olcopybTS,
  295. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  296. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  297. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  298. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  299. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  300. chemm3m_oucopybTS, chemm3m_olcopybTS,
  301. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  302. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  303. #else
  304. 0, 0, 0,
  305. NULL,
  306. NULL, NULL,
  307. NULL, NULL,
  308. NULL, NULL,
  309. NULL, NULL,
  310. NULL, NULL,
  311. NULL, NULL,
  312. NULL, NULL,
  313. NULL, NULL,
  314. NULL, NULL,
  315. NULL, NULL,
  316. NULL, NULL,
  317. NULL, NULL,
  318. NULL, NULL,
  319. NULL, NULL,
  320. NULL, NULL,
  321. NULL, NULL,
  322. NULL, NULL,
  323. NULL, NULL,
  324. #endif
  325. #ifndef NO_LAPACK
  326. cneg_tcopyTS, claswp_ncopyTS,
  327. #else
  328. NULL, NULL,
  329. #endif
  /* ---- double precision complex section ---- */
  330. 0, 0, 0,
  331. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  332. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  333. ZGEMM_DEFAULT_UNROLL_MN,
  334. #else
  335. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  336. #endif
  337. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  338. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  339. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  340. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  341. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  342. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  343. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  344. zsymv_LTS, zsymv_UTS,
  345. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  346. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  347. zgemm_betaTS,
  348. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  349. zgemm_incopyTS, zgemm_itcopyTS,
  350. #else
  351. zgemm_oncopyTS, zgemm_otcopyTS,
  352. #endif
  353. zgemm_oncopyTS, zgemm_otcopyTS,
  354. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  355. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  356. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  357. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  358. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  359. #else
  360. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  361. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  362. #endif
  363. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  364. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  365. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  366. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  367. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  368. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  369. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  370. #else
  371. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  372. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  373. #endif
  374. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  375. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  376. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  377. zsymm_iutcopyTS, zsymm_iltcopyTS,
  378. #else
  379. zsymm_outcopyTS, zsymm_oltcopyTS,
  380. #endif
  381. zsymm_outcopyTS, zsymm_oltcopyTS,
  382. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  383. zhemm_iutcopyTS, zhemm_iltcopyTS,
  384. #else
  385. zhemm_outcopyTS, zhemm_oltcopyTS,
  386. #endif
  387. zhemm_outcopyTS, zhemm_oltcopyTS,
  /* ---- zgemm3m section: same layout as cgemm3m, with the DGEMM unroll
     factors as the fallback ---- */
  388. 0, 0, 0,
  389. #if defined(USE_GEMM3M)
  390. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  391. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  392. #else
  393. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  394. #endif
  395. zgemm3m_kernelTS,
  396. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  397. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  398. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  399. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  400. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  401. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  402. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  403. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  404. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  405. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  406. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  407. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  408. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  409. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  410. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  411. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  412. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  413. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  414. #else
  415. 0, 0, 0,
  416. NULL,
  417. NULL, NULL,
  418. NULL, NULL,
  419. NULL, NULL,
  420. NULL, NULL,
  421. NULL, NULL,
  422. NULL, NULL,
  423. NULL, NULL,
  424. NULL, NULL,
  425. NULL, NULL,
  426. NULL, NULL,
  427. NULL, NULL,
  428. NULL, NULL,
  429. NULL, NULL,
  430. NULL, NULL,
  431. NULL, NULL,
  432. NULL, NULL,
  433. NULL, NULL,
  434. NULL, NULL,
  435. #endif
  436. #ifndef NO_LAPACK
  437. zneg_tcopyTS, zlaswp_ncopyTS,
  438. #else
  439. NULL, NULL,
  440. #endif
  /* ---- x* (extended precision complex) section, including xgemm3m:
     present only when EXPRECISION is defined ---- */
  441. #ifdef EXPRECISION
  442. 0, 0, 0,
  443. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  444. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  445. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  446. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  447. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  448. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  449. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  450. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  451. xsymv_LTS, xsymv_UTS,
  452. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  453. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  454. xgemm_betaTS,
  455. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  456. xgemm_incopyTS, xgemm_itcopyTS,
  457. #else
  458. xgemm_oncopyTS, xgemm_otcopyTS,
  459. #endif
  460. xgemm_oncopyTS, xgemm_otcopyTS,
  461. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  462. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  463. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  464. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  465. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  466. #else
  467. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  468. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  469. #endif
  470. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  471. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  472. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  473. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  474. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  475. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  476. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  477. #else
  478. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  479. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  480. #endif
  481. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  482. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  483. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  484. xsymm_iutcopyTS, xsymm_iltcopyTS,
  485. #else
  486. xsymm_outcopyTS, xsymm_oltcopyTS,
  487. #endif
  488. xsymm_outcopyTS, xsymm_oltcopyTS,
  489. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  490. xhemm_iutcopyTS, xhemm_iltcopyTS,
  491. #else
  492. xhemm_outcopyTS, xhemm_oltcopyTS,
  493. #endif
  494. xhemm_outcopyTS, xhemm_oltcopyTS,
  495. 0, 0, 0,
  496. #if defined(USE_GEMM3M)
  497. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  498. xgemm3m_kernelTS,
  499. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  500. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  501. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  502. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  503. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  504. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  505. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  506. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  507. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  508. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  509. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  510. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  511. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  512. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  513. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  514. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  515. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  516. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  517. #else
  518. 0, 0, 0,
  519. NULL,
  520. NULL, NULL,
  521. NULL, NULL,
  522. NULL, NULL,
  523. NULL, NULL,
  524. NULL, NULL,
  525. NULL, NULL,
  526. NULL, NULL,
  527. NULL, NULL,
  528. NULL, NULL,
  529. NULL, NULL,
  530. NULL, NULL,
  531. NULL, NULL,
  532. NULL, NULL,
  533. NULL, NULL,
  534. NULL, NULL,
  535. NULL, NULL,
  536. NULL, NULL,
  537. NULL, NULL,
  538. #endif
  539. #ifndef NO_LAPACK
  540. xneg_tcopyTS, xlaswp_ncopyTS,
  541. #else
  542. NULL, NULL,
  543. #endif
  544. #endif
  /* ---- trailing entries: the init_parameter hook, the *NUMOPT constants,
     then the axpby, omatcopy, imatcopy and geadd kernels ---- */
  545. init_parameter,
  546. SNUMOPT, DNUMOPT, QNUMOPT,
  547. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  548. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  549. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  550. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  551. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  552. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  553. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  554. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  555. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  556. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  557. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  558. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  559. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  560. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  561. };
  562. #if defined(ARCH_ARM64)
  563. static void init_parameter(void) {
  564. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  565. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  566. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  567. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  568. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  569. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  570. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  571. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  572. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  573. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  574. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  575. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  576. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  577. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  578. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  579. #ifdef EXPRECISION
  580. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  581. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  582. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  583. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  584. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  585. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  586. #endif
  587. #if defined(USE_GEMM3M)
  588. #ifdef CGEMM3M_DEFAULT_P
  589. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  590. #else
  591. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  592. #endif
  593. #ifdef ZGEMM3M_DEFAULT_P
  594. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  595. #else
  596. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  597. #endif
  598. #ifdef CGEMM3M_DEFAULT_Q
  599. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  600. #else
  601. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  602. #endif
  603. #ifdef ZGEMM3M_DEFAULT_Q
  604. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  605. #else
  606. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  607. #endif
  608. #ifdef CGEMM3M_DEFAULT_R
  609. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  610. #else
  611. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  612. #endif
  613. #ifdef ZGEMM3M_DEFAULT_R
  614. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  615. #else
  616. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  617. #endif
  618. #ifdef EXPRECISION
  619. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  620. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  621. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  622. #endif
  623. #endif
  624. }
  625. #else // defined(ARCH_ARM64)
  626. #if defined(ARCH_POWER)
  627. static void init_parameter(void) {
  628. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  629. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  630. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  631. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  632. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  633. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  634. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  635. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  636. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  637. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  638. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  639. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  640. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  641. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  642. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  643. }
  644. #else //POWER
  645. #if defined(ARCH_ZARCH)
  646. static void init_parameter(void) {
  647. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  648. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  649. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  650. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  651. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  652. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  653. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  654. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  655. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  656. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  657. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  658. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  659. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  660. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  661. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  662. }
  663. #else //ZARCH
  664. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup via CPUID leaf 2 (cache descriptor bytes).
   *
   * Walks the descriptor bytes returned in EAX (skipping its low byte, which
   * is not a descriptor), EBX, ECX and EDX in that order and returns the size
   * associated with the first byte that matches a known L2 descriptor.  The
   * value is in KB (the fallback below returns 256 for "256k").
   *
   * Returns 256 after printing a warning to stderr when no descriptor
   * matches.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int r, shift;

    cpuid(2, &eax, &ebx, &ecx, &edx);

    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    for (r = 0; r < 4; r++){
      /* Bit 31 set means the register carries no valid descriptors. */
      if (regs[r] < 0) continue;

      /* The low byte of EAX is not a descriptor, so start EAX at bit 8. */
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8){
        switch (BITMASK(regs[r], shift, 0xff)){
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;
          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;
          case 0x3a :
            return 192;
          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;
          case 0x3d :
            return 384;
          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;
          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;
          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;
          case 0x48 :
            /* 3 MB L2; the previous value 3184 was not 3 * 1024. */
            return 3072;
          case 0x49 :
            return 4096;
          case 0x4e :
            return 6144;
          default :
            break;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  735. #endif
  /*
   * Returns the L2 cache size taken from ECX[31:16] of CPUID leaf 0x80000006
   * (in KB; the fallback below returns 256 for "256k").
   *
   * Leaf 0x80000000 is queried first so that 0x80000006 is only read on CPUs
   * that actually implement it; on processors without that leaf the register
   * contents would not be L2 information.
   *
   * When no size is obtained: builds without ARCH_X86 print a warning to
   * stderr and return 256; ARCH_X86 builds fall back to get_l2_size_old().
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int l2 = 0;

    /* EAX of leaf 0x80000000 holds the highest supported extended leaf. */
    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    if ((unsigned int)eax >= 0x80000006u) {
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      l2 = BITMASK(ecx, 16, 0xffff);
    }

  #ifndef ARCH_X86
    if (l2 <= 0) {
      fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
      return 256;
    }
    return l2;
  #else
    if (l2 > 0) return l2;
    return get_l2_size_old();
  #endif
  }
  /*
   * Reads CPUID leaf 0x80000006 and returns the 14-bit field EDX[31:18]
   * multiplied by 512.
   * NOTE(review): vendor CPUID documentation describes this field as the L3
   * size in 512 KB units, which would make the result KB -- confirm.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l3_field = BITMASK(edx, 18, 0x3fff);

    return l3_field * 512;
  }
  /*
   * init_parameter: fill in the GEMM blocking parameters of TABLE_NAME.
   *
   * For each precision the function sets three fields:
   *   *_q : copied from the compile-time *_DEFAULT_Q macros;
   *   *_p : chosen by whichever CPU-core macro is defined at build time,
   *         either scaled from the detected L2 size or copied from the
   *         *_DEFAULT_P macros, then rounded up to a multiple of the
   *         matching *_DEFAULT_UNROLL_M;
   *   *_r : derived from BUFFER_SIZE, the p/q values above and
   *         TABLE_NAME.offsetA / TABLE_NAME.align.
   *
   * Takes no arguments and returns nothing; the only outputs are the writes
   * to TABLE_NAME (plus diagnostics on stderr when DEBUG is defined).
   */
  756. static void init_parameter(void) {
  /* L2 size as reported by get_l2_size(); only some core branches use it. */
  757. int l2 = get_l2_size();
  758. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  759. /* where the GEMM unrolling parameters do not depend on l2 */
  /* ---- Q parameters: taken directly from the compile-time defaults. ---- */
  760. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  761. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  762. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  763. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  764. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  /* 3M variants: use a dedicated default if one exists, otherwise reuse the
     single/double real default. */
  765. #ifdef CGEMM3M_DEFAULT_Q
  766. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  767. #else
  768. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  769. #endif
  770. #ifdef ZGEMM3M_DEFAULT_Q
  771. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  772. #else
  773. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  774. #endif
  /* Extended-precision Q values; xgemm3m reuses the QGEMM default. */
  775. #ifdef EXPRECISION
  776. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  777. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  778. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  779. #endif
  /* ---- P parameters: one branch per CPU-core macro. ----
     Branches using "l2 >> 7" scale P by the L2 size divided by 128;
     branches using "l2 >> 9" scale by the L2 size divided by 512. */
  /* NOTE(review): CORE_YONAH selects this branch too, although the debug
     message below does not mention it. */
  780. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  781. #ifdef DEBUG
  782. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  783. #endif
  784. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  785. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  786. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  787. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  788. #ifdef EXPRECISION
  789. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  790. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  791. #endif
  792. #endif
  /* Northwood: P scaled by l2 >> 7. */
  793. #ifdef CORE_NORTHWOOD
  794. #ifdef DEBUG
  795. fprintf(stderr, "Northwood\n");
  796. #endif
  797. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  798. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  799. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  800. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  801. #ifdef EXPRECISION
  802. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  803. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  804. #endif
  805. #endif
  /* Atom: fixed literal P values, independent of l2. */
  806. #ifdef ATOM
  807. #ifdef DEBUG
  808. fprintf(stderr, "Atom\n");
  809. #endif
  810. TABLE_NAME.sgemm_p = 256;
  811. TABLE_NAME.dgemm_p = 128;
  812. TABLE_NAME.cgemm_p = 128;
  813. TABLE_NAME.zgemm_p = 64;
  814. #ifdef EXPRECISION
  815. TABLE_NAME.qgemm_p = 64;
  816. TABLE_NAME.xgemm_p = 32;
  817. #endif
  818. #endif
  /* Prescott: P scaled by l2 >> 7. */
  819. #ifdef CORE_PRESCOTT
  820. #ifdef DEBUG
  821. fprintf(stderr, "Prescott\n");
  822. #endif
  823. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  824. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  825. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  826. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  827. #ifdef EXPRECISION
  828. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  829. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  830. #endif
  831. #endif
  /* Core2: P scaled by l2 >> 9 plus a small constant offset. */
  832. #ifdef CORE2
  833. #ifdef DEBUG
  834. fprintf(stderr, "Core2\n");
  835. #endif
  836. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  837. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  838. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  839. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  840. #ifdef EXPRECISION
  841. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  842. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  843. #endif
  844. #endif
  /* Penryn: P scaled by l2 >> 9 plus a small constant offset. */
  845. #ifdef PENRYN
  846. #ifdef DEBUG
  847. fprintf(stderr, "Penryn\n");
  848. #endif
  849. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  850. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  851. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  852. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  853. #ifdef EXPRECISION
  854. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  855. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  856. #endif
  857. #endif
  /* Dunnington: same formulas as the Penryn branch. */
  858. #ifdef DUNNINGTON
  859. #ifdef DEBUG
  860. fprintf(stderr, "Dunnington\n");
  861. #endif
  862. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  863. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  864. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  865. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  866. #ifdef EXPRECISION
  867. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  868. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  869. #endif
  870. #endif
  /* Nehalem through SkylakeX: P comes from the compile-time *_DEFAULT_P
     macros; l2 is not used. */
  871. #ifdef NEHALEM
  872. #ifdef DEBUG
  873. fprintf(stderr, "Nehalem\n");
  874. #endif
  875. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  876. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  877. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  878. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  879. #ifdef EXPRECISION
  880. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  881. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  882. #endif
  883. #endif
  884. #ifdef SANDYBRIDGE
  885. #ifdef DEBUG
  886. fprintf(stderr, "Sandybridge\n");
  887. #endif
  888. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  889. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  890. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  891. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  892. #ifdef EXPRECISION
  893. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  894. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  895. #endif
  896. #endif
  897. #ifdef HASWELL
  898. #ifdef DEBUG
  899. fprintf(stderr, "Haswell\n");
  900. #endif
  901. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  902. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  903. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  904. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  905. #ifdef EXPRECISION
  906. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  907. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  908. #endif
  909. #endif
  910. #ifdef SKYLAKEX
  911. #ifdef DEBUG
  912. fprintf(stderr, "SkylakeX\n");
  913. #endif
  914. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  915. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  916. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  917. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  918. #ifdef EXPRECISION
  919. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  920. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  921. #endif
  922. #endif
  /* Opteron: constant base plus a term scaled by l2 >> 7. */
  923. #ifdef OPTERON
  924. #ifdef DEBUG
  925. fprintf(stderr, "Opteron\n");
  926. #endif
  927. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  928. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  929. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  930. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  931. #ifdef EXPRECISION
  932. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  933. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  934. #endif
  935. #endif
  /* Barcelona through Nano: P comes from the compile-time *_DEFAULT_P
     macros; l2 is not used. */
  936. #ifdef BARCELONA
  937. #ifdef DEBUG
  938. fprintf(stderr, "Barcelona\n");
  939. #endif
  940. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  941. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  942. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  943. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  944. #ifdef EXPRECISION
  945. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  946. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  947. #endif
  948. #endif
  /* NOTE(review): the debug string below is spelled "Bobcate" while the
     guarding macro is BOBCAT; it is runtime text and is left unchanged. */
  949. #ifdef BOBCAT
  950. #ifdef DEBUG
  951. fprintf(stderr, "Bobcate\n");
  952. #endif
  953. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  954. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  955. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  956. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  957. #ifdef EXPRECISION
  958. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  959. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  960. #endif
  961. #endif
  962. #ifdef BULLDOZER
  963. #ifdef DEBUG
  964. fprintf(stderr, "Bulldozer\n");
  965. #endif
  966. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  967. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  968. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  969. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  970. #ifdef EXPRECISION
  971. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  972. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  973. #endif
  974. #endif
  975. #ifdef EXCAVATOR
  976. #ifdef DEBUG
  977. fprintf(stderr, "Excavator\n");
  978. #endif
  979. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  980. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  981. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  982. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  983. #ifdef EXPRECISION
  984. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  985. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  986. #endif
  987. #endif
  988. #ifdef PILEDRIVER
  989. #ifdef DEBUG
  990. fprintf(stderr, "Piledriver\n");
  991. #endif
  992. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  993. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  994. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  995. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  996. #ifdef EXPRECISION
  997. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  998. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  999. #endif
  1000. #endif
  1001. #ifdef STEAMROLLER
  1002. #ifdef DEBUG
  1003. fprintf(stderr, "Steamroller\n");
  1004. #endif
  1005. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1006. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1007. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1008. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1009. #ifdef EXPRECISION
  1010. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1011. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1012. #endif
  1013. #endif
  1014. #ifdef ZEN
  1015. #ifdef DEBUG
  1016. fprintf(stderr, "Zen\n");
  1017. #endif
  1018. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1019. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1020. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1021. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1022. #ifdef EXPRECISION
  1023. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1024. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1025. #endif
  1026. #endif
  1027. #ifdef NANO
  1028. #ifdef DEBUG
  1029. fprintf(stderr, "NANO\n");
  1030. #endif
  1031. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1032. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1033. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1034. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1035. #ifdef EXPRECISION
  1036. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1037. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1038. #endif
  1039. #endif
  /* 3M P values: dedicated default if defined, otherwise reuse the real
     single/double P chosen above (before it is rounded below). */
  1040. #ifdef CGEMM3M_DEFAULT_P
  1041. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1042. #else
  1043. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1044. #endif
  1045. #ifdef ZGEMM3M_DEFAULT_P
  1046. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1047. #else
  1048. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1049. #endif
  1050. #ifdef EXPRECISION
  1051. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1052. #endif
  /* ---- Round every P up to the next multiple of its M unroll factor:
          p = ((p + u - 1) / u) * u. ---- */
  /* NOTE(review): shgemm_p is not assigned anywhere earlier in this
     function, so the value rounded here is whatever TABLE_NAME already
     held -- confirm it is initialised elsewhere. */
  1053. TABLE_NAME.shgemm_p = ((TABLE_NAME.shgemm_p + SHGEMM_DEFAULT_UNROLL_M - 1)/SHGEMM_DEFAULT_UNROLL_M) * SHGEMM_DEFAULT_UNROLL_M;
  1054. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1055. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1056. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1057. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  /* 3M variants round to their own unroll factor when defined, otherwise
     to the real single/double unroll factor. */
  1058. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1059. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1060. #else
  1061. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1062. #endif
  1063. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1064. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1065. #else
  1066. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1067. #endif
  /* NOTE(review): this rounding is guarded by QUAD_PRECISION, whereas every
     other qgemm/xgemm assignment in this function is guarded by EXPRECISION
     -- confirm the difference is intentional. */
  1068. #ifdef QUAD_PRECISION
  1069. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1070. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1071. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1072. #endif
  1073. #ifdef DEBUG
  1074. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1075. #endif
  /* ---- R parameters. ----
     Every statement below has the same shape:
       a = (p * q * size + offsetA + align) & ~align
       r = ((BUFFER_SIZE - a) / (q * size) - 15) & ~15
     so r is always a multiple of 16.  "size" is the literal 4, 8, 16 or 32
     in each statement -- presumably bytes per element; TODO confirm.
     The "& ~align" step presumably assumes align is a low-bit mask
     (power of two minus one); verify against TABLE_NAME's definition. */
  1076. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1077. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1078. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1079. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1080. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1081. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1082. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1083. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1084. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1085. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1086. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1087. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1088. #ifdef EXPRECISION
  1089. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1090. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1091. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1092. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1093. #endif
  1094. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1095. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1096. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1097. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1098. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1099. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1100. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1101. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  /* The 3M variants use the same size factors as their complex
     counterparts (8 for cgemm3m, 16 for zgemm3m, 32 for xgemm3m). */
  1102. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1103. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1104. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1105. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1106. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1107. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1108. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1109. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1110. #ifdef EXPRECISION
  1111. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1112. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1113. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1114. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1115. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1116. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1117. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1118. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1119. #endif
  1120. }
  1121. #endif //POWER
  1122. #endif //ZARCH
  1123. #endif //defined(ARCH_ARM64)