You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 27 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * Function/parameter table for this build target.
   *
   * The table is filled with a positional initializer, so every entry below
   * must stay in the same order as the members of gotoblas_t (declared
   * outside this file).  The entries are grouped by precision prefix in the
   * order s, d, q, c, z, x; the q and x groups are only present when
   * EXPRECISION is defined.
   *
   * NOTE(review): the *TS symbols are presumably provided through kernelTS.h
   * when BUILD_KERNEL is defined -- confirm against the build system.
   */
  46. gotoblas_t TABLE_NAME = {
  /* Buffer offsets and alignment.  init_parameter() later reads
     TABLE_NAME.offsetA and TABLE_NAME.align; presumably these are the
     members initialized here -- confirm against gotoblas_t. */
  47. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /* ---- single precision real (s) ---- */
  /* Three zero placeholders; presumably sgemm_p/q/r, which init_parameter()
     assigns at run time -- confirm against gotoblas_t. */
  48. 0, 0, 0,
  49. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  /* 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  50. #ifdef HAVE_EXCLUSIVE_CACHE
  51. 1,
  52. #else
  53. 0,
  54. #endif
  55. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  56. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  57. snrm2_kTS, sasum_kTS, scopy_kTS, sdot_kTS,
  58. dsdot_kTS,
  59. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  60. sgemv_nTS, sgemv_tTS, sger_kTS,
  61. ssymv_LTS, ssymv_UTS,
  62. sgemm_kernelTS, sgemm_betaTS,
  /* When the M and N unroll factors differ, the "i"-named copy routines fill
     the first group of slots; when they are equal the "o"-named routines are
     listed twice.  The same pattern repeats for every copy-routine group in
     this table. */
  63. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  64. sgemm_incopyTS, sgemm_itcopyTS,
  65. #else
  66. sgemm_oncopyTS, sgemm_otcopyTS,
  67. #endif
  68. sgemm_oncopyTS, sgemm_otcopyTS,
  69. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  70. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  71. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  72. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  73. #else
  74. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  75. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  76. #endif
  77. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  78. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  79. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  80. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  81. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  82. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  83. #else
  84. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  85. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  86. #endif
  87. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  88. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  89. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  90. ssymm_iutcopyTS, ssymm_iltcopyTS,
  91. #else
  92. ssymm_outcopyTS, ssymm_oltcopyTS,
  93. #endif
  94. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* The two LAPACK helper slots are NULL when NO_LAPACK is defined. */
  95. #ifndef NO_LAPACK
  96. sneg_tcopyTS, slaswp_ncopyTS,
  97. #else
  98. NULL,NULL,
  99. #endif
  /* ---- double precision real (d) ---- */
  /* Zero placeholders; presumably dgemm_p/q/r, set by init_parameter(). */
  100. 0, 0, 0,
  101. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  102. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  103. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  104. dnrm2_kTS, dasum_kTS, dcopy_kTS, ddot_kTS,
  105. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  106. dgemv_nTS, dgemv_tTS, dger_kTS,
  107. dsymv_LTS, dsymv_UTS,
  108. dgemm_kernelTS, dgemm_betaTS,
  109. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  110. dgemm_incopyTS, dgemm_itcopyTS,
  111. #else
  112. dgemm_oncopyTS, dgemm_otcopyTS,
  113. #endif
  114. dgemm_oncopyTS, dgemm_otcopyTS,
  115. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  116. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  117. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  118. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  119. #else
  120. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  121. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  122. #endif
  123. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  124. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  125. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  126. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  127. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  128. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  129. #else
  130. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  131. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  132. #endif
  133. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  134. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  135. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  136. dsymm_iutcopyTS, dsymm_iltcopyTS,
  137. #else
  138. dsymm_outcopyTS, dsymm_oltcopyTS,
  139. #endif
  140. dsymm_outcopyTS, dsymm_oltcopyTS,
  141. #ifndef NO_LAPACK
  142. dneg_tcopyTS, dlaswp_ncopyTS,
  143. #else
  144. NULL, NULL,
  145. #endif
  /* ---- extended precision real (q), only when EXPRECISION is defined ---- */
  146. #ifdef EXPRECISION
  /* Zero placeholders; presumably qgemm_p/q/r, set by init_parameter(). */
  147. 0, 0, 0,
  148. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  149. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  150. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  151. qnrm2_kTS, qasum_kTS, qcopy_kTS, qdot_kTS,
  152. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  153. qgemv_nTS, qgemv_tTS, qger_kTS,
  154. qsymv_LTS, qsymv_UTS,
  155. qgemm_kernelTS, qgemm_betaTS,
  156. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  157. qgemm_incopyTS, qgemm_itcopyTS,
  158. #else
  159. qgemm_oncopyTS, qgemm_otcopyTS,
  160. #endif
  161. qgemm_oncopyTS, qgemm_otcopyTS,
  162. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  163. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  164. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  165. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  166. #else
  167. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  168. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  169. #endif
  170. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  171. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  172. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  173. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  174. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  175. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  176. #else
  177. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  178. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  179. #endif
  180. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  181. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  182. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  183. qsymm_iutcopyTS, qsymm_iltcopyTS,
  184. #else
  185. qsymm_outcopyTS, qsymm_oltcopyTS,
  186. #endif
  187. qsymm_outcopyTS, qsymm_oltcopyTS,
  188. #ifndef NO_LAPACK
  189. qneg_tcopyTS, qlaswp_ncopyTS,
  190. #else
  191. NULL, NULL,
  192. #endif
  193. #endif
  /* ---- single precision complex (c) ---- */
  /* Zero placeholders; presumably cgemm_p/q/r, set by init_parameter(). */
  194. 0, 0, 0,
  195. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  196. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  197. cnrm2_kTS, casum_kTS, ccopy_kTS,
  198. cdotu_kTS, cdotc_kTS, csrot_kTS,
  199. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  200. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  201. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  202. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  203. csymv_LTS, csymv_UTS,
  204. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  205. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  206. cgemm_betaTS,
  207. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  208. cgemm_incopyTS, cgemm_itcopyTS,
  209. #else
  210. cgemm_oncopyTS, cgemm_otcopyTS,
  211. #endif
  212. cgemm_oncopyTS, cgemm_otcopyTS,
  213. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  214. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  215. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  216. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  217. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  218. #else
  219. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  220. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  221. #endif
  222. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  223. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  224. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  225. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  226. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  227. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  228. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  229. #else
  230. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  231. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  232. #endif
  233. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  234. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  235. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  236. csymm_iutcopyTS, csymm_iltcopyTS,
  237. #else
  238. csymm_outcopyTS, csymm_oltcopyTS,
  239. #endif
  240. csymm_outcopyTS, csymm_oltcopyTS,
  241. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  242. chemm_iutcopyTS, chemm_iltcopyTS,
  243. #else
  244. chemm_outcopyTS, chemm_oltcopyTS,
  245. #endif
  246. chemm_outcopyTS, chemm_oltcopyTS,
  /* The 3m entries are listed unconditionally (no unroll-dependent #if). */
  247. cgemm3m_kernelTS,
  248. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  249. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  250. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  251. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  252. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  253. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  254. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  255. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  256. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  257. csymm3m_oucopybTS, csymm3m_olcopybTS,
  258. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  259. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  260. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  261. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  262. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  263. chemm3m_oucopybTS, chemm3m_olcopybTS,
  264. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  265. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  266. #ifndef NO_LAPACK
  267. cneg_tcopyTS, claswp_ncopyTS,
  268. #else
  269. NULL, NULL,
  270. #endif
  /* ---- double precision complex (z) ---- */
  /* Zero placeholders; presumably zgemm_p/q/r, set by init_parameter(). */
  271. 0, 0, 0,
  272. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  273. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  274. znrm2_kTS, zasum_kTS, zcopy_kTS,
  275. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  276. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  277. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  278. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  279. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  280. zsymv_LTS, zsymv_UTS,
  281. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  282. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  283. zgemm_betaTS,
  284. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  285. zgemm_incopyTS, zgemm_itcopyTS,
  286. #else
  287. zgemm_oncopyTS, zgemm_otcopyTS,
  288. #endif
  289. zgemm_oncopyTS, zgemm_otcopyTS,
  290. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  291. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  292. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  293. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  294. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  295. #else
  296. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  297. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  298. #endif
  299. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  300. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  301. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  302. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  303. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  304. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  305. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  306. #else
  307. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  308. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  309. #endif
  310. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  311. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  312. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  313. zsymm_iutcopyTS, zsymm_iltcopyTS,
  314. #else
  315. zsymm_outcopyTS, zsymm_oltcopyTS,
  316. #endif
  317. zsymm_outcopyTS, zsymm_oltcopyTS,
  318. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  319. zhemm_iutcopyTS, zhemm_iltcopyTS,
  320. #else
  321. zhemm_outcopyTS, zhemm_oltcopyTS,
  322. #endif
  323. zhemm_outcopyTS, zhemm_oltcopyTS,
  324. zgemm3m_kernelTS,
  325. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  326. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  327. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  328. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  329. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  330. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  331. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  332. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  333. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  334. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  335. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  336. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  337. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  338. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  339. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  340. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  341. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  342. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  343. #ifndef NO_LAPACK
  344. zneg_tcopyTS, zlaswp_ncopyTS,
  345. #else
  346. NULL, NULL,
  347. #endif
  /* ---- extended precision complex (x), only when EXPRECISION is defined ---- */
  348. #ifdef EXPRECISION
  /* Zero placeholders; presumably xgemm_p/q/r, set by init_parameter(). */
  349. 0, 0, 0,
  350. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  351. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  352. xnrm2_kTS, xasum_kTS, xcopy_kTS,
  353. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  354. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  355. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  356. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  357. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  358. xsymv_LTS, xsymv_UTS,
  359. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  360. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  361. xgemm_betaTS,
  362. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  363. xgemm_incopyTS, xgemm_itcopyTS,
  364. #else
  365. xgemm_oncopyTS, xgemm_otcopyTS,
  366. #endif
  367. xgemm_oncopyTS, xgemm_otcopyTS,
  368. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  369. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  370. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  371. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  372. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  373. #else
  374. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  375. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  376. #endif
  377. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  378. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  379. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  380. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  381. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  382. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  383. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  384. #else
  385. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  386. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  387. #endif
  388. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  389. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  390. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  391. xsymm_iutcopyTS, xsymm_iltcopyTS,
  392. #else
  393. xsymm_outcopyTS, xsymm_oltcopyTS,
  394. #endif
  395. xsymm_outcopyTS, xsymm_oltcopyTS,
  396. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  397. xhemm_iutcopyTS, xhemm_iltcopyTS,
  398. #else
  399. xhemm_outcopyTS, xhemm_oltcopyTS,
  400. #endif
  401. xhemm_outcopyTS, xhemm_oltcopyTS,
  402. xgemm3m_kernelTS,
  403. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  404. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  405. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  406. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  407. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  408. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  409. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  410. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  411. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  412. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  413. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  414. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  415. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  416. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  417. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  418. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  419. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  420. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  421. #ifndef NO_LAPACK
  422. xneg_tcopyTS, xlaswp_ncopyTS,
  423. #else
  424. NULL, NULL,
  425. #endif
  426. #endif
  /* Trailer: pointer to the run-time tuning routine defined later in this
     file, followed by the three *NUMOPT constants (listed regardless of
     EXPRECISION). */
  427. init_parameter,
  428. SNUMOPT, DNUMOPT, QNUMOPT,
  429. };
  #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup, compiled only for ARCH_X86 builds.
   *
   * Executes CPUID leaf 2 and walks the one-byte cache descriptors packed
   * into EAX/EBX/ECX/EDX in that order (the low byte of EAX is skipped, it
   * is not a descriptor).  The first descriptor found in the table below
   * decides the result.
   *
   * Returns the size associated with that descriptor (in KB according to
   * the descriptor table this was taken from), or 0 when no descriptor
   * matches.
   */
  static int get_l2_size_old(void){

    int reg[4];   /* eax, ebx, ecx, edx as returned by cpuid */
    int r, b;

    cpuid(2, &reg[0], &reg[1], &reg[2], &reg[3]);

    for (r = 0; r < 4; r++){
      /* Byte 0 of EAX is skipped; every other byte is a descriptor. */
      for (b = (r == 0) ? 1 : 0; b < 4; b++){

        switch (BITMASK(reg[r], 8 * b, 0xff)){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          /* Descriptor 48h is a 3 MB L2 cache: 3 * 1024 = 3072
             (the former value 3184 was a typo). */
          return 3072;

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;

        default :
          /* Not an L2 descriptor known to this table; keep scanning. */
          break;
        }
      }
    }

    return 0;
  }
  #endif
  /*
   * Report the L2 cache size.
   *
   * Reads CPUID leaf 0x80000006 and takes bits 16..31 of ECX.  On ARCH_X86
   * builds a non-positive reading falls back to get_l2_size_old(); on other
   * builds the field is returned as read.
   * NOTE(review): the value is presumably expressed in KB -- confirm against
   * the CPUID documentation.
   */
  static __inline__ int get_l2_size(void){

    int reg_a, reg_b, reg_c, reg_d;
    int size;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    size = BITMASK(reg_c, 16, 0xffff);

  #ifdef ARCH_X86
    /* Extended leaf reported nothing usable: consult the descriptor table. */
    if (size <= 0) size = get_l2_size_old();
  #endif

    return size;
  }
  /*
   * Report the L3 cache size.
   *
   * Reads CPUID leaf 0x80000006, extracts the 14-bit field starting at
   * bit 18 of EDX and scales it by 512.
   * NOTE(review): the field is presumably counted in 512 KB units, making
   * the result KB -- confirm against the CPUID documentation.
   */
  static __inline__ int get_l3_size(void){

    int reg_a, reg_b, reg_c, reg_d;
    int units;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    units = BITMASK(reg_d, 18, 0x3fff);

    return units * 512;
  }
  516. static void init_parameter(void) {
  517. int l2 = get_l2_size();
  518. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  519. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  520. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  521. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  522. #ifdef EXPRECISION
  523. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  524. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  525. #endif
  526. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH)
  527. #ifdef DEBUG
  528. fprintf(stderr, "Katmai, Coppermine, Banias\n");
  529. #endif
  530. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  531. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  532. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  533. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  534. #ifdef EXPRECISION
  535. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  536. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  537. #endif
  538. #endif
  539. #ifdef CORE_NORTHWOOD
  540. #ifdef DEBUG
  541. fprintf(stderr, "Northwood\n");
  542. #endif
  543. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  544. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  545. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  546. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  547. #ifdef EXPRECISION
  548. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  549. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  550. #endif
  551. #endif
  552. #ifdef ATOM
  553. #ifdef DEBUG
  554. fprintf(stderr, "Atom\n");
  555. #endif
  556. TABLE_NAME.sgemm_p = 256;
  557. TABLE_NAME.dgemm_p = 128;
  558. TABLE_NAME.cgemm_p = 128;
  559. TABLE_NAME.zgemm_p = 64;
  560. #ifdef EXPRECISION
  561. TABLE_NAME.qgemm_p = 64;
  562. TABLE_NAME.xgemm_p = 32;
  563. #endif
  564. #endif
  565. #ifdef CORE_PRESCOTT
  566. #ifdef DEBUG
  567. fprintf(stderr, "Prescott\n");
  568. #endif
  569. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  570. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  571. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  572. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  573. #ifdef EXPRECISION
  574. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  575. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  576. #endif
  577. #endif
  578. #ifdef CORE2
  579. #ifdef DEBUG
  580. fprintf(stderr, "Core2\n");
  581. #endif
  582. TABLE_NAME.sgemm_p = 92 * (l2 >> 9);
  583. TABLE_NAME.dgemm_p = 46 * (l2 >> 9);
  584. TABLE_NAME.cgemm_p = 46 * (l2 >> 9);
  585. TABLE_NAME.zgemm_p = 23 * (l2 >> 9);
  586. #ifdef EXPRECISION
  587. TABLE_NAME.qgemm_p = 92 * (l2 >> 9);
  588. TABLE_NAME.xgemm_p = 46 * (l2 >> 9);
  589. #endif
  590. #endif
  591. #ifdef PENRYN
  592. #ifdef DEBUG
  593. fprintf(stderr, "Penryn\n");
  594. #endif
  595. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  596. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  597. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  598. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  599. #ifdef EXPRECISION
  600. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  601. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  602. #endif
  603. #endif
  604. #ifdef NEHALEM
  605. #ifdef DEBUG
  606. fprintf(stderr, "Nehalem\n");
  607. #endif
  608. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  609. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  610. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  611. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  612. #ifdef EXPRECISION
  613. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  614. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  615. #endif
  616. #endif
  617. #ifdef OPTERON
  618. #ifdef DEBUG
  619. fprintf(stderr, "Opteron\n");
  620. #endif
  621. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  622. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  623. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  624. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  625. #ifdef EXPRECISION
  626. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  627. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  628. #endif
  629. #endif
  630. #ifdef BARCELONA
  631. #ifdef DEBUG
  632. fprintf(stderr, "Barcelona\n");
  633. #endif
  634. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  635. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  636. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  637. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  638. #ifdef EXPRECISION
  639. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  640. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  641. #endif
  642. #endif
  643. #ifdef NANO
  644. #ifdef DEBUG
  645. fprintf(stderr, "NANO\n");
  646. #endif
  647. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  648. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  649. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  650. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  651. #ifdef EXPRECISION
  652. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  653. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  654. #endif
  655. #endif
  656. TABLE_NAME.sgemm_p = (TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) & ~(SGEMM_DEFAULT_UNROLL_M - 1);
  657. TABLE_NAME.dgemm_p = (TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) & ~(DGEMM_DEFAULT_UNROLL_M - 1);
  658. TABLE_NAME.cgemm_p = (TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) & ~(CGEMM_DEFAULT_UNROLL_M - 1);
  659. TABLE_NAME.zgemm_p = (TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) & ~(ZGEMM_DEFAULT_UNROLL_M - 1);
  660. #ifdef QUAD_PRECISION
  661. TABLE_NAME.qgemm_p = (TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) & ~(QGEMM_DEFAULT_UNROLL_M - 1);
  662. TABLE_NAME.xgemm_p = (TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) & ~(XGEMM_DEFAULT_UNROLL_M - 1);
  663. #endif
  664. #ifdef DEBUG
  665. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  666. #endif
  667. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  668. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  669. + TABLE_NAME.align) & ~TABLE_NAME.align)
  670. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  671. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  672. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  673. + TABLE_NAME.align) & ~TABLE_NAME.align)
  674. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  675. #ifdef EXPRECISION
  676. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  677. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  678. + TABLE_NAME.align) & ~TABLE_NAME.align)
  679. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  680. #endif
  681. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  682. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  683. + TABLE_NAME.align) & ~TABLE_NAME.align)
  684. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  685. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  686. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  687. + TABLE_NAME.align) & ~TABLE_NAME.align)
  688. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  689. #ifdef EXPRECISION
  690. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  691. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  692. + TABLE_NAME.align) & ~TABLE_NAME.align)
  693. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  694. #endif
  695. }

OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.