You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

setparam-ref.c 53 kB

6 years ago
6 years ago
6 years ago
2 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG /* undefine DEBUG here, overriding any definition from the build flags or the headers included above */
  45. static void init_parameter(void); /* forward declaration (file-local linkage); the definition is not visible in this excerpt */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. SBGEMM_ALIGN_K,
  58. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  59. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  60. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  61. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  62. dsdot_kTS,
  63. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  64. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  65. ssymv_LTS, ssymv_UTS,
  66. sbgemm_kernelTS, sbgemm_betaTS,
  67. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  68. sbgemm_incopyTS, sbgemm_itcopyTS,
  69. #else
  70. sbgemm_oncopyTS, sbgemm_otcopyTS,
  71. #endif
  72. sbgemm_oncopyTS, sbgemm_otcopyTS,
  73. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  74. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  75. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  76. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  77. #else
  78. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  79. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  80. #endif
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  84. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  85. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  86. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  87. #else
  88. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  89. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  90. #endif
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  94. ssymm_iutcopyTS, ssymm_iltcopyTS,
  95. #else
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  97. #endif
  98. ssymm_outcopyTS, ssymm_oltcopyTS,
  99. #ifndef NO_LAPACK
  100. sneg_tcopyTS, slaswp_ncopyTS,
  101. #else
  102. NULL,NULL,
  103. #endif
  104. #ifdef SMALL_MATRIX_OPT
  105. sbgemm_small_matrix_permitTS,
  106. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  107. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  108. #endif
  109. #endif
  110. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  111. 0, 0, 0,
  112. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  113. #ifdef SGEMM_DEFAULT_UNROLL_MN
  114. SGEMM_DEFAULT_UNROLL_MN,
  115. #else
  116. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  117. #endif
  118. #endif
  119. #ifdef HAVE_EXCLUSIVE_CACHE
  120. 1,
  121. #else
  122. 0,
  123. #endif
  124. #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  125. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  126. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  127. snrm2_kTS, sasum_kTS,
  128. #endif
  129. #if BUILD_SINGLE == 1
  130. ssum_kTS,
  131. #endif
  132. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  133. scopy_kTS, sdot_kTS,
  134. // dsdot_kTS,
  135. srot_kTS, saxpy_kTS,
  136. #endif
  137. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  138. sscal_kTS,
  139. #endif
  140. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  141. sswap_kTS,
  142. sgemv_nTS, sgemv_tTS,
  143. #endif
  144. #if BUILD_SINGLE == 1
  145. sger_kTS,
  146. ssymv_LTS, ssymv_UTS,
  147. #endif
  148. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  149. #ifdef ARCH_X86_64
  150. sgemm_directTS,
  151. sgemm_direct_performantTS,
  152. #endif
  153. sgemm_kernelTS, sgemm_betaTS,
  154. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  155. sgemm_incopyTS, sgemm_itcopyTS,
  156. #else
  157. sgemm_oncopyTS, sgemm_otcopyTS,
  158. #endif
  159. sgemm_oncopyTS, sgemm_otcopyTS,
  160. #endif
  161. #if BUILD_SINGLE == 1
  162. #ifdef SMALL_MATRIX_OPT
  163. sgemm_small_matrix_permitTS,
  164. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  165. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  166. #endif
  167. #endif
  168. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  169. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  170. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  171. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  172. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  173. #else
  174. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  175. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  176. #endif
  177. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  178. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  179. #endif
  180. #if BUILD_SINGLE == 1
  181. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  182. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  183. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  184. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  185. #else
  186. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  187. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  188. #endif
  189. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  190. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  191. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  192. ssymm_iutcopyTS, ssymm_iltcopyTS,
  193. #else
  194. ssymm_outcopyTS, ssymm_oltcopyTS,
  195. #endif
  196. ssymm_outcopyTS, ssymm_oltcopyTS,
  197. #endif
  198. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  199. #ifndef NO_LAPACK
  200. sneg_tcopyTS, slaswp_ncopyTS,
  201. #else
  202. NULL,NULL,
  203. #endif
  204. #endif
  205. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  206. 0, 0, 0,
  207. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  208. #ifdef DGEMM_DEFAULT_UNROLL_MN
  209. DGEMM_DEFAULT_UNROLL_MN,
  210. #else
  211. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  212. #endif
  213. #endif
  214. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  215. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  216. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  217. dnrm2_kTS, dasum_kTS,
  218. #endif
  219. #if (BUILD_DOUBLE==1)
  220. dsum_kTS,
  221. #endif
  222. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  223. dcopy_kTS, ddot_kTS,
  224. #endif
  225. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  226. dsdot_kTS,
  227. #endif
  228. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  229. drot_kTS,
  230. daxpy_kTS,
  231. dscal_kTS,
  232. dswap_kTS,
  233. dgemv_nTS, dgemv_tTS,
  234. #endif
  235. #if (BUILD_DOUBLE==1)
  236. dger_kTS,
  237. dsymv_LTS, dsymv_UTS,
  238. #endif
  239. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  240. dgemm_kernelTS, dgemm_betaTS,
  241. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  242. dgemm_incopyTS, dgemm_itcopyTS,
  243. #else
  244. dgemm_oncopyTS, dgemm_otcopyTS,
  245. #endif
  246. dgemm_oncopyTS, dgemm_otcopyTS,
  247. #endif
  248. #if (BUILD_DOUBLE==1)
  249. #ifdef SMALL_MATRIX_OPT
  250. dgemm_small_matrix_permitTS,
  251. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  252. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  253. #endif
  254. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  255. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  256. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  257. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  258. #else
  259. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  260. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  261. #endif
  262. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  263. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  264. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  265. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  266. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  267. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  268. #else
  269. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  270. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  271. #endif
  272. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  273. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  274. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  275. dsymm_iutcopyTS, dsymm_iltcopyTS,
  276. #else
  277. dsymm_outcopyTS, dsymm_oltcopyTS,
  278. #endif
  279. dsymm_outcopyTS, dsymm_oltcopyTS,
  280. #ifndef NO_LAPACK
  281. dneg_tcopyTS, dlaswp_ncopyTS,
  282. #else
  283. NULL, NULL,
  284. #endif
  285. #endif
  286. #ifdef EXPRECISION
  287. 0, 0, 0,
  288. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  289. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  290. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  291. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  292. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  293. qgemv_nTS, qgemv_tTS, qger_kTS,
  294. qsymv_LTS, qsymv_UTS,
  295. qgemm_kernelTS, qgemm_betaTS,
  296. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  297. qgemm_incopyTS, qgemm_itcopyTS,
  298. #else
  299. qgemm_oncopyTS, qgemm_otcopyTS,
  300. #endif
  301. qgemm_oncopyTS, qgemm_otcopyTS,
  302. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  303. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  304. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  305. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  306. #else
  307. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  308. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  309. #endif
  310. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  311. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  312. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  313. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  314. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  315. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  316. #else
  317. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  318. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  319. #endif
  320. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  321. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  322. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  323. qsymm_iutcopyTS, qsymm_iltcopyTS,
  324. #else
  325. qsymm_outcopyTS, qsymm_oltcopyTS,
  326. #endif
  327. qsymm_outcopyTS, qsymm_oltcopyTS,
  328. #ifndef NO_LAPACK
  329. qneg_tcopyTS, qlaswp_ncopyTS,
  330. #else
  331. NULL, NULL,
  332. #endif
  333. #endif
  334. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  335. 0, 0, 0,
  336. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  337. #ifdef CGEMM_DEFAULT_UNROLL_MN
  338. CGEMM_DEFAULT_UNROLL_MN,
  339. #else
  340. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  341. #endif
  342. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  343. #endif
  344. #if (BUILD_COMPLEX)
  345. cnrm2_kTS, casum_kTS, csum_kTS,
  346. #endif
  347. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  348. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  349. #endif
  350. #if (BUILD_COMPLEX)
  351. csrot_kTS,
  352. #endif
  353. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  354. caxpy_kTS,
  355. caxpyc_kTS,
  356. cscal_kTS,
  357. cswap_kTS,
  358. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  359. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  360. #endif
  361. #if (BUILD_COMPLEX)
  362. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  363. csymv_LTS, csymv_UTS,
  364. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  365. #endif
  366. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  367. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  368. cgemm_betaTS,
  369. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  370. cgemm_incopyTS, cgemm_itcopyTS,
  371. #else
  372. cgemm_oncopyTS, cgemm_otcopyTS,
  373. #endif
  374. cgemm_oncopyTS, cgemm_otcopyTS,
  375. #ifdef SMALL_MATRIX_OPT
  376. cgemm_small_matrix_permitTS,
  377. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  378. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  379. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  380. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  381. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  382. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  383. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  384. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  385. #endif
  386. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  387. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  388. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  389. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  390. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  391. #else
  392. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  393. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  394. #endif
  395. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  396. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  397. #endif
  398. #if (BUILD_COMPLEX)
  399. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  400. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  401. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  402. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  403. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  404. #else
  405. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  406. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  407. #endif
  408. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  409. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  410. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  411. csymm_iutcopyTS, csymm_iltcopyTS,
  412. #else
  413. csymm_outcopyTS, csymm_oltcopyTS,
  414. #endif
  415. csymm_outcopyTS, csymm_oltcopyTS,
  416. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  417. chemm_iutcopyTS, chemm_iltcopyTS,
  418. #else
  419. chemm_outcopyTS, chemm_oltcopyTS,
  420. #endif
  421. chemm_outcopyTS, chemm_oltcopyTS,
  422. 0, 0, 0,
  423. #if (USE_GEMM3M)
  424. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  425. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  426. #else
  427. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  428. #endif
  429. cgemm3m_kernelTS,
  430. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  431. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  432. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  433. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  434. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  435. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  436. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  437. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  438. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  439. csymm3m_oucopybTS, csymm3m_olcopybTS,
  440. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  441. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  442. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  443. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  444. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  445. chemm3m_oucopybTS, chemm3m_olcopybTS,
  446. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  447. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  448. #else
  449. 0, 0, 0,
  450. NULL,
  451. NULL, NULL,
  452. NULL, NULL,
  453. NULL, NULL,
  454. NULL, NULL,
  455. NULL, NULL,
  456. NULL, NULL,
  457. NULL, NULL,
  458. NULL, NULL,
  459. NULL, NULL,
  460. NULL, NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. #endif
  470. #endif
  471. #if (BUILD_COMPLEX || BUILD_COMPLEX16)
  472. #ifndef NO_LAPACK
  473. cneg_tcopyTS,
  474. claswp_ncopyTS,
  475. #else
  476. NULL, NULL,
  477. #endif
  478. #endif
  479. #if BUILD_COMPLEX16 == 1
  480. 0, 0, 0,
  481. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  482. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  483. ZGEMM_DEFAULT_UNROLL_MN,
  484. #else
  485. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  486. #endif
  487. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  488. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  489. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  490. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  491. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  492. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  493. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  494. zsymv_LTS, zsymv_UTS,
  495. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  496. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  497. zgemm_betaTS,
  498. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  499. zgemm_incopyTS, zgemm_itcopyTS,
  500. #else
  501. zgemm_oncopyTS, zgemm_otcopyTS,
  502. #endif
  503. zgemm_oncopyTS, zgemm_otcopyTS,
  504. #ifdef SMALL_MATRIX_OPT
  505. zgemm_small_matrix_permitTS,
  506. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  507. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  508. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  509. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  510. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  511. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  512. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  513. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  514. #endif
  515. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  516. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  517. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  518. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  519. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  520. #else
  521. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  522. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  523. #endif
  524. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  525. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  526. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  527. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  528. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  529. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  530. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  531. #else
  532. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  533. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  534. #endif
  535. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  536. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  537. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  538. zsymm_iutcopyTS, zsymm_iltcopyTS,
  539. #else
  540. zsymm_outcopyTS, zsymm_oltcopyTS,
  541. #endif
  542. zsymm_outcopyTS, zsymm_oltcopyTS,
  543. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  544. zhemm_iutcopyTS, zhemm_iltcopyTS,
  545. #else
  546. zhemm_outcopyTS, zhemm_oltcopyTS,
  547. #endif
  548. zhemm_outcopyTS, zhemm_oltcopyTS,
  549. 0, 0, 0,
  550. #if (USE_GEMM3M)
  551. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  552. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  553. #else
  554. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  555. #endif
  556. zgemm3m_kernelTS,
  557. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  558. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  559. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  560. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  561. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  562. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  563. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  564. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  565. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  566. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  567. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  568. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  569. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  570. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  571. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  572. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  573. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  574. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  575. #else
  576. 0, 0, 0,
  577. NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. NULL, NULL,
  583. NULL, NULL,
  584. NULL, NULL,
  585. NULL, NULL,
  586. NULL, NULL,
  587. NULL, NULL,
  588. NULL, NULL,
  589. NULL, NULL,
  590. NULL, NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. NULL, NULL,
  596. #endif
  597. #ifndef NO_LAPACK
  598. zneg_tcopyTS, zlaswp_ncopyTS,
  599. #else
  600. NULL, NULL,
  601. #endif
  602. #endif
  603. #ifdef EXPRECISION
  604. 0, 0, 0,
  605. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  606. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  607. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  608. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  609. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  610. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  611. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  612. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  613. xsymv_LTS, xsymv_UTS,
  614. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  615. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  616. xgemm_betaTS,
  617. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  618. xgemm_incopyTS, xgemm_itcopyTS,
  619. #else
  620. xgemm_oncopyTS, xgemm_otcopyTS,
  621. #endif
  622. xgemm_oncopyTS, xgemm_otcopyTS,
  623. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  624. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  625. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  626. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  627. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  628. #else
  629. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  630. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  631. #endif
  632. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  633. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  634. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  635. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  636. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  637. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  638. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  639. #else
  640. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  641. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  642. #endif
  643. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  644. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  645. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  646. xsymm_iutcopyTS, xsymm_iltcopyTS,
  647. #else
  648. xsymm_outcopyTS, xsymm_oltcopyTS,
  649. #endif
  650. xsymm_outcopyTS, xsymm_oltcopyTS,
  651. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  652. xhemm_iutcopyTS, xhemm_iltcopyTS,
  653. #else
  654. xhemm_outcopyTS, xhemm_oltcopyTS,
  655. #endif
  656. xhemm_outcopyTS, xhemm_oltcopyTS,
  657. 0, 0, 0,
  658. #if (USE_GEMM3M)
  659. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  660. xgemm3m_kernelTS,
  661. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  662. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  663. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  664. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  665. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  666. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  667. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  668. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  669. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  670. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  671. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  672. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  673. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  674. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  675. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  676. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  677. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  678. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  679. #else
  680. 0, 0, 0,
  681. NULL,
  682. NULL, NULL,
  683. NULL, NULL,
  684. NULL, NULL,
  685. NULL, NULL,
  686. NULL, NULL,
  687. NULL, NULL,
  688. NULL, NULL,
  689. NULL, NULL,
  690. NULL, NULL,
  691. NULL, NULL,
  692. NULL, NULL,
  693. NULL, NULL,
  694. NULL, NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. NULL, NULL,
  700. #endif
  701. #ifndef NO_LAPACK
  702. xneg_tcopyTS, xlaswp_ncopyTS,
  703. #else
  704. NULL, NULL,
  705. #endif
  706. #endif
  707. init_parameter,
  708. SNUMOPT, DNUMOPT, QNUMOPT,
  709. #if BUILD_SINGLE == 1
  710. saxpby_kTS,
  711. #endif
  712. #if BUILD_DOUBLE == 1
  713. daxpby_kTS,
  714. #endif
  715. #if BUILD_COMPLEX == 1
  716. caxpby_kTS,
  717. #endif
  718. #if BUILD_COMPLEX16== 1
  719. zaxpby_kTS,
  720. #endif
  721. #if BUILD_SINGLE == 1
  722. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  723. #endif
  724. #if BUILD_DOUBLE== 1
  725. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  726. #endif
  727. #if BUILD_COMPLEX == 1
  728. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  729. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  730. #endif
  731. #if BUILD_COMPLEX16 == 1
  732. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  733. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  734. #endif
  735. #if BUILD_SINGLE == 1
  736. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  737. #endif
  738. #if BUILD_DOUBLE== 1
  739. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  740. #endif
  741. #if BUILD_COMPLEX== 1
  742. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  743. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  744. #endif
  745. #if BUILD_COMPLEX16==1
  746. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  747. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  748. #endif
  749. #if BUILD_SINGLE == 1
  750. sgeadd_kTS,
  751. #endif
  752. #if BUILD_DOUBLE==1
  753. dgeadd_kTS,
  754. #endif
  755. #if BUILD_COMPLEX==1
  756. cgeadd_kTS,
  757. #endif
  758. #if BUILD_COMPLEX16==1
  759. zgeadd_kTS,
  760. #endif
  761. 1, // align_k
  762. };
  763. #if (ARCH_ARM64)
  /*
   * ARM64: copy the compile-time GEMM blocking defaults (P, Q and R) into the
   * dispatch table for every precision enabled in this build.
   *
   * sgemm_q and sgemm_r are set under the same condition as sgemm_p
   * (single OR complex build), because the GEMM3M fallbacks further down read
   * TABLE_NAME.sgemm_q / TABLE_NAME.sgemm_r when no CGEMM3M_DEFAULT_* macro
   * is provided.
   */
  static void init_parameter(void) {

    /* ---- P ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  #endif
  #if BUILD_DOUBLE == 1
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  #endif

    /* ---- Q ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if BUILD_DOUBLE== 1
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX== 1
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

    /* ---- R ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif
  #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif
  #if BUILD_DOUBLE==1
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX==1
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif
  #if BUILD_COMPLEX16==1
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- Extended precision (q*, x*) ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /*
     * ---- GEMM3M ----
     * Each value uses its dedicated *GEMM3M_DEFAULT_* macro when defined and
     * otherwise falls back to the value already stored above for the matching
     * real precision (sgemm_* for cgemm3m, dgemm_* for zgemm3m).
     */
  #if (USE_GEMM3M)
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif

  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif

  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif

  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

  #ifdef EXPRECISION
    /* xgemm3m reuses the qgemm blocking values set above. */
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  856. #else // (ARCH_ARM64)
  857. #if defined(ARCH_MIPS64)
  858. static void init_parameter(void) {
  859. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  860. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  861. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  862. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  863. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  864. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  865. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  866. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  867. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  868. TABLE_NAME.dgemm_r = 640;
  869. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  870. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  871. #ifdef EXPRECISION
  872. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  873. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  874. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  875. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  876. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  877. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  878. #endif
  879. #if defined(USE_GEMM3M)
  880. #ifdef CGEMM3M_DEFAULT_P
  881. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  882. #else
  883. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  884. #endif
  885. #ifdef ZGEMM3M_DEFAULT_P
  886. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  887. #else
  888. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  889. #endif
  890. #ifdef CGEMM3M_DEFAULT_Q
  891. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  892. #else
  893. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  894. #endif
  895. #ifdef ZGEMM3M_DEFAULT_Q
  896. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  897. #else
  898. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  899. #endif
  900. #ifdef CGEMM3M_DEFAULT_R
  901. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  902. #else
  903. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  904. #endif
  905. #ifdef ZGEMM3M_DEFAULT_R
  906. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  907. #else
  908. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  909. #endif
  910. #ifdef EXPRECISION
  911. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  912. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  913. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  914. #endif
  915. #endif
  916. }
  917. #else // (ARCH_MIPS64)
  918. #if (ARCH_LOONGARCH64)
  919. static void init_parameter(void) {
  920. #ifdef BUILD_BFLOAT16
  921. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  922. #endif
  923. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  924. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  925. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  926. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  927. #ifdef BUILD_BFLOAT16
  928. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  929. #endif
  930. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  931. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  932. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  933. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  934. #ifdef BUILD_BFLOAT16
  935. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  936. #endif
  937. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  938. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  939. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  940. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  941. }
  942. #else // (ARCH_LOONGARCH64)
  943. #if (ARCH_POWER)
  944. static void init_parameter(void) {
  945. #ifdef BUILD_BFLOAT16
  946. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  947. #endif
  948. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  949. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  950. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  951. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  952. #ifdef BUILD_BFLOAT16
  953. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  954. #endif
  955. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  956. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  957. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  958. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  959. #ifdef BUILD_BFLOAT16
  960. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  961. #endif
  962. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  963. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  964. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  965. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  966. }
  967. #else //POWER
  968. #if (ARCH_ZARCH)
  969. static void init_parameter(void) {
  970. #ifdef BUILD_BFLOAT16
  971. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  972. #endif
  973. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  974. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  975. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  976. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  977. #ifdef BUILD_BFLOAT16
  978. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  979. #endif
  980. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  981. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  982. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  983. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  984. #ifdef BUILD_BFLOAT16
  985. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  986. #endif
  987. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  988. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  989. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  990. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  991. }
  992. #else //ZARCH
  993. #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup (only built for ARCH_X86), driven by the
   * one-byte cache descriptors returned by CPUID leaf 2.
   *
   * Returns the L2 size in KB for the first descriptor found in the table
   * below.  When no descriptor matches, a warning is written to stderr and
   * 256 is returned.
   */
  static int get_l2_size_old(void){

    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /*
     * Split the four registers into bytes.  The low byte of EAX is skipped:
     * per the Intel SDM it is not a cache descriptor.
     */
    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          /* 3 MB L2 = 3 * 1024 KB (was 3184, which is not a multiple of 1024). */
          return 3072;

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
      }
    }

    /* No known descriptor: warn and fall back to a conservative default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1064. #endif
  /*
   * Report the L2 cache size using extended CPUID leaf 0x80000006.
   *
   * The value is taken from ECX via BITMASK(ecx, 16, 0xffff) and returned
   * as-is when positive.  Otherwise:
   *   - ARCH_X86 builds defer to get_l2_size_old();
   *   - all other builds warn on stderr and return 256.
   */
  static __inline__ int get_l2_size(void){

    int eax, ebx, ecx, edx;
    int size;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    size = BITMASK(ecx, 16, 0xffff);

    /* Common case on every build: the extended leaf gave a usable size. */
    if (size > 0) return size;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Report the L3 cache size using extended CPUID leaf 0x80000006.
   *
   * Returns BITMASK(edx, 18, 0x3fff) scaled by 512.
   * NOTE(review): this matches the AMD layout where EDX[31:18] counts
   * 512 KB units, giving a result in KB - confirm for other vendors.
   */
  static __inline__ int get_l3_size(void){

    int reg_a, reg_b, reg_c, reg_d;

    cpuid(0x80000006, &reg_a, &reg_b, &reg_c, &reg_d);

    return BITMASK(reg_d, 18, 0x3fff) * 512;
  }
  1085. static void init_parameter(void) {
  1086. int l2 = get_l2_size();
  1087. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1088. /* where the GEMM unrolling parameters do not depend on l2 */
  1089. #ifdef BUILD_BFLOAT16
  1090. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1091. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1092. #endif
  1093. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1094. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1095. #endif
  1096. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1097. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1098. #endif
  1099. #if BUILD_COMPLEX == 1
  1100. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1101. #endif
  1102. #if BUILD_COMPLEX16==1
  1103. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1104. #endif
  1105. #if BUILD_COMPLEX == 1
  1106. #ifdef CGEMM3M_DEFAULT_Q
  1107. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1108. #else
  1109. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1110. #endif
  1111. #endif
  1112. #if BUILD_COMPLEX16 == 1
  1113. #ifdef ZGEMM3M_DEFAULT_Q
  1114. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1115. #else
  1116. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1117. #endif
  1118. #endif
  1119. #ifdef EXPRECISION
  1120. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1121. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1122. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1123. #endif
  1124. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1125. #ifdef DEBUG
  1126. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1127. #endif
  1128. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1129. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1130. #endif
  1131. #if BUILD_DOUBLE == 1
  1132. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1133. #endif
  1134. #if BUILD_COMPLEX==1
  1135. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1136. #endif
  1137. #if BUILD_COMPLEX16==1
  1138. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1139. #endif
  1140. #ifdef EXPRECISION
  1141. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1142. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1143. #endif
  1144. #endif
  1145. #ifdef CORE_NORTHWOOD
  1146. #ifdef DEBUG
  1147. fprintf(stderr, "Northwood\n");
  1148. #endif
  1149. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1150. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1151. #endif
  1152. #if BUILD_DOUBLE == 1
  1153. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1154. #endif
  1155. #if BUILD_COMPLEX==1
  1156. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1157. #endif
  1158. #if BUILD_COMPLEX16==1
  1159. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1160. #endif
  1161. #ifdef EXPRECISION
  1162. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1163. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1164. #endif
  1165. #endif
  1166. #ifdef ATOM
  1167. #ifdef DEBUG
  1168. fprintf(stderr, "Atom\n");
  1169. #endif
  1170. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1171. TABLE_NAME.sgemm_p = 256;
  1172. #endif
  1173. #if BUILD_DOUBLE ==1
  1174. TABLE_NAME.dgemm_p = 128;
  1175. #endif
  1176. #if BUILD_COMPLEX==1
  1177. TABLE_NAME.cgemm_p = 128;
  1178. #endif
  1179. #if BUILD_COMPLEX16==1
  1180. TABLE_NAME.zgemm_p = 64;
  1181. #endif
  1182. #ifdef EXPRECISION
  1183. TABLE_NAME.qgemm_p = 64;
  1184. TABLE_NAME.xgemm_p = 32;
  1185. #endif
  1186. #endif
  1187. #ifdef CORE_PRESCOTT
  1188. #ifdef DEBUG
  1189. fprintf(stderr, "Prescott\n");
  1190. #endif
  1191. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1192. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1193. #endif
  1194. #if BUILD_DOUBLE ==1
  1195. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1196. #endif
  1197. #if BUILD_COMPLEX==1
  1198. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1199. #endif
  1200. #if BUILD_COMPLEX16 == 1
  1201. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1202. #endif
  1203. #ifdef EXPRECISION
  1204. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1205. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1206. #endif
  1207. #endif
  1208. #ifdef CORE2
  1209. #ifdef DEBUG
  1210. fprintf(stderr, "Core2\n");
  1211. #endif
  1212. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1213. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1214. #endif
  1215. #if BUILD_DOUBLE==1
  1216. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1217. #endif
  1218. #if BUILD_COMPLEX==1
  1219. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1220. #endif
  1221. #if BUILD_COMPLEX16==1
  1222. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1223. #endif
  1224. #ifdef EXPRECISION
  1225. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1226. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1227. #endif
  1228. #endif
  1229. #ifdef PENRYN
  1230. #ifdef DEBUG
  1231. fprintf(stderr, "Penryn\n");
  1232. #endif
  1233. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1234. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1235. #endif
  1236. #if BUILD_DOUBLE == 1
  1237. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1238. #endif
  1239. #if BUILD_COMPLEX==1
  1240. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1241. #endif
  1242. #if BUILD_COMPLEX16==1
  1243. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1244. #endif
  1245. #ifdef EXPRECISION
  1246. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1247. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1248. #endif
  1249. #endif
  1250. #ifdef DUNNINGTON
  1251. #ifdef DEBUG
  1252. fprintf(stderr, "Dunnington\n");
  1253. #endif
  1254. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1255. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1256. #endif
  1257. #if BUILD_DOUBLE ==1
  1258. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1259. #endif
  1260. #if BUILD_COMPLEX==1
  1261. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1262. #endif
  1263. #if BUILD_COMPLEX16==1
  1264. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1265. #endif
  1266. #ifdef EXPRECISION
  1267. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1268. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1269. #endif
  1270. #endif
  1271. #ifdef NEHALEM
  1272. #ifdef DEBUG
  1273. fprintf(stderr, "Nehalem\n");
  1274. #endif
  1275. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1276. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1277. #endif
  1278. #if BUILD_DOUBLE
  1279. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1280. #endif
  1281. #if BUILD_COMPLEX
  1282. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1283. #endif
  1284. #if BUILD_COMPLEX16
  1285. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1286. #endif
  1287. #ifdef EXPRECISION
  1288. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1289. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1290. #endif
  1291. #endif
  1292. #ifdef SANDYBRIDGE
  1293. #ifdef DEBUG
  1294. fprintf(stderr, "Sandybridge\n");
  1295. #endif
  1296. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1297. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1298. #endif
  1299. #if BUILD_DOUBLE
  1300. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1301. #endif
  1302. #if BUILD_COMPLEX
  1303. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1304. #endif
  1305. #if BUILD_COMPLEX16
  1306. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1307. #endif
  1308. #ifdef EXPRECISION
  1309. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1310. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1311. #endif
  1312. #endif
  1313. #ifdef HASWELL
  1314. #ifdef DEBUG
  1315. fprintf(stderr, "Haswell\n");
  1316. #endif
  1317. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1318. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1319. #endif
  1320. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1321. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1322. #endif
  1323. #if BUILD_COMPLEX
  1324. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1325. #endif
  1326. #if BUILD_COMPLEX16
  1327. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1328. #endif
  1329. #ifdef EXPRECISION
  1330. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1331. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1332. #endif
  1333. #endif
  1334. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1335. #ifdef DEBUG
  1336. fprintf(stderr, "SkylakeX\n");
  1337. #endif
  1338. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1339. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1340. #endif
  1341. #if BUILD_DOUBLE
  1342. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1343. #endif
  1344. #if BUILD_COMPLEX
  1345. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1346. #endif
  1347. #if BUILD_COMPLEX16
  1348. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1349. #endif
  1350. #ifdef EXPRECISION
  1351. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1352. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1353. #endif
  1354. #endif
  1355. #ifdef OPTERON
  1356. #ifdef DEBUG
  1357. fprintf(stderr, "Opteron\n");
  1358. #endif
  1359. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1360. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1361. #endif
  1362. #if BUILD_DOUBLE
  1363. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1364. #endif
  1365. #if BUILD_COMPLEX
  1366. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1367. #endif
  1368. #if BUILD_COMPLEX16
  1369. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1370. #endif
  1371. #ifdef EXPRECISION
  1372. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1373. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1374. #endif
  1375. #endif
  1376. #ifdef BARCELONA
  1377. #ifdef DEBUG
  1378. fprintf(stderr, "Barcelona\n");
  1379. #endif
  1380. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1381. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1382. #endif
  1383. #if BUILD_DOUBLE
  1384. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1385. #endif
  1386. #if BUILD_COMPLEX
  1387. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1388. #endif
  1389. #if BUILD_COMPLEX16
  1390. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1391. #endif
  1392. #ifdef EXPRECISION
  1393. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1394. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1395. #endif
  1396. #endif
  1397. #ifdef BOBCAT
  1398. #ifdef DEBUG
  1399. fprintf(stderr, "Bobcate\n");
  1400. #endif
  1401. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1402. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1403. #endif
  1404. #if BUILD_DOUBLE
  1405. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1406. #endif
  1407. #if BUILD_COMPLEX
  1408. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1409. #endif
  1410. #if BUILD_COMPLEX16
  1411. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1412. #endif
  1413. #ifdef EXPRECISION
  1414. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1415. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1416. #endif
  1417. #endif
  1418. #ifdef BULLDOZER
  1419. #ifdef DEBUG
  1420. fprintf(stderr, "Bulldozer\n");
  1421. #endif
  1422. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1423. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1424. #endif
  1425. #if BUILD_DOUBLE
  1426. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1427. #endif
  1428. #if BUILD_COMPLEX
  1429. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1430. #endif
  1431. #if BUILD_COMPLEX16
  1432. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1433. #endif
  1434. #ifdef EXPRECISION
  1435. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1436. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1437. #endif
  1438. #endif
  1439. #ifdef EXCAVATOR
  1440. #ifdef DEBUG
  1441. fprintf(stderr, "Excavator\n");
  1442. #endif
  1443. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1444. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1445. #endif
  1446. #if BUILD_DOUBLE
  1447. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1448. #endif
  1449. #if BUILD_COMPLEX
  1450. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1451. #endif
  1452. #if BUILD_COMPLEX16
  1453. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1454. #endif
  1455. #ifdef EXPRECISION
  1456. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1457. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1458. #endif
  1459. #endif
  1460. #ifdef PILEDRIVER
  1461. #ifdef DEBUG
  1462. fprintf(stderr, "Piledriver\n");
  1463. #endif
  1464. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1465. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1466. #endif
  1467. #if BUILD_DOUBLE
  1468. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1469. #endif
  1470. #if BUILD_COMPLEX
  1471. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1472. #endif
  1473. #if BUILD_COMPLEX16
  1474. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1475. #endif
  1476. #ifdef EXPRECISION
  1477. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1478. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1479. #endif
  1480. #endif
  1481. #ifdef STEAMROLLER
  1482. #ifdef DEBUG
  1483. fprintf(stderr, "Steamroller\n");
  1484. #endif
  1485. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1486. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1487. #endif
  1488. #if BUILD_DOUBLE
  1489. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1490. #endif
  1491. #if BUILD_COMPLEX
  1492. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1493. #endif
  1494. #if BUILD_COMPLEX16
  1495. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1496. #endif
  1497. #ifdef EXPRECISION
  1498. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1499. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1500. #endif
  1501. #endif
  1502. #ifdef ZEN
  1503. #ifdef DEBUG
  1504. fprintf(stderr, "Zen\n");
  1505. #endif
  1506. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1507. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1508. #endif
  1509. #if BUILD_DOUBLE
  1510. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1511. #endif
  1512. #if BUILD_COMPLEX
  1513. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1514. #endif
  1515. #if BUILD_COMPLEX16
  1516. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1517. #endif
  1518. #ifdef EXPRECISION
  1519. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1520. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1521. #endif
  1522. #endif
  1523. #ifdef NANO
  1524. #ifdef DEBUG
  1525. fprintf(stderr, "NANO\n");
  1526. #endif
  1527. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1528. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1529. #endif
  1530. #if (BUILD_DOUBLE==1)
  1531. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1532. #endif
  1533. #if (BUILD_COMPLEX==1)
  1534. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1535. #endif
  1536. #if (BUILD_COMPLEX16==1)
  1537. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1538. #endif
  1539. #ifdef EXPRECISION
  1540. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1541. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1542. #endif
  1543. #endif
  1544. #if BUILD_COMPLEX==1
  1545. #ifdef CGEMM3M_DEFAULT_P
  1546. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1547. #else
  1548. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1549. #endif
  1550. #endif
  1551. #if BUILD_COMPLEX16==1
  1552. #ifdef ZGEMM3M_DEFAULT_P
  1553. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1554. #else
  1555. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1556. #endif
  1557. #endif
  1558. #ifdef EXPRECISION
  1559. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1560. #endif
  1561. #if BUILD_SINGLE == 1
  1562. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1563. #endif
  1564. #if BUILD_DOUBLE== 1
  1565. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1566. #endif
  1567. #if BUILD_COMPLEX==1
  1568. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1569. #endif
  1570. #if BUILD_COMPLEX16==1
  1571. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1572. #endif
  1573. #if BUILD_COMPLEX==1
  1574. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1575. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1576. #else
  1577. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1578. #endif
  1579. #endif
  1580. #if BUILD_COMPLEX16==1
  1581. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1582. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1583. #else
  1584. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1585. #endif
  1586. #endif
  1587. #ifdef QUAD_PRECISION
  1588. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1589. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1590. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1591. #endif
  1592. #ifdef DEBUG
  1593. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1594. #endif
  1595. #if BUILD_BFLOAT16==1
  1596. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1597. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1598. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1599. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1600. #endif
  1601. #if BUILD_SINGLE==1
  1602. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1603. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1604. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1605. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1606. #endif
  1607. #if BUILD_DOUBLE==1
  1608. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1609. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1610. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1611. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1612. #endif
  1613. #ifdef EXPRECISION
  1614. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1615. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1616. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1617. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1618. #endif
  1619. #if BUILD_COMPLEX ==1
  1620. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1621. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1622. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1623. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1624. #endif
  1625. #if BUILD_COMPLEX16 ==1
  1626. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1627. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1628. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1629. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1630. #endif
  1631. #if BUILD_COMPLEX == 1
  1632. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1633. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1634. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1635. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1636. #endif
  1637. #if BUILD_COMPLEX16 == 1
  1638. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1639. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1640. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1641. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1642. #endif
  1643. #ifdef EXPRECISION
  1644. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1645. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1646. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1647. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1648. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1649. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1650. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1651. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1652. #endif
  1653. }
  1654. #endif //POWER
  1655. #endif //ZARCH
  1656. #endif //(ARCH_LOONGARCH64)
  1657. #endif //(ARCH_MIPS64)
  1658. #endif //(ARCH_ARM64)