You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 54 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  /* Remove any existing DEBUG macro definition from this point onward in the file. */
  44. #undef DEBUG
  /* Forward declaration of the file-local (static) function init_parameter(),
   * which takes no arguments and returns nothing.
   * NOTE(review): presumably declared here so it can be referenced before its
   * definition (e.g. from the TABLE_NAME initializer below) - confirm. */
  45. static void init_parameter(void);
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  49. #ifdef BUILD_BFLOAT16
  50. 0, 0, 0,
  51. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  52. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  53. SBGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. SBGEMM_ALIGN_K,
  58. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  59. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  60. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  61. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  62. dsdot_kTS,
  63. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  64. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  65. ssymv_LTS, ssymv_UTS,
  66. sbgemm_kernelTS, sbgemm_betaTS,
  67. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  68. sbgemm_incopyTS, sbgemm_itcopyTS,
  69. #else
  70. sbgemm_oncopyTS, sbgemm_otcopyTS,
  71. #endif
  72. sbgemm_oncopyTS, sbgemm_otcopyTS,
  73. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  74. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  75. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  76. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  77. #else
  78. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  79. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  80. #endif
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  84. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  85. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  86. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  87. #else
  88. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  89. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  90. #endif
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  94. ssymm_iutcopyTS, ssymm_iltcopyTS,
  95. #else
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  97. #endif
  98. ssymm_outcopyTS, ssymm_oltcopyTS,
  99. #ifndef NO_LAPACK
  100. sneg_tcopyTS, slaswp_ncopyTS,
  101. #else
  102. NULL,NULL,
  103. #endif
  104. #ifdef SMALL_MATRIX_OPT
  105. sbgemm_small_matrix_permitTS,
  106. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  107. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  108. #endif
  109. #endif
  110. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  111. 0, 0, 0,
  112. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  113. #ifdef SGEMM_DEFAULT_UNROLL_MN
  114. SGEMM_DEFAULT_UNROLL_MN,
  115. #else
  116. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  117. #endif
  118. #endif
  119. #ifdef HAVE_EXCLUSIVE_CACHE
  120. 1,
  121. #else
  122. 0,
  123. #endif
  124. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  125. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  126. #endif
  127. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  128. isamax_kTS,
  129. #endif
  130. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  131. isamin_kTS, ismax_kTS, ismin_kTS,
  132. snrm2_kTS, sasum_kTS,
  133. #endif
  134. #if BUILD_SINGLE == 1
  135. ssum_kTS,
  136. #endif
  137. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  138. scopy_kTS, sdot_kTS,
  139. // dsdot_kTS,
  140. srot_kTS, saxpy_kTS,
  141. #endif
  142. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  143. sscal_kTS,
  144. #endif
  145. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  146. sswap_kTS,
  147. sgemv_nTS, sgemv_tTS,
  148. #endif
  149. #if BUILD_SINGLE == 1
  150. sger_kTS,
  151. #endif
  152. #if BUILD_SINGLE == 1
  153. ssymv_LTS, ssymv_UTS,
  154. #endif
  155. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  156. #ifdef ARCH_X86_64
  157. sgemm_directTS,
  158. sgemm_direct_performantTS,
  159. #endif
  160. sgemm_kernelTS, sgemm_betaTS,
  161. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  162. sgemm_incopyTS, sgemm_itcopyTS,
  163. #else
  164. sgemm_oncopyTS, sgemm_otcopyTS,
  165. #endif
  166. sgemm_oncopyTS, sgemm_otcopyTS,
  167. #endif
  168. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  169. #ifdef SMALL_MATRIX_OPT
  170. sgemm_small_matrix_permitTS,
  171. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  172. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  173. #endif
  174. #endif
  175. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  176. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  177. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  178. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  179. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  180. #else
  181. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  182. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  183. #endif
  184. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  185. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  186. #endif
  187. #if (BUILD_SINGLE==1)
  188. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  189. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  190. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  191. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  192. #else
  193. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  194. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  195. #endif
  196. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  197. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  198. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  199. ssymm_iutcopyTS, ssymm_iltcopyTS,
  200. #else
  201. ssymm_outcopyTS, ssymm_oltcopyTS,
  202. #endif
  203. ssymm_outcopyTS, ssymm_oltcopyTS,
  204. #ifndef NO_LAPACK
  205. sneg_tcopyTS, slaswp_ncopyTS,
  206. #else
  207. NULL,NULL,
  208. #endif
  209. #endif
  210. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  211. 0, 0, 0,
  212. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  213. #ifdef DGEMM_DEFAULT_UNROLL_MN
  214. DGEMM_DEFAULT_UNROLL_MN,
  215. #else
  216. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  217. #endif
  218. #endif
  219. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  220. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  221. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  222. dnrm2_kTS, dasum_kTS,
  223. #endif
  224. #if (BUILD_DOUBLE==1)
  225. dsum_kTS,
  226. #endif
  227. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  228. dcopy_kTS, ddot_kTS,
  229. #endif
  230. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  231. dsdot_kTS,
  232. #endif
  233. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  234. drot_kTS,
  235. daxpy_kTS,
  236. dscal_kTS,
  237. dswap_kTS,
  238. dgemv_nTS, dgemv_tTS,
  239. #endif
  240. #if (BUILD_DOUBLE==1)
  241. dger_kTS,
  242. dsymv_LTS, dsymv_UTS,
  243. #endif
  244. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  245. dgemm_kernelTS, dgemm_betaTS,
  246. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  247. dgemm_incopyTS, dgemm_itcopyTS,
  248. #else
  249. dgemm_oncopyTS, dgemm_otcopyTS,
  250. #endif
  251. dgemm_oncopyTS, dgemm_otcopyTS,
  252. #endif
  253. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  254. #ifdef SMALL_MATRIX_OPT
  255. dgemm_small_matrix_permitTS,
  256. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  257. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  258. #endif
  259. #endif
  260. #if (BUILD_DOUBLE==1)
  261. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  262. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  263. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  264. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  265. #else
  266. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  267. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  268. #endif
  269. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  270. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  271. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  272. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  273. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  274. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  275. #else
  276. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  277. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  278. #endif
  279. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  280. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  281. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  282. dsymm_iutcopyTS, dsymm_iltcopyTS,
  283. #else
  284. dsymm_outcopyTS, dsymm_oltcopyTS,
  285. #endif
  286. dsymm_outcopyTS, dsymm_oltcopyTS,
  287. #ifndef NO_LAPACK
  288. dneg_tcopyTS, dlaswp_ncopyTS,
  289. #else
  290. NULL, NULL,
  291. #endif
  292. #endif
  293. #ifdef EXPRECISION
  294. 0, 0, 0,
  295. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  296. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  297. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  298. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  299. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  300. qgemv_nTS, qgemv_tTS, qger_kTS,
  301. qsymv_LTS, qsymv_UTS,
  302. qgemm_kernelTS, qgemm_betaTS,
  303. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  304. qgemm_incopyTS, qgemm_itcopyTS,
  305. #else
  306. qgemm_oncopyTS, qgemm_otcopyTS,
  307. #endif
  308. qgemm_oncopyTS, qgemm_otcopyTS,
  309. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  310. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  311. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  312. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  313. #else
  314. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  315. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  316. #endif
  317. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  318. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  319. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  320. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  321. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  322. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  323. #else
  324. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  325. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  326. #endif
  327. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  328. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  329. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  330. qsymm_iutcopyTS, qsymm_iltcopyTS,
  331. #else
  332. qsymm_outcopyTS, qsymm_oltcopyTS,
  333. #endif
  334. qsymm_outcopyTS, qsymm_oltcopyTS,
  335. #ifndef NO_LAPACK
  336. qneg_tcopyTS, qlaswp_ncopyTS,
  337. #else
  338. NULL, NULL,
  339. #endif
  340. #endif
  341. #if (BUILD_COMPLEX)
  342. 0, 0, 0,
  343. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  344. #ifdef CGEMM_DEFAULT_UNROLL_MN
  345. CGEMM_DEFAULT_UNROLL_MN,
  346. #else
  347. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  348. #endif
  349. #if (BUILD_COMPLEX)
  350. camax_kTS, camin_kTS,
  351. #endif
  352. #if (BUILD_COMPLEX)
  353. icamax_kTS,
  354. #endif
  355. #if (BUILD_COMPLEX)
  356. icamin_kTS,
  357. cnrm2_kTS, casum_kTS, csum_kTS,
  358. #endif
  359. #if (BUILD_COMPLEX)
  360. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  361. #endif
  362. #if (BUILD_COMPLEX)
  363. csrot_kTS,
  364. #endif
  365. #if (BUILD_COMPLEX)
  366. caxpy_kTS,
  367. caxpyc_kTS,
  368. cscal_kTS,
  369. cswap_kTS,
  370. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  371. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  372. #endif
  373. #if (BUILD_COMPLEX)
  374. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  375. csymv_LTS, csymv_UTS,
  376. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  377. #endif
  378. #if (BUILD_COMPLEX)
  379. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  380. cgemm_betaTS,
  381. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  382. cgemm_incopyTS, cgemm_itcopyTS,
  383. #else
  384. cgemm_oncopyTS, cgemm_otcopyTS,
  385. #endif
  386. cgemm_oncopyTS, cgemm_otcopyTS,
  387. #ifdef SMALL_MATRIX_OPT
  388. cgemm_small_matrix_permitTS,
  389. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  390. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  391. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  392. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  393. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  394. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  395. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  396. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  397. #endif
  398. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  399. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  400. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  401. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  402. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  403. #else
  404. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  405. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  406. #endif
  407. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  408. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  409. #endif
  410. #endif
  411. #if (BUILD_COMPLEX)
  412. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  413. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  414. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  415. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  416. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  417. #else
  418. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  419. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  420. #endif
  421. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  422. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  423. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  424. csymm_iutcopyTS, csymm_iltcopyTS,
  425. #else
  426. csymm_outcopyTS, csymm_oltcopyTS,
  427. #endif
  428. csymm_outcopyTS, csymm_oltcopyTS,
  429. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  430. chemm_iutcopyTS, chemm_iltcopyTS,
  431. #else
  432. chemm_outcopyTS, chemm_oltcopyTS,
  433. #endif
  434. chemm_outcopyTS, chemm_oltcopyTS,
  435. 0, 0, 0,
  436. #if (USE_GEMM3M)
  437. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  438. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  439. #else
  440. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  441. #endif
  442. cgemm3m_kernelTS,
  443. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  444. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  445. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  446. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  447. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  448. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  449. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  450. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  451. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  452. csymm3m_oucopybTS, csymm3m_olcopybTS,
  453. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  454. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  455. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  456. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  457. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  458. chemm3m_oucopybTS, chemm3m_olcopybTS,
  459. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  460. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  461. #else
  462. 0, 0, 0,
  463. NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. NULL, NULL,
  480. NULL, NULL,
  481. NULL, NULL,
  482. #endif
  483. #endif
  484. #if (BUILD_COMPLEX)
  485. #ifndef NO_LAPACK
  486. cneg_tcopyTS,
  487. claswp_ncopyTS,
  488. #else
  489. NULL, NULL,
  490. #endif
  491. #endif
  492. #if BUILD_COMPLEX16 == 1
  493. 0, 0, 0,
  494. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  495. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  496. ZGEMM_DEFAULT_UNROLL_MN,
  497. #else
  498. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  499. #endif
  500. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  501. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  502. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  503. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  504. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  505. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  506. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  507. zsymv_LTS, zsymv_UTS,
  508. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  509. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  510. zgemm_betaTS,
  511. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  512. zgemm_incopyTS, zgemm_itcopyTS,
  513. #else
  514. zgemm_oncopyTS, zgemm_otcopyTS,
  515. #endif
  516. zgemm_oncopyTS, zgemm_otcopyTS,
  517. #ifdef SMALL_MATRIX_OPT
  518. zgemm_small_matrix_permitTS,
  519. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  520. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  521. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  522. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  523. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  524. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  525. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  526. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  527. #endif
  528. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  529. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  530. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  531. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  532. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  533. #else
  534. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  535. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  536. #endif
  537. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  538. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  539. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  540. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  541. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  542. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  543. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  544. #else
  545. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  546. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  547. #endif
  548. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  549. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  550. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  551. zsymm_iutcopyTS, zsymm_iltcopyTS,
  552. #else
  553. zsymm_outcopyTS, zsymm_oltcopyTS,
  554. #endif
  555. zsymm_outcopyTS, zsymm_oltcopyTS,
  556. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  557. zhemm_iutcopyTS, zhemm_iltcopyTS,
  558. #else
  559. zhemm_outcopyTS, zhemm_oltcopyTS,
  560. #endif
  561. zhemm_outcopyTS, zhemm_oltcopyTS,
  562. 0, 0, 0,
  563. #if (USE_GEMM3M)
  564. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  565. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  566. #else
  567. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  568. #endif
  569. zgemm3m_kernelTS,
  570. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  571. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  572. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  573. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  574. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  575. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  576. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  577. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  578. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  579. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  580. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  581. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  582. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  583. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  584. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  585. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  586. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  587. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  588. #else
  589. 0, 0, 0,
  590. NULL,
  591. NULL, NULL,
  592. NULL, NULL,
  593. NULL, NULL,
  594. NULL, NULL,
  595. NULL, NULL,
  596. NULL, NULL,
  597. NULL, NULL,
  598. NULL, NULL,
  599. NULL, NULL,
  600. NULL, NULL,
  601. NULL, NULL,
  602. NULL, NULL,
  603. NULL, NULL,
  604. NULL, NULL,
  605. NULL, NULL,
  606. NULL, NULL,
  607. NULL, NULL,
  608. NULL, NULL,
  609. #endif
  610. #ifndef NO_LAPACK
  611. zneg_tcopyTS, zlaswp_ncopyTS,
  612. #else
  613. NULL, NULL,
  614. #endif
  615. #endif
  616. #ifdef EXPRECISION
  617. 0, 0, 0,
  618. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  619. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  620. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  621. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  622. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  623. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  624. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  625. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  626. xsymv_LTS, xsymv_UTS,
  627. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  628. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  629. xgemm_betaTS,
  630. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  631. xgemm_incopyTS, xgemm_itcopyTS,
  632. #else
  633. xgemm_oncopyTS, xgemm_otcopyTS,
  634. #endif
  635. xgemm_oncopyTS, xgemm_otcopyTS,
  636. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  637. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  638. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  639. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  640. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  641. #else
  642. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  643. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  644. #endif
  645. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  646. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  647. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  648. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  649. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  650. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  651. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  652. #else
  653. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  654. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  655. #endif
  656. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  657. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  658. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  659. xsymm_iutcopyTS, xsymm_iltcopyTS,
  660. #else
  661. xsymm_outcopyTS, xsymm_oltcopyTS,
  662. #endif
  663. xsymm_outcopyTS, xsymm_oltcopyTS,
  664. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  665. xhemm_iutcopyTS, xhemm_iltcopyTS,
  666. #else
  667. xhemm_outcopyTS, xhemm_oltcopyTS,
  668. #endif
  669. xhemm_outcopyTS, xhemm_oltcopyTS,
  670. 0, 0, 0,
  671. #if (USE_GEMM3M)
  672. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  673. xgemm3m_kernelTS,
  674. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  675. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  676. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  677. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  678. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  679. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  680. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  681. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  682. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  683. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  684. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  685. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  686. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  687. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  688. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  689. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  690. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  691. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  692. #else
  693. 0, 0, 0,
  694. NULL,
  695. NULL, NULL,
  696. NULL, NULL,
  697. NULL, NULL,
  698. NULL, NULL,
  699. NULL, NULL,
  700. NULL, NULL,
  701. NULL, NULL,
  702. NULL, NULL,
  703. NULL, NULL,
  704. NULL, NULL,
  705. NULL, NULL,
  706. NULL, NULL,
  707. NULL, NULL,
  708. NULL, NULL,
  709. NULL, NULL,
  710. NULL, NULL,
  711. NULL, NULL,
  712. NULL, NULL,
  713. #endif
  714. #ifndef NO_LAPACK
  715. xneg_tcopyTS, xlaswp_ncopyTS,
  716. #else
  717. NULL, NULL,
  718. #endif
  719. #endif
  720. init_parameter,
  721. SNUMOPT, DNUMOPT, QNUMOPT,
  722. #if BUILD_SINGLE == 1
  723. saxpby_kTS,
  724. #endif
  725. #if BUILD_DOUBLE == 1
  726. daxpby_kTS,
  727. #endif
  728. #if BUILD_COMPLEX == 1
  729. caxpby_kTS,
  730. #endif
  731. #if BUILD_COMPLEX16== 1
  732. zaxpby_kTS,
  733. #endif
  734. #if BUILD_SINGLE == 1
  735. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  736. #endif
  737. #if BUILD_DOUBLE== 1
  738. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  739. #endif
  740. #if BUILD_COMPLEX == 1
  741. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  742. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  743. #endif
  744. #if BUILD_COMPLEX16 == 1
  745. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  746. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  747. #endif
  748. #if BUILD_SINGLE == 1
  749. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  750. #endif
  751. #if BUILD_DOUBLE== 1
  752. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  753. #endif
  754. #if BUILD_COMPLEX== 1
  755. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  756. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  757. #endif
  758. #if BUILD_COMPLEX16==1
  759. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  760. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  761. #endif
  762. #if BUILD_SINGLE == 1
  763. sgeadd_kTS,
  764. #endif
  765. #if BUILD_DOUBLE==1
  766. dgeadd_kTS,
  767. #endif
  768. #if BUILD_COMPLEX==1
  769. cgeadd_kTS,
  770. #endif
  771. #if BUILD_COMPLEX16==1
  772. zgeadd_kTS,
  773. #endif
  774. };
  775. #if (ARCH_ARM64)
  /*
   * init_parameter (ARM64 build): copy the *_DEFAULT_P / *_DEFAULT_Q /
   * *_DEFAULT_R macros into the matching p/q/r members of TABLE_NAME.
   * Every precision is written only when its BUILD_* switch enables it;
   * the three members of one precision are filled together.
   */
  static void init_parameter(void) {

  #if BUILD_BFLOAT16
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif

    /* sgemm entries are also needed when only the complex build is enabled */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* dgemm entries are also needed when only the complex16 build is enabled */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

  #if defined(EXPRECISION)
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /*
     * GEMM3M entries: take the dedicated xGEMM3M_DEFAULT_* macro when one is
     * defined, otherwise mirror the sgemm_* / dgemm_* / qgemm_* table member.
     */
  #if defined(CGEMM3M_DEFAULT_P)
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #if defined(CGEMM3M_DEFAULT_Q)
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #if defined(CGEMM3M_DEFAULT_R)
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #if defined(ZGEMM3M_DEFAULT_P)
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #if defined(ZGEMM3M_DEFAULT_Q)
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #if defined(ZGEMM3M_DEFAULT_R)
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

  #if defined(EXPRECISION)
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  868. #else // (ARCH_ARM64)
  869. #if defined(ARCH_MIPS64)
  870. static void init_parameter(void) {
  871. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  872. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  873. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  874. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  875. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  876. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  877. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  878. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  879. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  880. TABLE_NAME.dgemm_r = 640;
  881. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  882. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  883. #ifdef EXPRECISION
  884. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  885. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  886. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  887. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  888. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  889. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  890. #endif
  891. #if defined(USE_GEMM3M)
  892. #ifdef CGEMM3M_DEFAULT_P
  893. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  894. #else
  895. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  896. #endif
  897. #ifdef ZGEMM3M_DEFAULT_P
  898. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  899. #else
  900. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  901. #endif
  902. #ifdef CGEMM3M_DEFAULT_Q
  903. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  904. #else
  905. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  906. #endif
  907. #ifdef ZGEMM3M_DEFAULT_Q
  908. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  909. #else
  910. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  911. #endif
  912. #ifdef CGEMM3M_DEFAULT_R
  913. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  914. #else
  915. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  916. #endif
  917. #ifdef ZGEMM3M_DEFAULT_R
  918. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  919. #else
  920. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  921. #endif
  922. #ifdef EXPRECISION
  923. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  924. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  925. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  926. #endif
  927. #endif
  928. }
  929. #else // (ARCH_MIPS64)
  930. #if (ARCH_LOONGARCH64)
  931. static void init_parameter(void) {
  932. #ifdef BUILD_BFLOAT16
  933. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  934. #endif
  935. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  936. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  937. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  938. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  939. #ifdef BUILD_BFLOAT16
  940. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  941. #endif
  942. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  943. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  944. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  945. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  946. #ifdef BUILD_BFLOAT16
  947. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  948. #endif
  949. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  950. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  951. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  952. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  953. }
  954. #else // (ARCH_LOONGARCH64)
  955. #if (ARCH_POWER)
  956. static void init_parameter(void) {
  957. #ifdef BUILD_BFLOAT16
  958. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  959. #endif
  960. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  961. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  962. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  963. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  964. #ifdef BUILD_BFLOAT16
  965. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  966. #endif
  967. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  968. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  969. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  970. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  971. #ifdef BUILD_BFLOAT16
  972. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  973. #endif
  974. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  975. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  976. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  977. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  978. }
  979. #else //POWER
  980. #if (ARCH_ZARCH)
  981. static void init_parameter(void) {
  982. #ifdef BUILD_BFLOAT16
  983. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  984. #endif
  985. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  986. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  987. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  988. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  989. #ifdef BUILD_BFLOAT16
  990. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  991. #endif
  992. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  993. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  994. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  995. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  996. #ifdef BUILD_BFLOAT16
  997. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  998. #endif
  999. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1000. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1001. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1002. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1003. }
  1004. #else //ZARCH
  1005. #ifdef ARCH_X86
  /*
   * get_l2_size_old: derive the L2 cache size from the cache descriptor
   * bytes returned by CPUID leaf 2.
   *
   * The descriptor bytes of eax (excluding its low byte), ebx, ecx and edx
   * are scanned in that order; the first byte that matches a known L2
   * descriptor decides the result.
   *
   * Returns the size in the same unit as the 256 fallback, which the warning
   * text reports as "256k". If no descriptor matches, a warning is printed
   * to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int regs[4];          /* eax, ebx, ecx, edx from CPUID leaf 2 */
    int r, shift;

    cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

    for (r = 0; r < 4; r++) {
      /* Bit 31 set marks the whole register as reserved: its bytes are not
         descriptors and must not be matched against the table below. */
      if (((unsigned int)regs[r] >> 31) != 0) continue;

      /* The low byte of eax is not a descriptor, so eax starts at bits 8..15. */
      for (shift = (r == 0) ? 8 : 0; shift < 32; shift += 8) {
        switch (BITMASK(regs[r], shift, 0xff)) {
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;
          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;
          case 0x3a :
            return 192;
          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;
          case 0x3d :
            return 384;
          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;
          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;
          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;
          case 0x48 :
            /* 3 MB L2 = 3 * 1024; the previous value 3184 was a typo */
            return 3072;
          case 0x49 :
            return 4096;
          case 0x4e :
            return 6144;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1076. #endif
  /*
   * get_l2_size: read the L2 cache size from CPUID leaf 0x80000006
   * (bits 16..31 of ecx).
   *
   * A positive value is returned as-is. Otherwise:
   *   - with ARCH_X86 defined, the result of get_l2_size_old() is returned;
   *   - without it, a warning is printed to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int reported;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    reported = BITMASK(ecx, 16, 0xffff);

    if (reported > 0) {
      return reported;
    }

  #ifdef ARCH_X86
    /* extended leaf gave nothing usable: fall back to the descriptor scan */
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * get_l3_size: query CPUID leaf 0x80000006 and return the 14-bit field at
   * bits 18..31 of edx multiplied by 512.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_field = BITMASK(edx, 18, 0x3fff);
    return l3_field * 512;
  }
  1097. static void init_parameter(void) {
  1098. int l2 = get_l2_size();
  1099. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1100. /* where the GEMM unrolling parameters do not depend on l2 */
  1101. #ifdef BUILD_BFLOAT16
  1102. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1103. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1104. #endif
  1105. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1106. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1107. #endif
  1108. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1109. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1110. #endif
  1111. #if BUILD_COMPLEX == 1
  1112. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1113. #endif
  1114. #if BUILD_COMPLEX16==1
  1115. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1116. #endif
  1117. #if BUILD_COMPLEX == 1
  1118. #ifdef CGEMM3M_DEFAULT_Q
  1119. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1120. #else
  1121. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1122. #endif
  1123. #endif
  1124. #if BUILD_COMPLEX16 == 1
  1125. #ifdef ZGEMM3M_DEFAULT_Q
  1126. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1127. #else
  1128. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1129. #endif
  1130. #endif
  1131. #ifdef EXPRECISION
  1132. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1133. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1134. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1135. #endif
  1136. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1137. #ifdef DEBUG
  1138. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1139. #endif
  1140. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1141. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1142. #endif
  1143. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1144. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1145. #endif
  1146. #if BUILD_COMPLEX==1
  1147. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1148. #endif
  1149. #if BUILD_COMPLEX16==1
  1150. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1151. #endif
  1152. #ifdef EXPRECISION
  1153. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1154. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1155. #endif
  1156. #endif
  1157. #ifdef CORE_NORTHWOOD
  1158. #ifdef DEBUG
  1159. fprintf(stderr, "Northwood\n");
  1160. #endif
  1161. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1162. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1163. #endif
  1164. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1165. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1166. #endif
  1167. #if BUILD_COMPLEX==1
  1168. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1169. #endif
  1170. #if BUILD_COMPLEX16==1
  1171. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1172. #endif
  1173. #ifdef EXPRECISION
  1174. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1175. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1176. #endif
  1177. #endif
  1178. #ifdef ATOM
  1179. #ifdef DEBUG
  1180. fprintf(stderr, "Atom\n");
  1181. #endif
  1182. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1183. TABLE_NAME.sgemm_p = 256;
  1184. #endif
  1185. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1186. TABLE_NAME.dgemm_p = 128;
  1187. #endif
  1188. #if BUILD_COMPLEX==1
  1189. TABLE_NAME.cgemm_p = 128;
  1190. #endif
  1191. #if BUILD_COMPLEX16==1
  1192. TABLE_NAME.zgemm_p = 64;
  1193. #endif
  1194. #ifdef EXPRECISION
  1195. TABLE_NAME.qgemm_p = 64;
  1196. TABLE_NAME.xgemm_p = 32;
  1197. #endif
  1198. #endif
  1199. #ifdef CORE_PRESCOTT
  1200. #ifdef DEBUG
  1201. fprintf(stderr, "Prescott\n");
  1202. #endif
  1203. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1204. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1205. #endif
  1206. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1207. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1208. #endif
  1209. #if BUILD_COMPLEX==1
  1210. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1211. #endif
  1212. #if BUILD_COMPLEX16 == 1
  1213. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1214. #endif
  1215. #ifdef EXPRECISION
  1216. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1217. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1218. #endif
  1219. #endif
  1220. #ifdef CORE2
  1221. #ifdef DEBUG
  1222. fprintf(stderr, "Core2\n");
  1223. #endif
  1224. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1225. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1226. #endif
  1227. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1228. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1229. #endif
  1230. #if BUILD_COMPLEX==1
  1231. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1232. #endif
  1233. #if BUILD_COMPLEX16==1
  1234. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1235. #endif
  1236. #ifdef EXPRECISION
  1237. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1238. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1239. #endif
  1240. #endif
  1241. #ifdef PENRYN
  1242. #ifdef DEBUG
  1243. fprintf(stderr, "Penryn\n");
  1244. #endif
  1245. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1246. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1247. #endif
  1248. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1249. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1250. #endif
  1251. #if BUILD_COMPLEX==1
  1252. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1253. #endif
  1254. #if BUILD_COMPLEX16==1
  1255. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1256. #endif
  1257. #ifdef EXPRECISION
  1258. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1259. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1260. #endif
  1261. #endif
  1262. #ifdef DUNNINGTON
  1263. #ifdef DEBUG
  1264. fprintf(stderr, "Dunnington\n");
  1265. #endif
  1266. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1267. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1268. #endif
  1269. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1270. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1271. #endif
  1272. #if BUILD_COMPLEX==1
  1273. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1274. #endif
  1275. #if BUILD_COMPLEX16==1
  1276. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1277. #endif
  1278. #ifdef EXPRECISION
  1279. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1280. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1281. #endif
  1282. #endif
  1283. #ifdef NEHALEM
  1284. #ifdef DEBUG
  1285. fprintf(stderr, "Nehalem\n");
  1286. #endif
  1287. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1288. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1289. #endif
  1290. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1291. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1292. #endif
  1293. #if BUILD_COMPLEX
  1294. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1295. #endif
  1296. #if BUILD_COMPLEX16
  1297. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1298. #endif
  1299. #ifdef EXPRECISION
  1300. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1301. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1302. #endif
  1303. #endif
  1304. #ifdef SANDYBRIDGE
  1305. #ifdef DEBUG
  1306. fprintf(stderr, "Sandybridge\n");
  1307. #endif
  1308. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1309. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1310. #endif
  1311. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1312. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1313. #endif
  1314. #if BUILD_COMPLEX
  1315. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1316. #endif
  1317. #if BUILD_COMPLEX16
  1318. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1319. #endif
  1320. #ifdef EXPRECISION
  1321. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1322. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1323. #endif
  1324. #endif
  1325. #ifdef HASWELL
  1326. #ifdef DEBUG
  1327. fprintf(stderr, "Haswell\n");
  1328. #endif
  1329. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1330. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1331. #endif
  1332. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1333. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1334. #endif
  1335. #if BUILD_COMPLEX
  1336. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1337. #endif
  1338. #if BUILD_COMPLEX16
  1339. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1340. #endif
  1341. #ifdef EXPRECISION
  1342. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1343. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1344. #endif
  1345. #endif
  1346. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1347. #ifdef DEBUG
  1348. fprintf(stderr, "SkylakeX\n");
  1349. #endif
  1350. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1351. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1352. #endif
  1353. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1354. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1355. #endif
  1356. #if BUILD_COMPLEX
  1357. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1358. #endif
  1359. #if BUILD_COMPLEX16
  1360. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1361. #endif
  1362. #ifdef EXPRECISION
  1363. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1364. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1365. #endif
  1366. #endif
  1367. #ifdef OPTERON
  1368. #ifdef DEBUG
  1369. fprintf(stderr, "Opteron\n");
  1370. #endif
  1371. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1372. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1373. #endif
  1374. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1375. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1376. #endif
  1377. #if BUILD_COMPLEX
  1378. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1379. #endif
  1380. #if BUILD_COMPLEX16
  1381. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1382. #endif
  1383. #ifdef EXPRECISION
  1384. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1385. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1386. #endif
  1387. #endif
  1388. #ifdef BARCELONA
  1389. #ifdef DEBUG
  1390. fprintf(stderr, "Barcelona\n");
  1391. #endif
  1392. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1393. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1394. #endif
  1395. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1396. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1397. #endif
  1398. #if BUILD_COMPLEX
  1399. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1400. #endif
  1401. #if BUILD_COMPLEX16
  1402. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1403. #endif
  1404. #ifdef EXPRECISION
  1405. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1406. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1407. #endif
  1408. #endif
  1409. #ifdef BOBCAT
  1410. #ifdef DEBUG
  1411. fprintf(stderr, "Bobcate\n");
  1412. #endif
  1413. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1414. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1415. #endif
  1416. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1417. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1418. #endif
  1419. #if BUILD_COMPLEX
  1420. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1421. #endif
  1422. #if BUILD_COMPLEX16
  1423. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1424. #endif
  1425. #ifdef EXPRECISION
  1426. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1427. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1428. #endif
  1429. #endif
  1430. #ifdef BULLDOZER
  1431. #ifdef DEBUG
  1432. fprintf(stderr, "Bulldozer\n");
  1433. #endif
  1434. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1435. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1436. #endif
  1437. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1438. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1439. #endif
  1440. #if BUILD_COMPLEX
  1441. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1442. #endif
  1443. #if BUILD_COMPLEX16
  1444. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1445. #endif
  1446. #ifdef EXPRECISION
  1447. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1448. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1449. #endif
  1450. #endif
  1451. #ifdef EXCAVATOR
  1452. #ifdef DEBUG
  1453. fprintf(stderr, "Excavator\n");
  1454. #endif
  1455. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1456. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1457. #endif
  1458. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1459. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1460. #endif
  1461. #if BUILD_COMPLEX
  1462. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1463. #endif
  1464. #if BUILD_COMPLEX16
  1465. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1466. #endif
  1467. #ifdef EXPRECISION
  1468. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1469. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1470. #endif
  1471. #endif
  1472. #ifdef PILEDRIVER
  1473. #ifdef DEBUG
  1474. fprintf(stderr, "Piledriver\n");
  1475. #endif
  1476. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1477. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1478. #endif
  1479. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1480. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1481. #endif
  1482. #if BUILD_COMPLEX
  1483. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1484. #endif
  1485. #if BUILD_COMPLEX16
  1486. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1487. #endif
  1488. #ifdef EXPRECISION
  1489. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1490. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1491. #endif
  1492. #endif
  1493. #ifdef STEAMROLLER
  1494. #ifdef DEBUG
  1495. fprintf(stderr, "Steamroller\n");
  1496. #endif
  1497. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1498. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1499. #endif
  1500. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1501. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1502. #endif
  1503. #if BUILD_COMPLEX
  1504. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1505. #endif
  1506. #if BUILD_COMPLEX16
  1507. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1508. #endif
  1509. #ifdef EXPRECISION
  1510. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1511. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1512. #endif
  1513. #endif
  1514. #ifdef ZEN
  1515. #ifdef DEBUG
  1516. fprintf(stderr, "Zen\n");
  1517. #endif
  1518. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1519. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1520. #endif
  1521. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1522. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1523. #endif
  1524. #if BUILD_COMPLEX
  1525. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1526. #endif
  1527. #if BUILD_COMPLEX16
  1528. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1529. #endif
  1530. #ifdef EXPRECISION
  1531. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1532. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1533. #endif
  1534. #endif
  1535. #ifdef NANO
  1536. #ifdef DEBUG
  1537. fprintf(stderr, "NANO\n");
  1538. #endif
  1539. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1540. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1541. #endif
  1542. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1543. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1544. #endif
  1545. #if (BUILD_COMPLEX==1)
  1546. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1547. #endif
  1548. #if (BUILD_COMPLEX16==1)
  1549. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1550. #endif
  1551. #ifdef EXPRECISION
  1552. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1553. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1554. #endif
  1555. #endif
  1556. #if BUILD_COMPLEX==1
  1557. #ifdef CGEMM3M_DEFAULT_P
  1558. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1559. #else
  1560. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1561. #endif
  1562. #endif
  1563. #if BUILD_COMPLEX16==1
  1564. #ifdef ZGEMM3M_DEFAULT_P
  1565. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1566. #else
  1567. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1568. #endif
  1569. #endif
  1570. #ifdef EXPRECISION
  1571. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1572. #endif
  1573. #if BUILD_SINGLE == 1
  1574. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1575. #endif
  1576. #if BUILD_DOUBLE== 1
  1577. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1578. #endif
  1579. #if BUILD_COMPLEX==1
  1580. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1581. #endif
  1582. #if BUILD_COMPLEX16==1
  1583. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1584. #endif
  1585. #if BUILD_COMPLEX==1
  1586. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1587. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1588. #else
  1589. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1590. #endif
  1591. #endif
  1592. #if BUILD_COMPLEX16==1
  1593. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1594. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1595. #else
  1596. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1597. #endif
  1598. #endif
  1599. #ifdef QUAD_PRECISION
  1600. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1601. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1602. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1603. #endif
  1604. #ifdef DEBUG
  1605. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1606. #endif
  1607. #if BUILD_BFLOAT16==1
  1608. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1609. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1610. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1611. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1612. #endif
  1613. #if BUILD_SINGLE==1
  1614. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1615. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1616. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1617. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1618. #endif
  1619. #if BUILD_DOUBLE==1
  1620. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1621. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1622. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1623. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1624. #endif
  1625. #ifdef EXPRECISION
  1626. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1627. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1628. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1629. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1630. #endif
  1631. #if BUILD_COMPLEX ==1
  1632. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1633. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1634. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1635. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1636. #endif
  1637. #if BUILD_COMPLEX16 ==1
  1638. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1639. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1640. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1641. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1642. #endif
  1643. #if BUILD_COMPLEX == 1
  1644. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1645. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1646. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1647. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1648. #endif
  1649. #if BUILD_COMPLEX16 == 1
  1650. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1651. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1652. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1653. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1654. #endif
  1655. #ifdef EXPRECISION
  1656. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1657. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1658. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1659. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1660. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1661. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1662. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1663. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1664. #endif
  1665. }
  1666. #endif //POWER
  1667. #endif //ZARCH
  1668. #endif //(ARCH_LOONGARCH64)
  1669. #endif //(ARCH_MIPS64)
  1670. #endif //(ARCH_ARM64)