You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 58 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void); /* forward declaration: file-local (static), takes no arguments, returns nothing; its definition is not in this part of the file */
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  54. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  55. SBGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. SBGEMM_ALIGN_K,
  60. 0, // need_amxtile_permission
  61. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  62. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  63. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  64. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  65. dsdot_kTS,
  66. srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  67. sbgemv_nTS, sbgemv_tTS, sger_kTS,
  68. ssymv_LTS, ssymv_UTS,
  69. sbgemm_kernelTS, sbgemm_betaTS,
  70. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  71. sbgemm_incopyTS, sbgemm_itcopyTS,
  72. #else
  73. sbgemm_oncopyTS, sbgemm_otcopyTS,
  74. #endif
  75. sbgemm_oncopyTS, sbgemm_otcopyTS,
  76. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  77. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  78. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  79. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  80. #else
  81. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  82. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  83. #endif
  84. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  85. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  86. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  87. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  88. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  89. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  90. #else
  91. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  92. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  93. #endif
  94. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  95. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  96. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  97. ssymm_iutcopyTS, ssymm_iltcopyTS,
  98. #else
  99. ssymm_outcopyTS, ssymm_oltcopyTS,
  100. #endif
  101. ssymm_outcopyTS, ssymm_oltcopyTS,
  102. #ifndef NO_LAPACK
  103. sneg_tcopyTS, slaswp_ncopyTS,
  104. #else
  105. NULL,NULL,
  106. #endif
  107. #ifdef SMALL_MATRIX_OPT
  108. sbgemm_small_matrix_permitTS,
  109. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  110. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  111. #endif
  112. #endif
  113. #ifdef BUILD_HFLOAT16
  114. 0, 0, 0,
  115. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  116. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  117. SHGEMM_DEFAULT_UNROLL_MN,
  118. #else
  119. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  120. #endif
  121. shgemm_kernelTS, shgemm_betaTS,
  122. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  123. shgemm_incopyTS, shgemm_itcopyTS,
  124. #else
  125. shgemm_oncopyTS, shgemm_otcopyTS,
  126. #endif
  127. shgemm_oncopyTS, shgemm_otcopyTS,
  128. #endif
  129. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  130. 0, 0, 0,
  131. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  132. #ifdef SGEMM_DEFAULT_UNROLL_MN
  133. SGEMM_DEFAULT_UNROLL_MN,
  134. #else
  135. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  136. #endif
  137. #endif
  138. #ifdef HAVE_EXCLUSIVE_CACHE
  139. 1,
  140. #else
  141. 0,
  142. #endif
  143. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  144. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  145. #endif
  146. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  147. isamax_kTS,
  148. #endif
  149. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  150. isamin_kTS, ismax_kTS, ismin_kTS,
  151. snrm2_kTS, sasum_kTS,
  152. #endif
  153. #if BUILD_SINGLE == 1
  154. ssum_kTS,
  155. #endif
  156. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  157. scopy_kTS, sdot_kTS,
  158. // dsdot_kTS,
  159. srot_kTS, srotm_kTS, saxpy_kTS,
  160. #endif
  161. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  162. sscal_kTS,
  163. #endif
  164. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  165. sswap_kTS,
  166. sgemv_nTS, sgemv_tTS,
  167. #endif
  168. #if BUILD_SINGLE == 1
  169. sger_kTS,
  170. #endif
  171. #if BUILD_SINGLE == 1
  172. ssymv_LTS, ssymv_UTS,
  173. #endif
  174. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  175. #ifdef ARCH_X86_64
  176. sgemm_directTS,
  177. sgemm_direct_performantTS,
  178. #endif
  179. #ifdef ARCH_ARM64
  180. sgemm_directTS,
  181. #endif
  182. sgemm_kernelTS, sgemm_betaTS,
  183. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  184. sgemm_incopyTS, sgemm_itcopyTS,
  185. #else
  186. sgemm_oncopyTS, sgemm_otcopyTS,
  187. #endif
  188. sgemm_oncopyTS, sgemm_otcopyTS,
  189. #endif
  190. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  191. #ifdef SMALL_MATRIX_OPT
  192. sgemm_small_matrix_permitTS,
  193. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  194. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  195. #endif
  196. #endif
  197. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  198. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  199. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  200. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  201. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  202. #else
  203. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  204. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  205. #endif
  206. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  207. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  208. #endif
  209. #if (BUILD_SINGLE==1)
  210. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  211. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  212. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  213. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  214. #else
  215. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  216. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  217. #endif
  218. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  219. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  220. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  221. ssymm_iutcopyTS, ssymm_iltcopyTS,
  222. #else
  223. ssymm_outcopyTS, ssymm_oltcopyTS,
  224. #endif
  225. ssymm_outcopyTS, ssymm_oltcopyTS,
  226. #ifndef NO_LAPACK
  227. sneg_tcopyTS, slaswp_ncopyTS,
  228. #else
  229. NULL,NULL,
  230. #endif
  231. #endif
  232. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  233. 0, 0, 0,
  234. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  235. #ifdef DGEMM_DEFAULT_UNROLL_MN
  236. DGEMM_DEFAULT_UNROLL_MN,
  237. #else
  238. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  239. #endif
  240. #endif
  241. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  242. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  243. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  244. dnrm2_kTS, dasum_kTS,
  245. #endif
  246. #if (BUILD_DOUBLE==1)
  247. dsum_kTS,
  248. #endif
  249. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  250. dcopy_kTS, ddot_kTS,
  251. #endif
  252. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  253. dsdot_kTS,
  254. #endif
  255. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  256. drot_kTS,
  257. drotm_kTS,
  258. daxpy_kTS,
  259. dscal_kTS,
  260. dswap_kTS,
  261. dgemv_nTS, dgemv_tTS,
  262. #endif
  263. #if (BUILD_DOUBLE==1)
  264. dger_kTS,
  265. dsymv_LTS, dsymv_UTS,
  266. #endif
  267. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  268. dgemm_kernelTS, dgemm_betaTS,
  269. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  270. dgemm_incopyTS, dgemm_itcopyTS,
  271. #else
  272. dgemm_oncopyTS, dgemm_otcopyTS,
  273. #endif
  274. dgemm_oncopyTS, dgemm_otcopyTS,
  275. #endif
  276. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  277. #ifdef SMALL_MATRIX_OPT
  278. dgemm_small_matrix_permitTS,
  279. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  280. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  281. #endif
  282. #endif
  283. #if (BUILD_DOUBLE==1)
  284. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  285. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  286. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  287. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  288. #else
  289. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  290. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  291. #endif
  292. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  293. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  294. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  295. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  296. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  297. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  298. #else
  299. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  300. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  301. #endif
  302. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  303. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  304. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  305. dsymm_iutcopyTS, dsymm_iltcopyTS,
  306. #else
  307. dsymm_outcopyTS, dsymm_oltcopyTS,
  308. #endif
  309. dsymm_outcopyTS, dsymm_oltcopyTS,
  310. #ifndef NO_LAPACK
  311. dneg_tcopyTS, dlaswp_ncopyTS,
  312. #else
  313. NULL, NULL,
  314. #endif
  315. #endif
  316. #ifdef EXPRECISION
  317. 0, 0, 0,
  318. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  319. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  320. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  321. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  322. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  323. qgemv_nTS, qgemv_tTS, qger_kTS,
  324. qsymv_LTS, qsymv_UTS,
  325. qgemm_kernelTS, qgemm_betaTS,
  326. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  327. qgemm_incopyTS, qgemm_itcopyTS,
  328. #else
  329. qgemm_oncopyTS, qgemm_otcopyTS,
  330. #endif
  331. qgemm_oncopyTS, qgemm_otcopyTS,
  332. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  333. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  334. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  335. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  336. #else
  337. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  338. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  339. #endif
  340. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  341. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  342. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  343. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  344. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  345. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  346. #else
  347. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  348. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  349. #endif
  350. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  351. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  352. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  353. qsymm_iutcopyTS, qsymm_iltcopyTS,
  354. #else
  355. qsymm_outcopyTS, qsymm_oltcopyTS,
  356. #endif
  357. qsymm_outcopyTS, qsymm_oltcopyTS,
  358. #ifndef NO_LAPACK
  359. qneg_tcopyTS, qlaswp_ncopyTS,
  360. #else
  361. NULL, NULL,
  362. #endif
  363. #endif
  364. #if (BUILD_COMPLEX)
  365. 0, 0, 0,
  366. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  367. #ifdef CGEMM_DEFAULT_UNROLL_MN
  368. CGEMM_DEFAULT_UNROLL_MN,
  369. #else
  370. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  371. #endif
  372. #if (BUILD_COMPLEX)
  373. camax_kTS, camin_kTS,
  374. #endif
  375. #if (BUILD_COMPLEX)
  376. icamax_kTS,
  377. #endif
  378. #if (BUILD_COMPLEX)
  379. icamin_kTS,
  380. cnrm2_kTS, casum_kTS, csum_kTS,
  381. #endif
  382. #if (BUILD_COMPLEX)
  383. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  384. #endif
  385. #if (BUILD_COMPLEX)
  386. csrot_kTS,
  387. #endif
  388. #if (BUILD_COMPLEX)
  389. caxpy_kTS,
  390. caxpyc_kTS,
  391. cscal_kTS,
  392. cswap_kTS,
  393. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  394. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  395. #endif
  396. #if (BUILD_COMPLEX)
  397. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  398. csymv_LTS, csymv_UTS,
  399. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  400. #endif
  401. #if (BUILD_COMPLEX)
  402. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  403. cgemm_betaTS,
  404. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  405. cgemm_incopyTS, cgemm_itcopyTS,
  406. #else
  407. cgemm_oncopyTS, cgemm_otcopyTS,
  408. #endif
  409. cgemm_oncopyTS, cgemm_otcopyTS,
  410. #ifdef SMALL_MATRIX_OPT
  411. cgemm_small_matrix_permitTS,
  412. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  413. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  414. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  415. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  416. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  417. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  418. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  419. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  420. #endif
  421. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  422. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  423. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  424. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  425. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  426. #else
  427. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  428. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  429. #endif
  430. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  431. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  432. #endif
  433. #endif
  434. #if (BUILD_COMPLEX)
  435. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  436. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  437. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  438. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  439. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  440. #else
  441. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  442. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  443. #endif
  444. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  445. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  446. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  447. csymm_iutcopyTS, csymm_iltcopyTS,
  448. #else
  449. csymm_outcopyTS, csymm_oltcopyTS,
  450. #endif
  451. csymm_outcopyTS, csymm_oltcopyTS,
  452. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  453. chemm_iutcopyTS, chemm_iltcopyTS,
  454. #else
  455. chemm_outcopyTS, chemm_oltcopyTS,
  456. #endif
  457. chemm_outcopyTS, chemm_oltcopyTS,
  458. 0, 0, 0,
  459. #if (USE_GEMM3M)
  460. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  461. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  462. #else
  463. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  464. #endif
  465. cgemm3m_kernelTS,
  466. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  467. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  468. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  469. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  470. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  471. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  472. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  473. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  474. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  475. csymm3m_oucopybTS, csymm3m_olcopybTS,
  476. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  477. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  478. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  479. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  480. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  481. chemm3m_oucopybTS, chemm3m_olcopybTS,
  482. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  483. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  484. #else
  485. 0, 0, 0,
  486. NULL,
  487. NULL, NULL,
  488. NULL, NULL,
  489. NULL, NULL,
  490. NULL, NULL,
  491. NULL, NULL,
  492. NULL, NULL,
  493. NULL, NULL,
  494. NULL, NULL,
  495. NULL, NULL,
  496. NULL, NULL,
  497. NULL, NULL,
  498. NULL, NULL,
  499. NULL, NULL,
  500. NULL, NULL,
  501. NULL, NULL,
  502. NULL, NULL,
  503. NULL, NULL,
  504. NULL, NULL,
  505. #endif
  506. #endif
  507. #if (BUILD_COMPLEX)
  508. #ifndef NO_LAPACK
  509. cneg_tcopyTS,
  510. claswp_ncopyTS,
  511. #else
  512. NULL, NULL,
  513. #endif
  514. #endif
  515. #if BUILD_COMPLEX16 == 1
  516. 0, 0, 0,
  517. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  518. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  519. ZGEMM_DEFAULT_UNROLL_MN,
  520. #else
  521. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  522. #endif
  523. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  524. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  525. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  526. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  527. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  528. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  529. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  530. zsymv_LTS, zsymv_UTS,
  531. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  532. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  533. zgemm_betaTS,
  534. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  535. zgemm_incopyTS, zgemm_itcopyTS,
  536. #else
  537. zgemm_oncopyTS, zgemm_otcopyTS,
  538. #endif
  539. zgemm_oncopyTS, zgemm_otcopyTS,
  540. #ifdef SMALL_MATRIX_OPT
  541. zgemm_small_matrix_permitTS,
  542. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  543. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  544. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  545. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  546. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  547. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  548. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  549. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  550. #endif
  551. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  552. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  553. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  554. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  555. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  556. #else
  557. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  558. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  559. #endif
  560. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  561. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  562. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  563. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  564. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  565. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  566. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  567. #else
  568. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  569. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  570. #endif
  571. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  572. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  573. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  574. zsymm_iutcopyTS, zsymm_iltcopyTS,
  575. #else
  576. zsymm_outcopyTS, zsymm_oltcopyTS,
  577. #endif
  578. zsymm_outcopyTS, zsymm_oltcopyTS,
  579. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  580. zhemm_iutcopyTS, zhemm_iltcopyTS,
  581. #else
  582. zhemm_outcopyTS, zhemm_oltcopyTS,
  583. #endif
  584. zhemm_outcopyTS, zhemm_oltcopyTS,
  585. 0, 0, 0,
  586. #if (USE_GEMM3M)
  587. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  588. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  589. #else
  590. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  591. #endif
  592. zgemm3m_kernelTS,
  593. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  594. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  595. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  596. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  597. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  598. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  599. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  600. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  601. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  602. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  603. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  604. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  605. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  606. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  607. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  608. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  609. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  610. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  611. #else
  612. 0, 0, 0,
  613. NULL,
  614. NULL, NULL,
  615. NULL, NULL,
  616. NULL, NULL,
  617. NULL, NULL,
  618. NULL, NULL,
  619. NULL, NULL,
  620. NULL, NULL,
  621. NULL, NULL,
  622. NULL, NULL,
  623. NULL, NULL,
  624. NULL, NULL,
  625. NULL, NULL,
  626. NULL, NULL,
  627. NULL, NULL,
  628. NULL, NULL,
  629. NULL, NULL,
  630. NULL, NULL,
  631. NULL, NULL,
  632. #endif
  633. #ifndef NO_LAPACK
  634. zneg_tcopyTS, zlaswp_ncopyTS,
  635. #else
  636. NULL, NULL,
  637. #endif
  638. #endif
  639. #ifdef EXPRECISION
  640. 0, 0, 0,
  641. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  642. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  643. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  644. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  645. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  646. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  647. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  648. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  649. xsymv_LTS, xsymv_UTS,
  650. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  651. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  652. xgemm_betaTS,
  653. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  654. xgemm_incopyTS, xgemm_itcopyTS,
  655. #else
  656. xgemm_oncopyTS, xgemm_otcopyTS,
  657. #endif
  658. xgemm_oncopyTS, xgemm_otcopyTS,
  659. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  660. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  661. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  662. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  663. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  664. #else
  665. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  666. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  667. #endif
  668. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  669. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  670. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  671. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  672. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  673. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  674. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  675. #else
  676. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  677. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  678. #endif
  679. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  680. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  681. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  682. xsymm_iutcopyTS, xsymm_iltcopyTS,
  683. #else
  684. xsymm_outcopyTS, xsymm_oltcopyTS,
  685. #endif
  686. xsymm_outcopyTS, xsymm_oltcopyTS,
  687. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  688. xhemm_iutcopyTS, xhemm_iltcopyTS,
  689. #else
  690. xhemm_outcopyTS, xhemm_oltcopyTS,
  691. #endif
  692. xhemm_outcopyTS, xhemm_oltcopyTS,
  693. 0, 0, 0,
  694. #if (USE_GEMM3M)
  695. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  696. xgemm3m_kernelTS,
  697. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  698. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  699. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  700. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  701. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  702. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  703. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  704. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  705. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  706. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  707. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  708. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  709. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  710. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  711. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  712. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  713. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  714. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  715. #else
  716. 0, 0, 0,
  717. NULL,
  718. NULL, NULL,
  719. NULL, NULL,
  720. NULL, NULL,
  721. NULL, NULL,
  722. NULL, NULL,
  723. NULL, NULL,
  724. NULL, NULL,
  725. NULL, NULL,
  726. NULL, NULL,
  727. NULL, NULL,
  728. NULL, NULL,
  729. NULL, NULL,
  730. NULL, NULL,
  731. NULL, NULL,
  732. NULL, NULL,
  733. NULL, NULL,
  734. NULL, NULL,
  735. NULL, NULL,
  736. #endif
  737. #ifndef NO_LAPACK
  738. xneg_tcopyTS, xlaswp_ncopyTS,
  739. #else
  740. NULL, NULL,
  741. #endif
  742. #endif
  743. init_parameter,
  744. SNUMOPT, DNUMOPT, QNUMOPT,
  745. #if BUILD_SINGLE == 1
  746. saxpby_kTS,
  747. #endif
  748. #if BUILD_DOUBLE == 1
  749. daxpby_kTS,
  750. #endif
  751. #if BUILD_COMPLEX == 1
  752. caxpby_kTS,
  753. #endif
  754. #if BUILD_COMPLEX16== 1
  755. zaxpby_kTS,
  756. #endif
  757. #if BUILD_SINGLE == 1
  758. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  759. #endif
  760. #if BUILD_DOUBLE== 1
  761. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  762. #endif
  763. #if BUILD_COMPLEX == 1
  764. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  765. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  766. #endif
  767. #if BUILD_COMPLEX16 == 1
  768. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  769. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  770. #endif
  771. #if BUILD_SINGLE == 1
  772. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  773. #endif
  774. #if BUILD_DOUBLE== 1
  775. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  776. #endif
  777. #if BUILD_COMPLEX== 1
  778. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  779. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  780. #endif
  781. #if BUILD_COMPLEX16==1
  782. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  783. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  784. #endif
  785. #if BUILD_SINGLE == 1
  786. sgeadd_kTS,
  787. #endif
  788. #if BUILD_DOUBLE==1
  789. dgeadd_kTS,
  790. #endif
  791. #if BUILD_COMPLEX==1
  792. cgeadd_kTS,
  793. #endif
  794. #if BUILD_COMPLEX16==1
  795. zgeadd_kTS,
  796. #endif
  797. };
  798. #if (ARCH_ARM64)
  /*
   * init_parameter (ARM64 build)
   *
   * Copies the compile-time GEMM tuning defaults into the *_p, *_q and *_r
   * fields of TABLE_NAME.  Only the precisions enabled by the BUILD_* switches
   * are written.  When USE_GEMM3M is active, each 3M field takes its own
   * *GEMM3M_DEFAULT_* macro if one exists, otherwise it mirrors the value just
   * stored for the corresponding real-precision GEMM.
   */
  static void init_parameter(void) {

    /* ---- *_p fields ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  #endif
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  #endif
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  #endif
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  #endif
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  #endif

    /* ---- *_q fields ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  #endif
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  #endif

    /* ---- *_r fields ---- */
  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  #endif
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif
  #if (BUILD_COMPLEX == 1)
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif
  #if (BUILD_COMPLEX16 == 1)
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

    /* ---- extended precision (q = real, x = complex) ---- */
  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

    /* ---- GEMM3M: dedicated default if defined, else reuse the table value ---- */
  #if (USE_GEMM3M)

  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif

  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif

  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif

  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif

  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* Extended-precision 3M mirrors the qgemm values stored above. */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif

  #endif /* USE_GEMM3M */
  }
  891. #else // (ARCH_ARM64)
  892. #if defined(ARCH_MIPS64)
  893. static void init_parameter(void) {
  894. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  895. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  896. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  897. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  898. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  899. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  900. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  901. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  902. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  903. TABLE_NAME.dgemm_r = 640;
  904. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  905. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  906. #ifdef EXPRECISION
  907. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  908. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  909. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  910. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  911. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  912. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  913. #endif
  914. #if defined(USE_GEMM3M)
  915. #ifdef CGEMM3M_DEFAULT_P
  916. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  917. #else
  918. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  919. #endif
  920. #ifdef ZGEMM3M_DEFAULT_P
  921. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  922. #else
  923. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  924. #endif
  925. #ifdef CGEMM3M_DEFAULT_Q
  926. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  927. #else
  928. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  929. #endif
  930. #ifdef ZGEMM3M_DEFAULT_Q
  931. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  932. #else
  933. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  934. #endif
  935. #ifdef CGEMM3M_DEFAULT_R
  936. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  937. #else
  938. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  939. #endif
  940. #ifdef ZGEMM3M_DEFAULT_R
  941. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  942. #else
  943. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  944. #endif
  945. #ifdef EXPRECISION
  946. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  947. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  948. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  949. #endif
  950. #endif
  951. }
  952. #else // (ARCH_MIPS64)
  953. #if (ARCH_LOONGARCH64)
  /*
   * get_L3_size - report the L3 cache size in MB.
   *
   * Executes the LoongArch `cpucfg` instruction with configuration word 0x14
   * and decodes the returned word as
   *
   *     bytes = (bits[15:0] + 1) * 2^bits[23:16] * 2^bits[30:24]
   *
   * NOTE(review): the three fields are presumably ways-1, log2(sets) and
   * log2(line size) as described in the LoongArch manual - confirm there.
   *
   * The byte count is scaled down by 2^20 and truncated.  All arithmetic is
   * done with integer shifts (no pow()/libm, no double -> int conversion).
   * A decoded value too large for an int saturates at 0x7fffffff instead of
   * overflowing.
   *
   * Returns: L3 size in MB (0 when the decoded size is below 1 MB).
   */
  static int get_L3_size(void) {
    int ret = 0, id = 0x14;
    unsigned long long multiplier;
    unsigned int exponent;
    unsigned int up_shift;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(ret)
      : [id]"r"(id)
      : "memory"
    );

    multiplier = (unsigned long long)(ret & 0xffff) + 1;          /* 1 .. 65536 */
    exponent   = (unsigned int)((ret >> 16) & 0xff)
               + (unsigned int)((ret >> 24) & 0x7f);              /* 0 .. 382   */

    /* bytes -> MB is a division by 2^20; fold it into the exponent. */
    if (exponent < 20)
      return (int)(multiplier >> (20 - exponent));

    up_shift = exponent - 20;

    /* multiplier <= 2^16, so shifts up to 30 cannot overflow 64 bits. */
    if (up_shift > 30 || (multiplier << up_shift) > 0x7fffffffULL)
      return 0x7fffffff;

    return (int)(multiplier << up_shift);
  }
  964. static void init_parameter(void) {
  965. #ifdef BUILD_BFLOAT16
  966. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  967. #endif
  968. #ifdef BUILD_BFLOAT16
  969. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  970. #endif
  971. #if defined(LA464)
  972. int L3_size = get_L3_size();
  973. #ifdef SMP
  974. if(blas_num_threads == 1){
  975. #endif
  976. //single thread
  977. if (L3_size == 32){ // 3C5000 and 3D5000
  978. TABLE_NAME.sgemm_p = 256;
  979. TABLE_NAME.sgemm_q = 384;
  980. TABLE_NAME.sgemm_r = 8192;
  981. TABLE_NAME.dgemm_p = 112;
  982. TABLE_NAME.dgemm_q = 289;
  983. TABLE_NAME.dgemm_r = 4096;
  984. TABLE_NAME.cgemm_p = 128;
  985. TABLE_NAME.cgemm_q = 256;
  986. TABLE_NAME.cgemm_r = 4096;
  987. TABLE_NAME.zgemm_p = 128;
  988. TABLE_NAME.zgemm_q = 128;
  989. TABLE_NAME.zgemm_r = 2048;
  990. } else { // 3A5000 and 3C5000L
  991. TABLE_NAME.sgemm_p = 256;
  992. TABLE_NAME.sgemm_q = 384;
  993. TABLE_NAME.sgemm_r = 4096;
  994. TABLE_NAME.dgemm_p = 112;
  995. TABLE_NAME.dgemm_q = 300;
  996. TABLE_NAME.dgemm_r = 3024;
  997. TABLE_NAME.cgemm_p = 128;
  998. TABLE_NAME.cgemm_q = 256;
  999. TABLE_NAME.cgemm_r = 2048;
  1000. TABLE_NAME.zgemm_p = 128;
  1001. TABLE_NAME.zgemm_q = 128;
  1002. TABLE_NAME.zgemm_r = 1024;
  1003. }
  1004. #ifdef SMP
  1005. }else{
  1006. //multi thread
  1007. if (L3_size == 32){ // 3C5000 and 3D5000
  1008. TABLE_NAME.sgemm_p = 256;
  1009. TABLE_NAME.sgemm_q = 384;
  1010. TABLE_NAME.sgemm_r = 1024;
  1011. TABLE_NAME.dgemm_p = 112;
  1012. TABLE_NAME.dgemm_q = 289;
  1013. TABLE_NAME.dgemm_r = 342;
  1014. TABLE_NAME.cgemm_p = 128;
  1015. TABLE_NAME.cgemm_q = 256;
  1016. TABLE_NAME.cgemm_r = 512;
  1017. TABLE_NAME.zgemm_p = 128;
  1018. TABLE_NAME.zgemm_q = 128;
  1019. TABLE_NAME.zgemm_r = 512;
  1020. } else { // 3A5000 and 3C5000L
  1021. TABLE_NAME.sgemm_p = 256;
  1022. TABLE_NAME.sgemm_q = 384;
  1023. TABLE_NAME.sgemm_r = 2048;
  1024. TABLE_NAME.dgemm_p = 112;
  1025. TABLE_NAME.dgemm_q = 300;
  1026. TABLE_NAME.dgemm_r = 738;
  1027. TABLE_NAME.cgemm_p = 128;
  1028. TABLE_NAME.cgemm_q = 256;
  1029. TABLE_NAME.cgemm_r = 1024;
  1030. TABLE_NAME.zgemm_p = 128;
  1031. TABLE_NAME.zgemm_q = 128;
  1032. TABLE_NAME.zgemm_r = 1024;
  1033. }
  1034. }
  1035. #endif
  1036. #else
  1037. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1038. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1039. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1040. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1041. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1042. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1043. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1044. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1045. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1046. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1047. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1048. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1049. #endif
  1050. #ifdef BUILD_BFLOAT16
  1051. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1052. #endif
  1053. }
  1054. #else // (ARCH_LOONGARCH64)
  1055. #if (ARCH_POWER)
  1056. static void init_parameter(void) {
  1057. #ifdef BUILD_BFLOAT16
  1058. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1059. #endif
  1060. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1061. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1062. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1063. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1064. #ifdef BUILD_BFLOAT16
  1065. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1066. #endif
  1067. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1068. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1069. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1070. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1071. #ifdef BUILD_BFLOAT16
  1072. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1073. #endif
  1074. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1075. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1076. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1077. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1078. }
  1079. #else //POWER
  1080. #if (ARCH_ZARCH)
  1081. static void init_parameter(void) {
  1082. #ifdef BUILD_BFLOAT16
  1083. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1084. #endif
  1085. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1086. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1087. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1088. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1089. #ifdef BUILD_BFLOAT16
  1090. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1091. #endif
  1092. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1093. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1094. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1095. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1096. #ifdef BUILD_BFLOAT16
  1097. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1098. #endif
  1099. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1100. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1101. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1102. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1103. }
  1104. #else //ZARCH
  1105. #if (ARCH_RISCV64)
  1106. static void init_parameter(void) {
  1107. #ifdef BUILD_BFLOAT16
  1108. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1109. #endif
  1110. #ifdef BUILD_HFLOAT16
  1111. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1112. #endif
  1113. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1114. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1115. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1116. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1117. #ifdef BUILD_BFLOAT16
  1118. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1119. #endif
  1120. #ifdef BUILD_HFLOAT16
  1121. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1122. #endif
  1123. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1124. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1125. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1126. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1127. #ifdef BUILD_BFLOAT16
  1128. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1129. #endif
  1130. #ifdef BUILD_HFLOAT16
  1131. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1132. #endif
  1133. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1134. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1135. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1136. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1137. }
  1138. #else //RISCV64
  #ifdef ARCH_X86
  /*
   * get_l2_size_old - legacy L2 size probe via CPUID leaf 2 (32-bit x86 only).
   *
   * Scans the 15 descriptor bytes returned in EAX[31:8], EBX, ECX and EDX
   * (in that order) and returns the size, in KB, for the first byte that
   * matches a known descriptor.  The low byte of EAX is skipped, as it is
   * not one of the descriptor bytes examined.
   *
   * Returns: cache size in KB.  If no descriptor matches, a warning is
   * printed to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int eax, ebx, ecx, edx;
    int regs[4];
    int reg, byte, shift;

    cpuid(2, &eax, &ebx, &ecx, &edx);

    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;

    for (reg = 0; reg < 4; reg++) {
      /* EAX contributes bytes 1..3 only; the other registers bytes 0..3. */
      for (byte = (reg == 0) ? 1 : 0; byte < 4; byte++) {
        shift = byte * 8;

        switch (BITMASK(regs[reg], shift, 0xff)) {
          /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
          case 0x1a :
            return 96;

          case 0x39 :
          case 0x3b :
          case 0x41 :
          case 0x79 :
          case 0x81 :
            return 128;

          case 0x3a :
            return 192;

          case 0x21 :
          case 0x3c :
          case 0x42 :
          case 0x7a :
          case 0x7e :
          case 0x82 :
            return 256;

          case 0x3d :
            return 384;

          case 0x3e :
          case 0x43 :
          case 0x7b :
          case 0x7f :
          case 0x83 :
          case 0x86 :
            return 512;

          case 0x44 :
          case 0x78 :
          case 0x7c :
          case 0x84 :
          case 0x87 :
            return 1024;

          case 0x45 :
          case 0x7d :
          case 0x85 :
            return 2048;

          case 0x48 :
            return 3072;   /* 3 MB = 3 * 1024 KB (was mistyped as 3184) */

          case 0x49 :
            return 4096;

          case 0x4e :
            return 6144;
        }
      }
    }

    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  #endif
  /*
   * get_l2_size - determine the L2 cache size in KB.
   *
   * Order of precedence:
   *   1. a non-zero value from the OPENBLAS_L2_SIZE environment setting;
   *   2. bits 31:16 of ECX from CPUID leaf 0x80000006, when positive;
   *   3. fallback: on 32-bit x86 builds the legacy get_l2_size_old() probe,
   *      on all other builds a warning on stderr and a default of 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int l2_kb = readenv_atoi("OPENBLAS_L2_SIZE");

    if (l2_kb == 0) {
      /* No user override: ask the CPU. */
      cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
      l2_kb = BITMASK(ecx, 16, 0xffff);

      if (l2_kb <= 0) {
  #ifndef ARCH_X86
        fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
        l2_kb = 256;
  #else
        l2_kb = get_l2_size_old();
  #endif
      }
    }

    return l2_kb;
  }
  /*
   * get_l3_size - L3 cache size derived from CPUID leaf 0x80000006.
   *
   * Returns the 14-bit field EDX[31:18] multiplied by 512.
   * NOTE(review): presumably the field counts 512 KB units, making the
   * result a size in KB - confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l3_field = BITMASK(edx, 18, 0x3fff);

    return l3_field * 512;
  }
  1234. static void init_parameter(void) {
  1235. int l2 = get_l2_size();
  1236. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1237. /* where the GEMM unrolling parameters do not depend on l2 */
  1238. #ifdef BUILD_BFLOAT16
  1239. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1240. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1241. #endif
  1242. #ifdef BUILD_HFLOAT16
  1243. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1244. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1245. #endif
  1246. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1247. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1248. #endif
  1249. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1250. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1251. #endif
  1252. #if BUILD_COMPLEX == 1
  1253. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1254. #endif
  1255. #if BUILD_COMPLEX16==1
  1256. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1257. #endif
  1258. #if BUILD_COMPLEX == 1
  1259. #ifdef CGEMM3M_DEFAULT_Q
  1260. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1261. #else
  1262. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1263. #endif
  1264. #endif
  1265. #if BUILD_COMPLEX16 == 1
  1266. #ifdef ZGEMM3M_DEFAULT_Q
  1267. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1268. #else
  1269. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1270. #endif
  1271. #endif
  1272. #ifdef EXPRECISION
  1273. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1274. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1275. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1276. #endif
  1277. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1278. #ifdef DEBUG
  1279. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1280. #endif
  1281. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1282. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1283. #endif
  1284. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1285. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1286. #endif
  1287. #if BUILD_COMPLEX==1
  1288. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1289. #endif
  1290. #if BUILD_COMPLEX16==1
  1291. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1292. #endif
  1293. #ifdef EXPRECISION
  1294. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1295. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1296. #endif
  1297. #endif
  1298. #ifdef CORE_NORTHWOOD
  1299. #ifdef DEBUG
  1300. fprintf(stderr, "Northwood\n");
  1301. #endif
  1302. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1303. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1304. #endif
  1305. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1306. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1307. #endif
  1308. #if BUILD_COMPLEX==1
  1309. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1310. #endif
  1311. #if BUILD_COMPLEX16==1
  1312. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1313. #endif
  1314. #ifdef EXPRECISION
  1315. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1316. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1317. #endif
  1318. #endif
  1319. #ifdef ATOM
  1320. #ifdef DEBUG
  1321. fprintf(stderr, "Atom\n");
  1322. #endif
  1323. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1324. TABLE_NAME.sgemm_p = 256;
  1325. #endif
  1326. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1327. TABLE_NAME.dgemm_p = 128;
  1328. #endif
  1329. #if BUILD_COMPLEX==1
  1330. TABLE_NAME.cgemm_p = 128;
  1331. #endif
  1332. #if BUILD_COMPLEX16==1
  1333. TABLE_NAME.zgemm_p = 64;
  1334. #endif
  1335. #ifdef EXPRECISION
  1336. TABLE_NAME.qgemm_p = 64;
  1337. TABLE_NAME.xgemm_p = 32;
  1338. #endif
  1339. #endif
  1340. #ifdef CORE_PRESCOTT
  1341. #ifdef DEBUG
  1342. fprintf(stderr, "Prescott\n");
  1343. #endif
  1344. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1345. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1346. #endif
  1347. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1348. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1349. #endif
  1350. #if BUILD_COMPLEX==1
  1351. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1352. #endif
  1353. #if BUILD_COMPLEX16 == 1
  1354. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1355. #endif
  1356. #ifdef EXPRECISION
  1357. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1358. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1359. #endif
  1360. #endif
  1361. #ifdef CORE2
  1362. #ifdef DEBUG
  1363. fprintf(stderr, "Core2\n");
  1364. #endif
  1365. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1366. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1367. #endif
  1368. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1369. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1370. #endif
  1371. #if BUILD_COMPLEX==1
  1372. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1373. #endif
  1374. #if BUILD_COMPLEX16==1
  1375. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1376. #endif
  1377. #ifdef EXPRECISION
  1378. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1379. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1380. #endif
  1381. #endif
  1382. #ifdef PENRYN
  1383. #ifdef DEBUG
  1384. fprintf(stderr, "Penryn\n");
  1385. #endif
  1386. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1387. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1388. #endif
  1389. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1390. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1391. #endif
  1392. #if BUILD_COMPLEX==1
  1393. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1394. #endif
  1395. #if BUILD_COMPLEX16==1
  1396. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1397. #endif
  1398. #ifdef EXPRECISION
  1399. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1400. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1401. #endif
  1402. #endif
  1403. #ifdef DUNNINGTON
  1404. #ifdef DEBUG
  1405. fprintf(stderr, "Dunnington\n");
  1406. #endif
  1407. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1408. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1409. #endif
  1410. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1411. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1412. #endif
  1413. #if BUILD_COMPLEX==1
  1414. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1415. #endif
  1416. #if BUILD_COMPLEX16==1
  1417. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1418. #endif
  1419. #ifdef EXPRECISION
  1420. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1421. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1422. #endif
  1423. #endif
  1424. #ifdef NEHALEM
  1425. #ifdef DEBUG
  1426. fprintf(stderr, "Nehalem\n");
  1427. #endif
  1428. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1429. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1430. #endif
  1431. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1432. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1433. #endif
  1434. #if BUILD_COMPLEX
  1435. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1436. #endif
  1437. #if BUILD_COMPLEX16
  1438. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1439. #endif
  1440. #ifdef EXPRECISION
  1441. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1442. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1443. #endif
  1444. #endif
  1445. #ifdef SANDYBRIDGE
  1446. #ifdef DEBUG
  1447. fprintf(stderr, "Sandybridge\n");
  1448. #endif
  1449. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1450. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1451. #endif
  1452. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1453. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1454. #endif
  1455. #if BUILD_COMPLEX
  1456. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1457. #endif
  1458. #if BUILD_COMPLEX16
  1459. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1460. #endif
  1461. #ifdef EXPRECISION
  1462. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1463. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1464. #endif
  1465. #endif
  1466. #ifdef HASWELL
  1467. #ifdef DEBUG
  1468. fprintf(stderr, "Haswell\n");
  1469. #endif
  1470. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1471. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1472. #endif
  1473. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1474. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1475. #endif
  1476. #if BUILD_COMPLEX
  1477. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1478. #endif
  1479. #if BUILD_COMPLEX16
  1480. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1481. #endif
  1482. #ifdef EXPRECISION
  1483. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1484. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1485. #endif
  1486. #endif
  1487. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1488. #ifdef DEBUG
  1489. fprintf(stderr, "SkylakeX\n");
  1490. #endif
  1491. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1492. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1493. #endif
  1494. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1495. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1496. #endif
  1497. #if BUILD_COMPLEX
  1498. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1499. #endif
  1500. #if BUILD_COMPLEX16
  1501. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1502. #endif
  1503. #ifdef EXPRECISION
  1504. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1505. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1506. #endif
  1507. #endif
  1508. #ifdef OPTERON
  1509. #ifdef DEBUG
  1510. fprintf(stderr, "Opteron\n");
  1511. #endif
  1512. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1513. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1514. #endif
  1515. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1516. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1517. #endif
  1518. #if BUILD_COMPLEX
  1519. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1520. #endif
  1521. #if BUILD_COMPLEX16
  1522. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1523. #endif
  1524. #ifdef EXPRECISION
  1525. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1526. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1527. #endif
  1528. #endif
  1529. #ifdef BARCELONA
  1530. #ifdef DEBUG
  1531. fprintf(stderr, "Barcelona\n");
  1532. #endif
  1533. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1534. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1535. #endif
  1536. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1537. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1538. #endif
  1539. #if BUILD_COMPLEX
  1540. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1541. #endif
  1542. #if BUILD_COMPLEX16
  1543. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1544. #endif
  1545. #ifdef EXPRECISION
  1546. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1547. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1548. #endif
  1549. #endif
  1550. #ifdef BOBCAT
  1551. #ifdef DEBUG
  1552. fprintf(stderr, "Bobcate\n");
  1553. #endif
  1554. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1555. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1556. #endif
  1557. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1558. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1559. #endif
  1560. #if BUILD_COMPLEX
  1561. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1562. #endif
  1563. #if BUILD_COMPLEX16
  1564. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1565. #endif
  1566. #ifdef EXPRECISION
  1567. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1568. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1569. #endif
  1570. #endif
  1571. #ifdef BULLDOZER
  1572. #ifdef DEBUG
  1573. fprintf(stderr, "Bulldozer\n");
  1574. #endif
  1575. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1576. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1577. #endif
  1578. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1579. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1580. #endif
  1581. #if BUILD_COMPLEX
  1582. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1583. #endif
  1584. #if BUILD_COMPLEX16
  1585. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1586. #endif
  1587. #ifdef EXPRECISION
  1588. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1589. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1590. #endif
  1591. #endif
  1592. #ifdef EXCAVATOR
  1593. #ifdef DEBUG
  1594. fprintf(stderr, "Excavator\n");
  1595. #endif
  1596. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1597. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1598. #endif
  1599. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1600. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1601. #endif
  1602. #if BUILD_COMPLEX
  1603. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1604. #endif
  1605. #if BUILD_COMPLEX16
  1606. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1607. #endif
  1608. #ifdef EXPRECISION
  1609. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1610. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1611. #endif
  1612. #endif
  1613. #ifdef PILEDRIVER
  1614. #ifdef DEBUG
  1615. fprintf(stderr, "Piledriver\n");
  1616. #endif
  1617. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1618. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1619. #endif
  1620. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1621. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1622. #endif
  1623. #if BUILD_COMPLEX
  1624. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1625. #endif
  1626. #if BUILD_COMPLEX16
  1627. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1628. #endif
  1629. #ifdef EXPRECISION
  1630. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1631. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1632. #endif
  1633. #endif
  1634. #ifdef STEAMROLLER
  1635. #ifdef DEBUG
  1636. fprintf(stderr, "Steamroller\n");
  1637. #endif
  1638. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1639. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1640. #endif
  1641. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1642. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1643. #endif
  1644. #if BUILD_COMPLEX
  1645. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1646. #endif
  1647. #if BUILD_COMPLEX16
  1648. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1649. #endif
  1650. #ifdef EXPRECISION
  1651. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1652. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1653. #endif
  1654. #endif
  1655. #ifdef ZEN
  1656. #ifdef DEBUG
  1657. fprintf(stderr, "Zen\n");
  1658. #endif
  1659. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1660. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1661. #endif
  1662. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1663. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1664. #endif
  1665. #if BUILD_COMPLEX
  1666. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1667. #endif
  1668. #if BUILD_COMPLEX16
  1669. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1670. #endif
  1671. #ifdef EXPRECISION
  1672. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1673. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1674. #endif
  1675. #endif
  1676. #ifdef NANO
  1677. #ifdef DEBUG
  1678. fprintf(stderr, "NANO\n");
  1679. #endif
  1680. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1681. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1682. #endif
  1683. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1684. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1685. #endif
  1686. #if (BUILD_COMPLEX==1)
  1687. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1688. #endif
  1689. #if (BUILD_COMPLEX16==1)
  1690. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1691. #endif
  1692. #ifdef EXPRECISION
  1693. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1694. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1695. #endif
  1696. #endif
  1697. #ifdef SAPPHIRERAPIDS
  1698. #if (BUILD_BFLOAT16 == 1)
  1699. TABLE_NAME.need_amxtile_permission = 1;
  1700. #endif
  1701. #endif
  1702. #if BUILD_COMPLEX==1
  1703. #ifdef CGEMM3M_DEFAULT_P
  1704. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1705. #else
  1706. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1707. #endif
  1708. #endif
  1709. #if BUILD_COMPLEX16==1
  1710. #ifdef ZGEMM3M_DEFAULT_P
  1711. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1712. #else
  1713. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1714. #endif
  1715. #endif
  1716. #ifdef EXPRECISION
  1717. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1718. #endif
  1719. #if BUILD_SINGLE == 1
  1720. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1721. #endif
  1722. #if BUILD_DOUBLE== 1
  1723. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1724. #endif
  1725. #if BUILD_COMPLEX==1
  1726. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1727. #endif
  1728. #if BUILD_COMPLEX16==1
  1729. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1730. #endif
  1731. #if BUILD_COMPLEX==1
  1732. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1733. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1734. #else
  1735. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1736. #endif
  1737. #endif
  1738. #if BUILD_COMPLEX16==1
  1739. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1740. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1741. #else
  1742. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1743. #endif
  1744. #endif
  1745. #ifdef QUAD_PRECISION
  1746. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1747. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1748. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1749. #endif
  1750. #ifdef DEBUG
  1751. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1752. #endif
  1753. #if BUILD_BFLOAT16==1
  1754. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1755. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1756. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1757. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1758. #endif
  1759. #if BUILD_HFLOAT16==1
  1760. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1761. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1762. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1763. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1764. #endif
  1765. #if BUILD_SINGLE==1
  1766. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1767. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1768. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1769. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1770. #endif
  1771. #if BUILD_DOUBLE==1
  1772. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1773. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1774. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1775. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1776. #endif
  1777. #ifdef EXPRECISION
  1778. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1779. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1780. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1781. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1782. #endif
  1783. #if BUILD_COMPLEX ==1
  1784. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1785. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1786. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1787. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1788. #endif
  1789. #if BUILD_COMPLEX16 ==1
  1790. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1791. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1792. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1793. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1794. #endif
  1795. #if BUILD_COMPLEX == 1
  1796. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1797. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1798. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1799. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1800. #endif
  1801. #if BUILD_COMPLEX16 == 1
  1802. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1803. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1804. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1805. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1806. #endif
  1807. #ifdef EXPRECISION
  1808. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1809. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1810. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1811. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1812. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1813. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1814. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1815. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1816. #endif
  1817. }
  1818. #endif //RISCV64
  1819. #endif //POWER
  1820. #endif //ZARCH
  1821. #endif //(ARCH_LOONGARCH64)
  1822. #endif //(ARCH_MIPS64)
  1823. #endif //(ARCH_ARM64)