You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

setparam-ref.c 60 kB

6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* Copyright 2023, 2025 The OpenBLAS Project. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <string.h>
  41. #include "common.h"
  42. #ifdef BUILD_KERNEL
  43. #include "kernelTS.h"
  44. #endif
  45. #undef DEBUG
  46. static void init_parameter(void);
  47. gotoblas_t TABLE_NAME = {
  48. DTB_DEFAULT_ENTRIES,
  49. SWITCH_RATIO,
  50. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  51. #ifdef BUILD_BFLOAT16
  52. 0, 0, 0,
  53. BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N,
  54. #ifdef BGEMM_DEFAULT_UNROLL_MN
  55. BGEMM_DEFAULT_UNROLL_MN,
  56. #else
  57. MAX(BGEMM_DEFAULT_UNROLL_M, BGEMM_DEFAULT_UNROLL_N),
  58. #endif
  59. BGEMM_ALIGN_K,
  60. 0, 0, 0,
  61. SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
  62. #ifdef SBGEMM_DEFAULT_UNROLL_MN
  63. SBGEMM_DEFAULT_UNROLL_MN,
  64. #else
  65. MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
  66. #endif
  67. SBGEMM_ALIGN_K,
  68. 0, // need_amxtile_permission
  69. sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
  70. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  71. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  72. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
  73. dsdot_kTS,
  74. srot_kTS, srotm_kTS, bscal_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  75. bgemv_nTS, bgemv_tTS, sbgemv_nTS, sbgemv_tTS, sger_kTS,
  76. ssymv_LTS, ssymv_UTS,
  77. bgemm_kernelTS, bgemm_betaTS,
  78. #if BGEMM_DEFAULT_UNROLL_M != BGEMM_DEFAULT_UNROLL_N
  79. bgemm_incopyTS, bgemm_itcopyTS,
  80. #else
  81. bgemm_oncopyTS, bgemm_otcopyTS,
  82. #endif
  83. bgemm_oncopyTS, bgemm_otcopyTS,
  84. sbgemm_kernelTS, sbgemm_betaTS,
  85. #if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
  86. sbgemm_incopyTS, sbgemm_itcopyTS,
  87. #else
  88. sbgemm_oncopyTS, sbgemm_otcopyTS,
  89. #endif
  90. sbgemm_oncopyTS, sbgemm_otcopyTS,
  91. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  92. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  93. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  94. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  95. #else
  96. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  97. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  98. #endif
  99. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  100. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  101. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  102. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  103. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  104. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  105. #else
  106. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  107. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  108. #endif
  109. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  110. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  111. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  112. ssymm_iutcopyTS, ssymm_iltcopyTS,
  113. #else
  114. ssymm_outcopyTS, ssymm_oltcopyTS,
  115. #endif
  116. ssymm_outcopyTS, ssymm_oltcopyTS,
  117. #ifndef NO_LAPACK
  118. sneg_tcopyTS, slaswp_ncopyTS,
  119. #else
  120. NULL,NULL,
  121. #endif
  122. #ifdef SMALL_MATRIX_OPT
  123. sbgemm_small_matrix_permitTS,
  124. sbgemm_small_kernel_nnTS, sbgemm_small_kernel_ntTS, sbgemm_small_kernel_tnTS, sbgemm_small_kernel_ttTS,
  125. sbgemm_small_kernel_b0_nnTS, sbgemm_small_kernel_b0_ntTS, sbgemm_small_kernel_b0_tnTS, sbgemm_small_kernel_b0_ttTS,
  126. #endif
  127. #endif
  128. #ifdef BUILD_HFLOAT16
  129. 0, 0, 0,
  130. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  131. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  132. SHGEMM_DEFAULT_UNROLL_MN,
  133. #else
  134. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  135. #endif
  136. shgemm_kernelTS, shgemm_betaTS,
  137. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  138. shgemm_incopyTS, shgemm_itcopyTS,
  139. #else
  140. shgemm_oncopyTS, shgemm_otcopyTS,
  141. #endif
  142. shgemm_oncopyTS, shgemm_otcopyTS,
  143. #endif
  144. #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  145. 0, 0, 0,
  146. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  147. #ifdef SGEMM_DEFAULT_UNROLL_MN
  148. SGEMM_DEFAULT_UNROLL_MN,
  149. #else
  150. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  151. #endif
  152. #endif
  153. #ifdef HAVE_EXCLUSIVE_CACHE
  154. 1,
  155. #else
  156. 0,
  157. #endif
  158. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  159. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  160. #endif
  161. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  162. isamax_kTS,
  163. #endif
  164. #if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
  165. isamin_kTS, ismax_kTS, ismin_kTS,
  166. snrm2_kTS, sasum_kTS,
  167. #endif
  168. #if BUILD_SINGLE == 1
  169. ssum_kTS,
  170. #endif
  171. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  172. scopy_kTS, sdot_kTS,
  173. // dsdot_kTS,
  174. srot_kTS, srotm_kTS, saxpy_kTS,
  175. #endif
  176. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
  177. sscal_kTS,
  178. #endif
  179. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  180. sswap_kTS,
  181. sgemv_nTS, sgemv_tTS,
  182. #endif
  183. #if BUILD_SINGLE == 1
  184. sger_kTS,
  185. #endif
  186. #if BUILD_SINGLE == 1
  187. ssymv_LTS, ssymv_UTS,
  188. #endif
  189. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
  190. #ifdef ARCH_X86_64
  191. sgemm_directTS,
  192. sgemm_direct_performantTS,
  193. #endif
  194. #ifdef ARCH_ARM64
  195. sgemm_directTS,
  196. #endif
  197. sgemm_kernelTS, sgemm_betaTS,
  198. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  199. sgemm_incopyTS, sgemm_itcopyTS,
  200. #else
  201. sgemm_oncopyTS, sgemm_otcopyTS,
  202. #endif
  203. sgemm_oncopyTS, sgemm_otcopyTS,
  204. #endif
  205. #if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
  206. #ifdef SMALL_MATRIX_OPT
  207. sgemm_small_matrix_permitTS,
  208. sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
  209. sgemm_small_kernel_b0_nnTS, sgemm_small_kernel_b0_ntTS, sgemm_small_kernel_b0_tnTS, sgemm_small_kernel_b0_ttTS,
  210. #endif
  211. #endif
  212. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
  213. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  214. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  215. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  216. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  217. #else
  218. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  219. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  220. #endif
  221. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  222. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  223. #endif
  224. #if (BUILD_SINGLE==1)
  225. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  226. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  227. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  228. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  229. #else
  230. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  231. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  232. #endif
  233. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  234. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  235. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  236. ssymm_iutcopyTS, ssymm_iltcopyTS,
  237. #else
  238. ssymm_outcopyTS, ssymm_oltcopyTS,
  239. #endif
  240. ssymm_outcopyTS, ssymm_oltcopyTS,
  241. #ifndef NO_LAPACK
  242. sneg_tcopyTS, slaswp_ncopyTS,
  243. #else
  244. NULL,NULL,
  245. #endif
  246. #endif
  247. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  248. 0, 0, 0,
  249. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  250. #ifdef DGEMM_DEFAULT_UNROLL_MN
  251. DGEMM_DEFAULT_UNROLL_MN,
  252. #else
  253. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  254. #endif
  255. #endif
  256. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  257. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  258. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  259. dnrm2_kTS, dasum_kTS,
  260. #endif
  261. #if (BUILD_DOUBLE==1)
  262. dsum_kTS,
  263. #endif
  264. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  265. dcopy_kTS, ddot_kTS,
  266. #endif
  267. #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
  268. dsdot_kTS,
  269. #endif
  270. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  271. drot_kTS,
  272. drotm_kTS,
  273. daxpy_kTS,
  274. dscal_kTS,
  275. dswap_kTS,
  276. dgemv_nTS, dgemv_tTS,
  277. #endif
  278. #if (BUILD_DOUBLE==1)
  279. dger_kTS,
  280. dsymv_LTS, dsymv_UTS,
  281. #endif
  282. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  283. dgemm_kernelTS, dgemm_betaTS,
  284. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  285. dgemm_incopyTS, dgemm_itcopyTS,
  286. #else
  287. dgemm_oncopyTS, dgemm_otcopyTS,
  288. #endif
  289. dgemm_oncopyTS, dgemm_otcopyTS,
  290. #endif
  291. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  292. #ifdef SMALL_MATRIX_OPT
  293. dgemm_small_matrix_permitTS,
  294. dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
  295. dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
  296. #endif
  297. #endif
  298. #if (BUILD_DOUBLE==1)
  299. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  300. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  301. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  302. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  303. #else
  304. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  305. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  306. #endif
  307. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  308. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  309. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  310. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  311. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  312. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  313. #else
  314. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  315. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  316. #endif
  317. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  318. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  319. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  320. dsymm_iutcopyTS, dsymm_iltcopyTS,
  321. #else
  322. dsymm_outcopyTS, dsymm_oltcopyTS,
  323. #endif
  324. dsymm_outcopyTS, dsymm_oltcopyTS,
  325. #ifndef NO_LAPACK
  326. dneg_tcopyTS, dlaswp_ncopyTS,
  327. #else
  328. NULL, NULL,
  329. #endif
  330. #endif
  331. #ifdef EXPRECISION
  332. 0, 0, 0,
  333. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  334. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  335. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  336. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  337. qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  338. qgemv_nTS, qgemv_tTS, qger_kTS,
  339. qsymv_LTS, qsymv_UTS,
  340. qgemm_kernelTS, qgemm_betaTS,
  341. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  342. qgemm_incopyTS, qgemm_itcopyTS,
  343. #else
  344. qgemm_oncopyTS, qgemm_otcopyTS,
  345. #endif
  346. qgemm_oncopyTS, qgemm_otcopyTS,
  347. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  348. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  349. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  350. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  351. #else
  352. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  353. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  354. #endif
  355. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  356. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  357. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  358. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  359. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  360. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  361. #else
  362. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  363. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  364. #endif
  365. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  366. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  367. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  368. qsymm_iutcopyTS, qsymm_iltcopyTS,
  369. #else
  370. qsymm_outcopyTS, qsymm_oltcopyTS,
  371. #endif
  372. qsymm_outcopyTS, qsymm_oltcopyTS,
  373. #ifndef NO_LAPACK
  374. qneg_tcopyTS, qlaswp_ncopyTS,
  375. #else
  376. NULL, NULL,
  377. #endif
  378. #endif
  379. #if (BUILD_COMPLEX)
  380. 0, 0, 0,
  381. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  382. #ifdef CGEMM_DEFAULT_UNROLL_MN
  383. CGEMM_DEFAULT_UNROLL_MN,
  384. #else
  385. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  386. #endif
  387. #if (BUILD_COMPLEX)
  388. camax_kTS, camin_kTS,
  389. #endif
  390. #if (BUILD_COMPLEX)
  391. icamax_kTS,
  392. #endif
  393. #if (BUILD_COMPLEX)
  394. icamin_kTS,
  395. cnrm2_kTS, casum_kTS, csum_kTS,
  396. #endif
  397. #if (BUILD_COMPLEX)
  398. ccopy_kTS, cdotu_kTS, cdotc_kTS,
  399. #endif
  400. #if (BUILD_COMPLEX)
  401. csrot_kTS,
  402. #endif
  403. #if (BUILD_COMPLEX)
  404. caxpy_kTS,
  405. caxpyc_kTS,
  406. cscal_kTS,
  407. cswap_kTS,
  408. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  409. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  410. #endif
  411. #if (BUILD_COMPLEX)
  412. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  413. csymv_LTS, csymv_UTS,
  414. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  415. #endif
  416. #if (BUILD_COMPLEX)
  417. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  418. cgemm_betaTS,
  419. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  420. cgemm_incopyTS, cgemm_itcopyTS,
  421. #else
  422. cgemm_oncopyTS, cgemm_otcopyTS,
  423. #endif
  424. cgemm_oncopyTS, cgemm_otcopyTS,
  425. #ifdef SMALL_MATRIX_OPT
  426. cgemm_small_matrix_permitTS,
  427. cgemm_small_kernel_nnTS, cgemm_small_kernel_ntTS, cgemm_small_kernel_nrTS, cgemm_small_kernel_ncTS,
  428. cgemm_small_kernel_tnTS, cgemm_small_kernel_ttTS, cgemm_small_kernel_trTS, cgemm_small_kernel_tcTS,
  429. cgemm_small_kernel_rnTS, cgemm_small_kernel_rtTS, cgemm_small_kernel_rrTS, cgemm_small_kernel_rcTS,
  430. cgemm_small_kernel_cnTS, cgemm_small_kernel_ctTS, cgemm_small_kernel_crTS, cgemm_small_kernel_ccTS,
  431. cgemm_small_kernel_b0_nnTS, cgemm_small_kernel_b0_ntTS, cgemm_small_kernel_b0_nrTS, cgemm_small_kernel_b0_ncTS,
  432. cgemm_small_kernel_b0_tnTS, cgemm_small_kernel_b0_ttTS, cgemm_small_kernel_b0_trTS, cgemm_small_kernel_b0_tcTS,
  433. cgemm_small_kernel_b0_rnTS, cgemm_small_kernel_b0_rtTS, cgemm_small_kernel_b0_rrTS, cgemm_small_kernel_b0_rcTS,
  434. cgemm_small_kernel_b0_cnTS, cgemm_small_kernel_b0_ctTS, cgemm_small_kernel_b0_crTS, cgemm_small_kernel_b0_ccTS,
  435. #endif
  436. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  437. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  438. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  439. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  440. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  441. #else
  442. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  443. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  444. #endif
  445. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  446. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  447. #endif
  448. #endif
  449. #if (BUILD_COMPLEX)
  450. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  451. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  452. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  453. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  454. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  455. #else
  456. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  457. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  458. #endif
  459. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  460. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  461. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  462. csymm_iutcopyTS, csymm_iltcopyTS,
  463. #else
  464. csymm_outcopyTS, csymm_oltcopyTS,
  465. #endif
  466. csymm_outcopyTS, csymm_oltcopyTS,
  467. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  468. chemm_iutcopyTS, chemm_iltcopyTS,
  469. #else
  470. chemm_outcopyTS, chemm_oltcopyTS,
  471. #endif
  472. chemm_outcopyTS, chemm_oltcopyTS,
  473. 0, 0, 0,
  474. #if (USE_GEMM3M)
  475. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  476. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  477. #else
  478. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  479. #endif
  480. cgemm3m_kernelTS,
  481. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  482. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  483. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  484. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  485. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  486. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  487. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  488. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  489. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  490. csymm3m_oucopybTS, csymm3m_olcopybTS,
  491. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  492. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  493. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  494. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  495. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  496. chemm3m_oucopybTS, chemm3m_olcopybTS,
  497. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  498. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  499. #else
  500. 0, 0, 0,
  501. NULL,
  502. NULL, NULL,
  503. NULL, NULL,
  504. NULL, NULL,
  505. NULL, NULL,
  506. NULL, NULL,
  507. NULL, NULL,
  508. NULL, NULL,
  509. NULL, NULL,
  510. NULL, NULL,
  511. NULL, NULL,
  512. NULL, NULL,
  513. NULL, NULL,
  514. NULL, NULL,
  515. NULL, NULL,
  516. NULL, NULL,
  517. NULL, NULL,
  518. NULL, NULL,
  519. NULL, NULL,
  520. #endif
  521. #endif
  522. #if (BUILD_COMPLEX)
  523. #ifndef NO_LAPACK
  524. cneg_tcopyTS,
  525. claswp_ncopyTS,
  526. #else
  527. NULL, NULL,
  528. #endif
  529. #endif
  530. #if BUILD_COMPLEX16 == 1
  531. 0, 0, 0,
  532. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  533. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  534. ZGEMM_DEFAULT_UNROLL_MN,
  535. #else
  536. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  537. #endif
  538. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  539. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  540. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  541. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  542. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  543. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  544. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  545. zsymv_LTS, zsymv_UTS,
  546. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  547. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  548. zgemm_betaTS,
  549. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  550. zgemm_incopyTS, zgemm_itcopyTS,
  551. #else
  552. zgemm_oncopyTS, zgemm_otcopyTS,
  553. #endif
  554. zgemm_oncopyTS, zgemm_otcopyTS,
  555. #ifdef SMALL_MATRIX_OPT
  556. zgemm_small_matrix_permitTS,
  557. zgemm_small_kernel_nnTS, zgemm_small_kernel_ntTS, zgemm_small_kernel_nrTS, zgemm_small_kernel_ncTS,
  558. zgemm_small_kernel_tnTS, zgemm_small_kernel_ttTS, zgemm_small_kernel_trTS, zgemm_small_kernel_tcTS,
  559. zgemm_small_kernel_rnTS, zgemm_small_kernel_rtTS, zgemm_small_kernel_rrTS, zgemm_small_kernel_rcTS,
  560. zgemm_small_kernel_cnTS, zgemm_small_kernel_ctTS, zgemm_small_kernel_crTS, zgemm_small_kernel_ccTS,
  561. zgemm_small_kernel_b0_nnTS, zgemm_small_kernel_b0_ntTS, zgemm_small_kernel_b0_nrTS, zgemm_small_kernel_b0_ncTS,
  562. zgemm_small_kernel_b0_tnTS, zgemm_small_kernel_b0_ttTS, zgemm_small_kernel_b0_trTS, zgemm_small_kernel_b0_tcTS,
  563. zgemm_small_kernel_b0_rnTS, zgemm_small_kernel_b0_rtTS, zgemm_small_kernel_b0_rrTS, zgemm_small_kernel_b0_rcTS,
  564. zgemm_small_kernel_b0_cnTS, zgemm_small_kernel_b0_ctTS, zgemm_small_kernel_b0_crTS, zgemm_small_kernel_b0_ccTS,
  565. #endif
  566. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  567. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  568. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  569. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  570. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  571. #else
  572. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  573. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  574. #endif
  575. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  576. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  577. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  578. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  579. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  580. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  581. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  582. #else
  583. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  584. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  585. #endif
  586. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  587. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  588. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  589. zsymm_iutcopyTS, zsymm_iltcopyTS,
  590. #else
  591. zsymm_outcopyTS, zsymm_oltcopyTS,
  592. #endif
  593. zsymm_outcopyTS, zsymm_oltcopyTS,
  594. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  595. zhemm_iutcopyTS, zhemm_iltcopyTS,
  596. #else
  597. zhemm_outcopyTS, zhemm_oltcopyTS,
  598. #endif
  599. zhemm_outcopyTS, zhemm_oltcopyTS,
  600. 0, 0, 0,
  601. #if (USE_GEMM3M)
  602. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  603. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  604. #else
  605. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  606. #endif
  607. zgemm3m_kernelTS,
  608. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  609. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  610. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  611. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  612. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  613. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  614. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  615. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  616. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  617. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  618. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  619. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  620. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  621. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  622. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  623. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  624. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  625. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  626. #else
  627. 0, 0, 0,
  628. NULL,
  629. NULL, NULL,
  630. NULL, NULL,
  631. NULL, NULL,
  632. NULL, NULL,
  633. NULL, NULL,
  634. NULL, NULL,
  635. NULL, NULL,
  636. NULL, NULL,
  637. NULL, NULL,
  638. NULL, NULL,
  639. NULL, NULL,
  640. NULL, NULL,
  641. NULL, NULL,
  642. NULL, NULL,
  643. NULL, NULL,
  644. NULL, NULL,
  645. NULL, NULL,
  646. NULL, NULL,
  647. #endif
  648. #ifndef NO_LAPACK
  649. zneg_tcopyTS, zlaswp_ncopyTS,
  650. #else
  651. NULL, NULL,
  652. #endif
  653. #endif
  654. #ifdef EXPRECISION
  655. 0, 0, 0,
  656. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  657. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  658. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  659. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  660. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  661. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  662. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  663. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  664. xsymv_LTS, xsymv_UTS,
  665. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  666. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  667. xgemm_betaTS,
  668. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  669. xgemm_incopyTS, xgemm_itcopyTS,
  670. #else
  671. xgemm_oncopyTS, xgemm_otcopyTS,
  672. #endif
  673. xgemm_oncopyTS, xgemm_otcopyTS,
  674. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  675. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  676. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  677. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  678. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  679. #else
  680. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  681. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  682. #endif
  683. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  684. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  685. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  686. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  687. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  688. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  689. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  690. #else
  691. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  692. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  693. #endif
  694. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  695. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  696. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  697. xsymm_iutcopyTS, xsymm_iltcopyTS,
  698. #else
  699. xsymm_outcopyTS, xsymm_oltcopyTS,
  700. #endif
  701. xsymm_outcopyTS, xsymm_oltcopyTS,
  702. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  703. xhemm_iutcopyTS, xhemm_iltcopyTS,
  704. #else
  705. xhemm_outcopyTS, xhemm_oltcopyTS,
  706. #endif
  707. xhemm_outcopyTS, xhemm_oltcopyTS,
  708. 0, 0, 0,
  709. #if (USE_GEMM3M)
  710. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  711. xgemm3m_kernelTS,
  712. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  713. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  714. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  715. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  716. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  717. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  718. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  719. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  720. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  721. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  722. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  723. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  724. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  725. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  726. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  727. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  728. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  729. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  730. #else
  731. 0, 0, 0,
  732. NULL,
  733. NULL, NULL,
  734. NULL, NULL,
  735. NULL, NULL,
  736. NULL, NULL,
  737. NULL, NULL,
  738. NULL, NULL,
  739. NULL, NULL,
  740. NULL, NULL,
  741. NULL, NULL,
  742. NULL, NULL,
  743. NULL, NULL,
  744. NULL, NULL,
  745. NULL, NULL,
  746. NULL, NULL,
  747. NULL, NULL,
  748. NULL, NULL,
  749. NULL, NULL,
  750. NULL, NULL,
  751. #endif
  752. #ifndef NO_LAPACK
  753. xneg_tcopyTS, xlaswp_ncopyTS,
  754. #else
  755. NULL, NULL,
  756. #endif
  757. #endif
  758. init_parameter,
  759. SNUMOPT, DNUMOPT, QNUMOPT,
  760. #if BUILD_SINGLE == 1
  761. saxpby_kTS,
  762. #endif
  763. #if BUILD_DOUBLE == 1
  764. daxpby_kTS,
  765. #endif
  766. #if BUILD_COMPLEX == 1
  767. caxpby_kTS,
  768. #endif
  769. #if BUILD_COMPLEX16== 1
  770. zaxpby_kTS,
  771. #endif
  772. #if BUILD_SINGLE == 1
  773. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  774. #endif
  775. #if BUILD_DOUBLE== 1
  776. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  777. #endif
  778. #if BUILD_COMPLEX == 1
  779. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  780. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  781. #endif
  782. #if BUILD_COMPLEX16 == 1
  783. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  784. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  785. #endif
  786. #if BUILD_SINGLE == 1
  787. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  788. #endif
  789. #if BUILD_DOUBLE== 1
  790. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  791. #endif
  792. #if BUILD_COMPLEX== 1
  793. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  794. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  795. #endif
  796. #if BUILD_COMPLEX16==1
  797. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  798. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  799. #endif
  800. #if BUILD_SINGLE == 1
  801. sgeadd_kTS,
  802. #endif
  803. #if BUILD_DOUBLE==1
  804. dgeadd_kTS,
  805. #endif
  806. #if BUILD_COMPLEX==1
  807. cgeadd_kTS,
  808. #endif
  809. #if BUILD_COMPLEX16==1
  810. zgeadd_kTS,
  811. #endif
  812. };
  813. #if (ARCH_ARM64)
  /*
   * init_parameter -- ARM64 variant.
   *
   * Stores the compile-time *_DEFAULT_{P,Q,R} GEMM blocking constants into
   * the matching fields of TABLE_NAME. Every precision is guarded by the
   * same BUILD_* switches, so only the fields selected by the build
   * configuration are written. The GEMM3M fields come last because their
   * fallbacks read back the real-precision values stored earlier in this
   * function.
   */
  static void init_parameter(void) {

  #if (BUILD_BFLOAT16)
    TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
    TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
    TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;

    TABLE_NAME.bgemm_p  = BGEMM_DEFAULT_P;
    TABLE_NAME.bgemm_q  = BGEMM_DEFAULT_Q;
    TABLE_NAME.bgemm_r  = BGEMM_DEFAULT_R;
  #endif

    /* The single-precision values are also stored for complex-only builds. */
  #if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
    TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
    TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
    TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  #endif

    /* Likewise, double-precision values are stored for complex16-only builds. */
  #if (BUILD_DOUBLE == 1) || (BUILD_COMPLEX16 == 1)
    TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
    TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
    TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX == 1
    TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
    TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
    TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  #endif

  #if BUILD_COMPLEX16 == 1
    TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
    TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
    TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  #endif

  #ifdef EXPRECISION
    TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
    TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
    TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;

    TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
    TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
    TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  #endif

  #if (USE_GEMM3M)
    /* CGEMM3M: dedicated constant when defined, otherwise the SGEMM value. */
  #ifdef CGEMM3M_DEFAULT_P
    TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  #endif
  #ifdef CGEMM3M_DEFAULT_Q
    TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  #endif
  #ifdef CGEMM3M_DEFAULT_R
    TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  #endif

    /* ZGEMM3M: dedicated constant when defined, otherwise the DGEMM value. */
  #ifdef ZGEMM3M_DEFAULT_P
    TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  #else
    TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  #endif
  #ifdef ZGEMM3M_DEFAULT_Q
    TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  #else
    TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  #endif
  #ifdef ZGEMM3M_DEFAULT_R
    TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  #else
    TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  #endif

    /* XGEMM3M always mirrors the QGEMM values stored above. */
  #ifdef EXPRECISION
    TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
    TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
    TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  #endif
  #endif /* USE_GEMM3M */
  }
  909. #else // (ARCH_ARM64)
  910. #if defined(ARCH_MIPS64)
  911. static void init_parameter(void) {
  912. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  913. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  914. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  915. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  916. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  917. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  918. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  919. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  920. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  921. TABLE_NAME.dgemm_r = 640;
  922. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  923. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  924. #ifdef EXPRECISION
  925. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  926. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  927. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  928. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  929. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  930. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  931. #endif
  932. #if defined(USE_GEMM3M)
  933. #ifdef CGEMM3M_DEFAULT_P
  934. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  935. #else
  936. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  937. #endif
  938. #ifdef ZGEMM3M_DEFAULT_P
  939. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  940. #else
  941. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  942. #endif
  943. #ifdef CGEMM3M_DEFAULT_Q
  944. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  945. #else
  946. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  947. #endif
  948. #ifdef ZGEMM3M_DEFAULT_Q
  949. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  950. #else
  951. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  952. #endif
  953. #ifdef CGEMM3M_DEFAULT_R
  954. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  955. #else
  956. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  957. #endif
  958. #ifdef ZGEMM3M_DEFAULT_R
  959. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  960. #else
  961. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  962. #endif
  963. #ifdef EXPRECISION
  964. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  965. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  966. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  967. #endif
  968. #endif
  969. }
  970. #else // (ARCH_MIPS64)
  971. #if (ARCH_LOONGARCH64)
  /*
   * get_L3_size -- LoongArch64.
   *
   * Executes CPUCFG with configuration index 0x14 and converts the returned
   * word into a size in MB:
   *   (low 16 bits + 1) * 2^(bits 23:16) * 2^(bits 30:24) / 1024 / 1024.
   * NOTE(review): the three fields are presumably way count, log2(sets) and
   * log2(line size) of the L3 cache -- confirm against the LoongArch manual.
   * The arithmetic is carried out in double (via pow) and truncated on
   * return.
   */
  static int get_L3_size() {
    int cfg_word  = 0;
    int cfg_index = 0x14;

    __asm__ volatile (
      "cpucfg %[ret], %[id]"
      : [ret]"=r"(cfg_word)
      : [id]"r"(cfg_index)
      : "memory"
    );

    const int base_count = (cfg_word & 0xffff) + 1;
    const int exponent_a = (cfg_word >> 16) & 0xff;
    const int exponent_b = (cfg_word >> 24) & 0x7f;

    return base_count * pow(2, exponent_a) * pow(2, exponent_b) / 1024 / 1024; // MB
  }
  982. static void init_parameter(void) {
  983. #ifdef BUILD_BFLOAT16
  984. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  985. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  986. #endif
  987. #ifdef BUILD_BFLOAT16
  988. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  989. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  990. #endif
  991. #if defined(LA464)
  992. int L3_size = get_L3_size();
  993. #ifdef SMP
  994. if(blas_num_threads == 1){
  995. #endif
  996. //single thread
  997. if (L3_size == 32){ // 3C5000 and 3D5000
  998. TABLE_NAME.sgemm_p = 256;
  999. TABLE_NAME.sgemm_q = 384;
  1000. TABLE_NAME.sgemm_r = 8192;
  1001. TABLE_NAME.dgemm_p = 112;
  1002. TABLE_NAME.dgemm_q = 289;
  1003. TABLE_NAME.dgemm_r = 4096;
  1004. TABLE_NAME.cgemm_p = 128;
  1005. TABLE_NAME.cgemm_q = 256;
  1006. TABLE_NAME.cgemm_r = 4096;
  1007. TABLE_NAME.zgemm_p = 128;
  1008. TABLE_NAME.zgemm_q = 128;
  1009. TABLE_NAME.zgemm_r = 2048;
  1010. } else { // 3A5000 and 3C5000L
  1011. TABLE_NAME.sgemm_p = 256;
  1012. TABLE_NAME.sgemm_q = 384;
  1013. TABLE_NAME.sgemm_r = 4096;
  1014. TABLE_NAME.dgemm_p = 112;
  1015. TABLE_NAME.dgemm_q = 300;
  1016. TABLE_NAME.dgemm_r = 3024;
  1017. TABLE_NAME.cgemm_p = 128;
  1018. TABLE_NAME.cgemm_q = 256;
  1019. TABLE_NAME.cgemm_r = 2048;
  1020. TABLE_NAME.zgemm_p = 128;
  1021. TABLE_NAME.zgemm_q = 128;
  1022. TABLE_NAME.zgemm_r = 1024;
  1023. }
  1024. #ifdef SMP
  1025. }else{
  1026. //multi thread
  1027. if (L3_size == 32){ // 3C5000 and 3D5000
  1028. TABLE_NAME.sgemm_p = 256;
  1029. TABLE_NAME.sgemm_q = 384;
  1030. TABLE_NAME.sgemm_r = 1024;
  1031. TABLE_NAME.dgemm_p = 112;
  1032. TABLE_NAME.dgemm_q = 289;
  1033. TABLE_NAME.dgemm_r = 342;
  1034. TABLE_NAME.cgemm_p = 128;
  1035. TABLE_NAME.cgemm_q = 256;
  1036. TABLE_NAME.cgemm_r = 512;
  1037. TABLE_NAME.zgemm_p = 128;
  1038. TABLE_NAME.zgemm_q = 128;
  1039. TABLE_NAME.zgemm_r = 512;
  1040. } else { // 3A5000 and 3C5000L
  1041. TABLE_NAME.sgemm_p = 256;
  1042. TABLE_NAME.sgemm_q = 384;
  1043. TABLE_NAME.sgemm_r = 2048;
  1044. TABLE_NAME.dgemm_p = 112;
  1045. TABLE_NAME.dgemm_q = 300;
  1046. TABLE_NAME.dgemm_r = 738;
  1047. TABLE_NAME.cgemm_p = 128;
  1048. TABLE_NAME.cgemm_q = 256;
  1049. TABLE_NAME.cgemm_r = 1024;
  1050. TABLE_NAME.zgemm_p = 128;
  1051. TABLE_NAME.zgemm_q = 128;
  1052. TABLE_NAME.zgemm_r = 1024;
  1053. }
  1054. }
  1055. #endif
  1056. #else
  1057. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1058. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1059. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1060. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1061. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1062. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1063. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1064. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1065. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1066. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1067. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1068. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1069. #endif
  1070. #ifdef BUILD_BFLOAT16
  1071. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1072. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1073. #endif
  1074. }
  1075. #else // (ARCH_LOONGARCH64)
  1076. #if (ARCH_POWER)
  1077. static void init_parameter(void) {
  1078. #ifdef BUILD_BFLOAT16
  1079. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1080. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1081. #endif
  1082. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1083. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1084. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1085. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1086. #ifdef BUILD_BFLOAT16
  1087. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1088. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1089. #endif
  1090. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1091. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1092. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1093. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1094. #ifdef BUILD_BFLOAT16
  1095. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1096. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1097. #endif
  1098. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1099. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1100. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1101. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1102. }
  1103. #else //POWER
  1104. #if (ARCH_ZARCH)
  1105. static void init_parameter(void) {
  1106. #ifdef BUILD_BFLOAT16
  1107. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1108. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1109. #endif
  1110. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1111. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1112. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1113. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1114. #ifdef BUILD_BFLOAT16
  1115. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1116. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1117. #endif
  1118. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1119. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1120. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1121. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1122. #ifdef BUILD_BFLOAT16
  1123. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1124. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1125. #endif
  1126. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1127. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1128. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1129. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1130. }
  1131. #else //ZARCH
  1132. #if (ARCH_RISCV64)
  1133. static void init_parameter(void) {
  1134. #ifdef BUILD_BFLOAT16
  1135. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1136. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1137. #endif
  1138. #ifdef BUILD_HFLOAT16
  1139. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1140. #endif
  1141. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1142. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1143. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1144. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1145. #ifdef BUILD_BFLOAT16
  1146. TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
  1147. TABLE_NAME.bgemm_r = BGEMM_DEFAULT_R;
  1148. #endif
  1149. #ifdef BUILD_HFLOAT16
  1150. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  1151. #endif
  1152. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  1153. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  1154. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  1155. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  1156. #ifdef BUILD_BFLOAT16
  1157. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1158. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1159. #endif
  1160. #ifdef BUILD_HFLOAT16
  1161. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1162. #endif
  1163. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1164. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1165. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1166. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1167. }
  1168. #else //RISCV64
  1169. #ifdef ARCH_X86
  /*
   * get_l2_size_old -- legacy L2 detection through CPUID leaf 2.
   *
   * Issues cpuid(2), splits EAX/EBX/ECX/EDX into one-byte cache descriptors
   * and returns the L2 size in KB for the first descriptor it recognises.
   * The lowest byte of EAX is deliberately not examined (Intel documents it
   * as a call count, not a descriptor).
   *
   * Returns: L2 size in KB; if no known descriptor is present a warning is
   * written to stderr and 256 is returned.
   *
   * NOTE(review): Intel also specifies that a register whose bit 31 is set
   * carries no valid descriptors; that validity bit is not checked here.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /* EAX contributes three descriptor bytes, the other registers four. */
    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){

      switch (info[i]){

        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

      case 0x1a :
        return 96;

      case 0x39 :
      case 0x3b :
      case 0x41 :
      case 0x79 :
      case 0x81 :
        return 128;

      case 0x3a :
        return 192;

      case 0x21 :
      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
        return 256;

      case 0x3d :
        return 384;

      case 0x3e :
      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
        return 512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
        return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
        return 2048;

      case 0x48 :
        /* 3 MB L2 = 3 * 1024 KB */
        return 3072;

      case 0x49 :
        return 4096;

      case 0x4e :
        return 6144;
      }
    }

    /* No recognised L2 descriptor: fall back to a conservative default. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  1240. #endif
  /*
   * get_l2_size -- L2 cache size in KB.
   *
   * Lookup order:
   *   1. the OPENBLAS_L2_SIZE environment variable (via readenv_atoi), used
   *      only when it yields a positive value -- a zero or negative override
   *      is ignored, because the result feeds the GEMM blocking computation
   *      and a negative size would produce negative blocking factors;
   *   2. CPUID leaf 0x80000006, bits 31:16 of ECX;
   *   3. on ARCH_X86 builds the legacy CPUID leaf 2 lookup
   *      (get_l2_size_old); on other builds a warning on stderr and a
   *      default of 256.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx, l2;

    l2 = readenv_atoi("OPENBLAS_L2_SIZE");
    if (l2 > 0)
      return l2;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l2 = BITMASK(ecx, 16, 0xffff);
    if (l2 > 0)
      return l2;

    /* The extended leaf reported nothing usable. */
  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * get_l3_size -- reads CPUID leaf 0x80000006 and returns the 14-bit field
   * at EDX bits 31:18 multiplied by 512.
   * NOTE(review): presumably the L3 size in KB (field in 512 KB units) --
   * confirm against the vendor CPUID documentation.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

    l3_field = BITMASK(edx, 18, 0x3fff);
    return l3_field * 512;
  }
  1264. static void init_parameter(void) {
  1265. int l2 = get_l2_size();
  1266. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  1267. /* where the GEMM unrolling parameters do not depend on l2 */
  1268. #ifdef BUILD_BFLOAT16
  1269. TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
  1270. TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
  1271. TABLE_NAME.bgemm_p = BGEMM_DEFAULT_P;
  1272. TABLE_NAME.bgemm_q = BGEMM_DEFAULT_Q;
  1273. #endif
  1274. #ifdef BUILD_HFLOAT16
  1275. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  1276. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  1277. #endif
  1278. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1279. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  1280. #endif
  1281. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1282. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  1283. #endif
  1284. #if BUILD_COMPLEX == 1
  1285. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  1286. #endif
  1287. #if BUILD_COMPLEX16==1
  1288. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  1289. #endif
  1290. #if BUILD_COMPLEX == 1
  1291. #ifdef CGEMM3M_DEFAULT_Q
  1292. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  1293. #else
  1294. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  1295. #endif
  1296. #endif
  1297. #if BUILD_COMPLEX16 == 1
  1298. #ifdef ZGEMM3M_DEFAULT_Q
  1299. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  1300. #else
  1301. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  1302. #endif
  1303. #endif
  1304. #ifdef EXPRECISION
  1305. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  1306. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  1307. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  1308. #endif
  1309. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  1310. #ifdef DEBUG
  1311. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  1312. #endif
  1313. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1314. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  1315. #endif
  1316. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1317. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  1318. #endif
  1319. #if BUILD_COMPLEX==1
  1320. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  1321. #endif
  1322. #if BUILD_COMPLEX16==1
  1323. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  1324. #endif
  1325. #ifdef EXPRECISION
  1326. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  1327. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  1328. #endif
  1329. #endif
  1330. #ifdef CORE_NORTHWOOD
  1331. #ifdef DEBUG
  1332. fprintf(stderr, "Northwood\n");
  1333. #endif
  1334. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1335. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  1336. #endif
  1337. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1338. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  1339. #endif
  1340. #if BUILD_COMPLEX==1
  1341. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  1342. #endif
  1343. #if BUILD_COMPLEX16==1
  1344. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  1345. #endif
  1346. #ifdef EXPRECISION
  1347. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  1348. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  1349. #endif
  1350. #endif
  1351. #ifdef ATOM
  1352. #ifdef DEBUG
  1353. fprintf(stderr, "Atom\n");
  1354. #endif
  1355. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1356. TABLE_NAME.sgemm_p = 256;
  1357. #endif
  1358. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1359. TABLE_NAME.dgemm_p = 128;
  1360. #endif
  1361. #if BUILD_COMPLEX==1
  1362. TABLE_NAME.cgemm_p = 128;
  1363. #endif
  1364. #if BUILD_COMPLEX16==1
  1365. TABLE_NAME.zgemm_p = 64;
  1366. #endif
  1367. #ifdef EXPRECISION
  1368. TABLE_NAME.qgemm_p = 64;
  1369. TABLE_NAME.xgemm_p = 32;
  1370. #endif
  1371. #endif
  1372. #ifdef CORE_PRESCOTT
  1373. #ifdef DEBUG
  1374. fprintf(stderr, "Prescott\n");
  1375. #endif
  1376. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1377. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  1378. #endif
  1379. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1380. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  1381. #endif
  1382. #if BUILD_COMPLEX==1
  1383. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  1384. #endif
  1385. #if BUILD_COMPLEX16 == 1
  1386. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  1387. #endif
  1388. #ifdef EXPRECISION
  1389. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  1390. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  1391. #endif
  1392. #endif
  1393. #ifdef CORE2
  1394. #ifdef DEBUG
  1395. fprintf(stderr, "Core2\n");
  1396. #endif
  1397. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1398. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  1399. #endif
  1400. #if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
  1401. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  1402. #endif
  1403. #if BUILD_COMPLEX==1
  1404. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  1405. #endif
  1406. #if BUILD_COMPLEX16==1
  1407. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  1408. #endif
  1409. #ifdef EXPRECISION
  1410. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  1411. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  1412. #endif
  1413. #endif
  1414. #ifdef PENRYN
  1415. #ifdef DEBUG
  1416. fprintf(stderr, "Penryn\n");
  1417. #endif
  1418. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1419. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1420. #endif
  1421. #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
  1422. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1423. #endif
  1424. #if BUILD_COMPLEX==1
  1425. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1426. #endif
  1427. #if BUILD_COMPLEX16==1
  1428. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1429. #endif
  1430. #ifdef EXPRECISION
  1431. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1432. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1433. #endif
  1434. #endif
  1435. #ifdef DUNNINGTON
  1436. #ifdef DEBUG
  1437. fprintf(stderr, "Dunnington\n");
  1438. #endif
  1439. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1440. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  1441. #endif
  1442. #if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
  1443. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  1444. #endif
  1445. #if BUILD_COMPLEX==1
  1446. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  1447. #endif
  1448. #if BUILD_COMPLEX16==1
  1449. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  1450. #endif
  1451. #ifdef EXPRECISION
  1452. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  1453. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  1454. #endif
  1455. #endif
  1456. #ifdef NEHALEM
  1457. #ifdef DEBUG
  1458. fprintf(stderr, "Nehalem\n");
  1459. #endif
  1460. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1461. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1462. #endif
  1463. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1464. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1465. #endif
  1466. #if BUILD_COMPLEX
  1467. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1468. #endif
  1469. #if BUILD_COMPLEX16
  1470. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1471. #endif
  1472. #ifdef EXPRECISION
  1473. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1474. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1475. #endif
  1476. #endif
  1477. #ifdef SANDYBRIDGE
  1478. #ifdef DEBUG
  1479. fprintf(stderr, "Sandybridge\n");
  1480. #endif
  1481. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1482. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1483. #endif
  1484. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1485. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1486. #endif
  1487. #if BUILD_COMPLEX
  1488. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1489. #endif
  1490. #if BUILD_COMPLEX16
  1491. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1492. #endif
  1493. #ifdef EXPRECISION
  1494. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1495. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1496. #endif
  1497. #endif
  1498. #ifdef HASWELL
  1499. #ifdef DEBUG
  1500. fprintf(stderr, "Haswell\n");
  1501. #endif
  1502. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1503. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1504. #endif
  1505. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
  1506. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1507. #endif
  1508. #if BUILD_COMPLEX
  1509. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1510. #endif
  1511. #if BUILD_COMPLEX16
  1512. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1513. #endif
  1514. #ifdef EXPRECISION
  1515. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1516. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1517. #endif
  1518. #endif
  1519. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  1520. #ifdef DEBUG
  1521. fprintf(stderr, "SkylakeX\n");
  1522. #endif
  1523. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1524. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1525. #endif
  1526. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1527. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1528. #endif
  1529. #if BUILD_COMPLEX
  1530. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1531. #endif
  1532. #if BUILD_COMPLEX16
  1533. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1534. #endif
  1535. #ifdef EXPRECISION
  1536. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1537. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1538. #endif
  1539. #endif
  1540. #ifdef OPTERON
  1541. #ifdef DEBUG
  1542. fprintf(stderr, "Opteron\n");
  1543. #endif
  1544. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1545. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  1546. #endif
  1547. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1548. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  1549. #endif
  1550. #if BUILD_COMPLEX
  1551. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  1552. #endif
  1553. #if BUILD_COMPLEX16
  1554. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  1555. #endif
  1556. #ifdef EXPRECISION
  1557. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  1558. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1559. #endif
  1560. #endif
  1561. #ifdef BARCELONA
  1562. #ifdef DEBUG
  1563. fprintf(stderr, "Barcelona\n");
  1564. #endif
  1565. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1566. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1567. #endif
  1568. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1569. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1570. #endif
  1571. #if BUILD_COMPLEX
  1572. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1573. #endif
  1574. #if BUILD_COMPLEX16
  1575. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1576. #endif
  1577. #ifdef EXPRECISION
  1578. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1579. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1580. #endif
  1581. #endif
  1582. #ifdef BOBCAT
  1583. #ifdef DEBUG
  1584. fprintf(stderr, "Bobcate\n");
  1585. #endif
  1586. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1587. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1588. #endif
  1589. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1590. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1591. #endif
  1592. #if BUILD_COMPLEX
  1593. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1594. #endif
  1595. #if BUILD_COMPLEX16
  1596. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1597. #endif
  1598. #ifdef EXPRECISION
  1599. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1600. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1601. #endif
  1602. #endif
  1603. #ifdef BULLDOZER
  1604. #ifdef DEBUG
  1605. fprintf(stderr, "Bulldozer\n");
  1606. #endif
  1607. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1608. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1609. #endif
  1610. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1611. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1612. #endif
  1613. #if BUILD_COMPLEX
  1614. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1615. #endif
  1616. #if BUILD_COMPLEX16
  1617. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1618. #endif
  1619. #ifdef EXPRECISION
  1620. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1621. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1622. #endif
  1623. #endif
  1624. #ifdef EXCAVATOR
  1625. #ifdef DEBUG
  1626. fprintf(stderr, "Excavator\n");
  1627. #endif
  1628. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1629. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1630. #endif
  1631. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1632. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1633. #endif
  1634. #if BUILD_COMPLEX
  1635. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1636. #endif
  1637. #if BUILD_COMPLEX16
  1638. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1639. #endif
  1640. #ifdef EXPRECISION
  1641. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1642. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1643. #endif
  1644. #endif
  1645. #ifdef PILEDRIVER
  1646. #ifdef DEBUG
  1647. fprintf(stderr, "Piledriver\n");
  1648. #endif
  1649. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1650. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1651. #endif
  1652. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1653. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1654. #endif
  1655. #if BUILD_COMPLEX
  1656. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1657. #endif
  1658. #if BUILD_COMPLEX16
  1659. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1660. #endif
  1661. #ifdef EXPRECISION
  1662. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1663. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1664. #endif
  1665. #endif
  1666. #ifdef STEAMROLLER
  1667. #ifdef DEBUG
  1668. fprintf(stderr, "Steamroller\n");
  1669. #endif
  1670. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1671. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1672. #endif
  1673. #if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
  1674. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1675. #endif
  1676. #if BUILD_COMPLEX
  1677. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1678. #endif
  1679. #if BUILD_COMPLEX16
  1680. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1681. #endif
  1682. #ifdef EXPRECISION
  1683. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1684. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1685. #endif
  1686. #endif
  1687. #ifdef ZEN
  1688. #ifdef DEBUG
  1689. fprintf(stderr, "Zen\n");
  1690. #endif
  1691. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1692. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1693. #endif
  1694. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1695. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1696. #endif
  1697. #if BUILD_COMPLEX
  1698. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1699. #endif
  1700. #if BUILD_COMPLEX16
  1701. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1702. #endif
  1703. #ifdef EXPRECISION
  1704. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1705. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1706. #endif
  1707. #endif
  1708. #ifdef NANO
  1709. #ifdef DEBUG
  1710. fprintf(stderr, "NANO\n");
  1711. #endif
  1712. #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
  1713. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1714. #endif
  1715. #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
  1716. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1717. #endif
  1718. #if (BUILD_COMPLEX==1)
  1719. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1720. #endif
  1721. #if (BUILD_COMPLEX16==1)
  1722. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1723. #endif
  1724. #ifdef EXPRECISION
  1725. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1726. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1727. #endif
  1728. #endif
  1729. #ifdef SAPPHIRERAPIDS
  1730. #if (BUILD_BFLOAT16 == 1)
  1731. TABLE_NAME.need_amxtile_permission = 1;
  1732. #endif
  1733. #endif
  1734. #if BUILD_COMPLEX==1
  1735. #ifdef CGEMM3M_DEFAULT_P
  1736. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1737. #else
  1738. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1739. #endif
  1740. #endif
  1741. #if BUILD_COMPLEX16==1
  1742. #ifdef ZGEMM3M_DEFAULT_P
  1743. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1744. #else
  1745. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1746. #endif
  1747. #endif
  1748. #ifdef EXPRECISION
  1749. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1750. #endif
  1751. #if BUILD_SINGLE == 1
  1752. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1753. #endif
  1754. #if BUILD_DOUBLE== 1
  1755. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1756. #endif
  1757. #if BUILD_COMPLEX==1
  1758. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1759. #endif
  1760. #if BUILD_COMPLEX16==1
  1761. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1762. #endif
  1763. #if BUILD_COMPLEX==1
  1764. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1765. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1766. #else
  1767. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1768. #endif
  1769. #endif
  1770. #if BUILD_COMPLEX16==1
  1771. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1772. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1773. #else
  1774. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1775. #endif
  1776. #endif
  1777. #ifdef QUAD_PRECISION
  1778. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1779. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1780. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1781. #endif
  1782. #ifdef DEBUG
  1783. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1784. #endif
  1785. #if BUILD_BFLOAT16==1
  1786. TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
  1787. ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
  1788. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1789. ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
  1790. TABLE_NAME.bgemm_r = (((BUFFER_SIZE -
  1791. ((TABLE_NAME.bgemm_p * TABLE_NAME.bgemm_q * 4 + TABLE_NAME.offsetA
  1792. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1793. ) / (TABLE_NAME.bgemm_q * 4) - 15) & ~15);
  1794. #endif
  1795. #if BUILD_HFLOAT16==1
  1796. TABLE_NAME.shgemm_r = (((BUFFER_SIZE -
  1797. ((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA
  1798. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1799. ) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15);
  1800. #endif
  1801. #if BUILD_SINGLE==1
  1802. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1803. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1804. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1805. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1806. #endif
  1807. #if BUILD_DOUBLE==1
  1808. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1809. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1810. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1811. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1812. #endif
  1813. #ifdef EXPRECISION
  1814. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1815. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1816. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1817. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1818. #endif
  1819. #if BUILD_COMPLEX ==1
  1820. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1821. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1822. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1823. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1824. #endif
  1825. #if BUILD_COMPLEX16 ==1
  1826. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1827. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1828. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1829. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1830. #endif
  1831. #if BUILD_COMPLEX == 1
  1832. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1833. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1834. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1835. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1836. #endif
  1837. #if BUILD_COMPLEX16 == 1
  1838. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1839. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1840. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1841. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1842. #endif
  1843. #ifdef EXPRECISION
  1844. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1845. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1846. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1847. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1848. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1849. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1850. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1851. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1852. #endif
  1853. }
  1854. #endif //RISCV64
  1855. #endif //POWER
  1856. #endif //ZARCH
  1857. #endif //(ARCH_LOONGARCH64)
  1858. #endif //(ARCH_MIPS64)
  1859. #endif //(ARCH_ARM64)