You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

setparam-ref.c 42 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
11 years ago
11 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include <string.h>
  40. #include "common.h"
  41. #ifdef BUILD_KERNEL
  42. #include "kernelTS.h"
  43. #endif
  44. #undef DEBUG
  45. static void init_parameter(void);
  /*
   * Kernel / parameter table for one build target.
   *
   * This is a purely positional initializer: each value below has to line up
   * with the corresponding member of gotoblas_t, which is declared outside
   * this file view.  Entries must therefore not be reordered, added or
   * removed without changing the struct in lockstep; the preprocessor
   * conditionals decide which entries are emitted for a configuration.
   *
   * Recurring pattern: wherever a "#if X_UNROLL_M != X_UNROLL_N" guard
   * appears, the i*-named copy routines are listed when the two unroll
   * factors differ, otherwise the o*-named routines are listed in their
   * place; the o*-named routines then follow unconditionally.
   *
   * NOTE(review): each "0, 0, 0," triple presumably fills the p/q/r blocking
   * members that init_parameter assigns at run time -- confirm against the
   * gotoblas_t declaration.
   */
  46. gotoblas_t TABLE_NAME = {
  47. DTB_DEFAULT_ENTRIES ,
  48. GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
  /*
   * ---- Half-precision section: emitted only when BUILD_HALF is defined ----
   * Apart from the shgemm kernel/beta/copy entries it lists the same
   * s-prefixed routines as the single-precision section further down, but
   * without the HAVE_EXCLUSIVE_CACHE flag and the sgemm_direct entries.
   */
  49. #ifdef BUILD_HALF
  50. 0, 0, 0,
  51. SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
  /* Combined unroll value: explicit macro if provided, else the larger of M and N. */
  52. #ifdef SHGEMM_DEFAULT_UNROLL_MN
  53. SHGEMM_DEFAULT_UNROLL_MN,
  54. #else
  55. MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
  56. #endif
  57. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  58. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  59. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  60. dsdot_kTS,
  61. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  62. sgemv_nTS, sgemv_tTS, sger_kTS,
  63. ssymv_LTS, ssymv_UTS,
  64. shgemm_kernelTS, shgemm_betaTS,
  65. #if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
  66. shgemm_incopyTS, shgemm_itcopyTS,
  67. #else
  68. shgemm_oncopyTS, shgemm_otcopyTS,
  69. #endif
  70. shgemm_oncopyTS, shgemm_otcopyTS,
  /* The triangular/symmetric copy selection below keys off the SGEMM (not SHGEMM) unroll macros. */
  71. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  72. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  73. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  74. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  75. #else
  76. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  77. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  78. #endif
  79. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  80. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  81. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  82. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  83. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  84. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  85. #else
  86. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  87. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  88. #endif
  89. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  90. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  91. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  92. ssymm_iutcopyTS, ssymm_iltcopyTS,
  93. #else
  94. ssymm_outcopyTS, ssymm_oltcopyTS,
  95. #endif
  96. ssymm_outcopyTS, ssymm_oltcopyTS,
  /* LAPACK helper entries; NULL placeholders keep the slot count when NO_LAPACK is defined. */
  97. #ifndef NO_LAPACK
  98. sneg_tcopyTS, slaswp_ncopyTS,
  99. #else
  100. NULL,NULL,
  101. #endif
  102. #endif
  /* ---- Single-precision real section (s-prefixed routines) ---- */
  103. 0, 0, 0,
  104. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
  105. #ifdef SGEMM_DEFAULT_UNROLL_MN
  106. SGEMM_DEFAULT_UNROLL_MN,
  107. #else
  108. MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  109. #endif
  /* Flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, 0 otherwise. */
  110. #ifdef HAVE_EXCLUSIVE_CACHE
  111. 1,
  112. #else
  113. 0,
  114. #endif
  115. samax_kTS, samin_kTS, smax_kTS, smin_kTS,
  116. isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
  117. snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
  118. dsdot_kTS,
  119. srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
  120. sgemv_nTS, sgemv_tTS, sger_kTS,
  121. ssymv_LTS, ssymv_UTS,
  /* Two extra entries are emitted only for ARCH_X86_64 builds. */
  122. #ifdef ARCH_X86_64
  123. sgemm_directTS,
  124. sgemm_direct_performantTS,
  125. #endif
  126. sgemm_kernelTS, sgemm_betaTS,
  127. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  128. sgemm_incopyTS, sgemm_itcopyTS,
  129. #else
  130. sgemm_oncopyTS, sgemm_otcopyTS,
  131. #endif
  132. sgemm_oncopyTS, sgemm_otcopyTS,
  133. strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
  134. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  135. strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
  136. strsm_ilnucopyTS, strsm_ilnncopyTS, strsm_iltucopyTS, strsm_iltncopyTS,
  137. #else
  138. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  139. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  140. #endif
  141. strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
  142. strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
  143. strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
  144. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  145. strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
  146. strmm_ilnucopyTS, strmm_ilnncopyTS, strmm_iltucopyTS, strmm_iltncopyTS,
  147. #else
  148. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  149. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  150. #endif
  151. strmm_ounucopyTS, strmm_ounncopyTS, strmm_outucopyTS, strmm_outncopyTS,
  152. strmm_olnucopyTS, strmm_olnncopyTS, strmm_oltucopyTS, strmm_oltncopyTS,
  153. #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  154. ssymm_iutcopyTS, ssymm_iltcopyTS,
  155. #else
  156. ssymm_outcopyTS, ssymm_oltcopyTS,
  157. #endif
  158. ssymm_outcopyTS, ssymm_oltcopyTS,
  159. #ifndef NO_LAPACK
  160. sneg_tcopyTS, slaswp_ncopyTS,
  161. #else
  162. NULL,NULL,
  163. #endif
  /* ---- Double-precision real section (d-prefixed routines) ---- */
  164. 0, 0, 0,
  165. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
  166. #ifdef DGEMM_DEFAULT_UNROLL_MN
  167. DGEMM_DEFAULT_UNROLL_MN,
  168. #else
  169. MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  170. #endif
  171. damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
  172. idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
  173. dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
  174. drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
  175. dgemv_nTS, dgemv_tTS, dger_kTS,
  176. dsymv_LTS, dsymv_UTS,
  177. dgemm_kernelTS, dgemm_betaTS,
  178. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  179. dgemm_incopyTS, dgemm_itcopyTS,
  180. #else
  181. dgemm_oncopyTS, dgemm_otcopyTS,
  182. #endif
  183. dgemm_oncopyTS, dgemm_otcopyTS,
  184. dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
  185. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  186. dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
  187. dtrsm_ilnucopyTS, dtrsm_ilnncopyTS, dtrsm_iltucopyTS, dtrsm_iltncopyTS,
  188. #else
  189. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  190. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  191. #endif
  192. dtrsm_ounucopyTS, dtrsm_ounncopyTS, dtrsm_outucopyTS, dtrsm_outncopyTS,
  193. dtrsm_olnucopyTS, dtrsm_olnncopyTS, dtrsm_oltucopyTS, dtrsm_oltncopyTS,
  194. dtrmm_kernel_RNTS, dtrmm_kernel_RTTS, dtrmm_kernel_LNTS, dtrmm_kernel_LTTS,
  195. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  196. dtrmm_iunucopyTS, dtrmm_iunncopyTS, dtrmm_iutucopyTS, dtrmm_iutncopyTS,
  197. dtrmm_ilnucopyTS, dtrmm_ilnncopyTS, dtrmm_iltucopyTS, dtrmm_iltncopyTS,
  198. #else
  199. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  200. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  201. #endif
  202. dtrmm_ounucopyTS, dtrmm_ounncopyTS, dtrmm_outucopyTS, dtrmm_outncopyTS,
  203. dtrmm_olnucopyTS, dtrmm_olnncopyTS, dtrmm_oltucopyTS, dtrmm_oltncopyTS,
  204. #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  205. dsymm_iutcopyTS, dsymm_iltcopyTS,
  206. #else
  207. dsymm_outcopyTS, dsymm_oltcopyTS,
  208. #endif
  209. dsymm_outcopyTS, dsymm_oltcopyTS,
  210. #ifndef NO_LAPACK
  211. dneg_tcopyTS, dlaswp_ncopyTS,
  212. #else
  213. NULL, NULL,
  214. #endif
  /* ---- q-prefixed real section: emitted only when EXPRECISION is defined ---- */
  215. #ifdef EXPRECISION
  216. 0, 0, 0,
  217. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  218. qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS,
  219. iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS,
  220. qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS,
  221. qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS,
  222. qgemv_nTS, qgemv_tTS, qger_kTS,
  223. qsymv_LTS, qsymv_UTS,
  224. qgemm_kernelTS, qgemm_betaTS,
  225. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  226. qgemm_incopyTS, qgemm_itcopyTS,
  227. #else
  228. qgemm_oncopyTS, qgemm_otcopyTS,
  229. #endif
  230. qgemm_oncopyTS, qgemm_otcopyTS,
  231. qtrsm_kernel_LNTS, qtrsm_kernel_LTTS, qtrsm_kernel_RNTS, qtrsm_kernel_RTTS,
  232. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  233. qtrsm_iunucopyTS, qtrsm_iunncopyTS, qtrsm_iutucopyTS, qtrsm_iutncopyTS,
  234. qtrsm_ilnucopyTS, qtrsm_ilnncopyTS, qtrsm_iltucopyTS, qtrsm_iltncopyTS,
  235. #else
  236. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  237. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  238. #endif
  239. qtrsm_ounucopyTS, qtrsm_ounncopyTS, qtrsm_outucopyTS, qtrsm_outncopyTS,
  240. qtrsm_olnucopyTS, qtrsm_olnncopyTS, qtrsm_oltucopyTS, qtrsm_oltncopyTS,
  241. qtrmm_kernel_RNTS, qtrmm_kernel_RTTS, qtrmm_kernel_LNTS, qtrmm_kernel_LTTS,
  242. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  243. qtrmm_iunucopyTS, qtrmm_iunncopyTS, qtrmm_iutucopyTS, qtrmm_iutncopyTS,
  244. qtrmm_ilnucopyTS, qtrmm_ilnncopyTS, qtrmm_iltucopyTS, qtrmm_iltncopyTS,
  245. #else
  246. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  247. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  248. #endif
  249. qtrmm_ounucopyTS, qtrmm_ounncopyTS, qtrmm_outucopyTS, qtrmm_outncopyTS,
  250. qtrmm_olnucopyTS, qtrmm_olnncopyTS, qtrmm_oltucopyTS, qtrmm_oltncopyTS,
  251. #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  252. qsymm_iutcopyTS, qsymm_iltcopyTS,
  253. #else
  254. qsymm_outcopyTS, qsymm_oltcopyTS,
  255. #endif
  256. qsymm_outcopyTS, qsymm_oltcopyTS,
  257. #ifndef NO_LAPACK
  258. qneg_tcopyTS, qlaswp_ncopyTS,
  259. #else
  260. NULL, NULL,
  261. #endif
  262. #endif
  /* ---- Single-precision complex section (c-prefixed routines) ---- */
  263. 0, 0, 0,
  264. CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
  265. #ifdef CGEMM_DEFAULT_UNROLL_MN
  266. CGEMM_DEFAULT_UNROLL_MN,
  267. #else
  268. MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
  269. #endif
  270. camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
  271. cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
  272. cdotu_kTS, cdotc_kTS, csrot_kTS,
  273. caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
  274. cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
  275. cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
  276. cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
  277. csymv_LTS, csymv_UTS,
  278. chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
  279. cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
  280. cgemm_betaTS,
  281. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  282. cgemm_incopyTS, cgemm_itcopyTS,
  283. #else
  284. cgemm_oncopyTS, cgemm_otcopyTS,
  285. #endif
  286. cgemm_oncopyTS, cgemm_otcopyTS,
  287. ctrsm_kernel_LNTS, ctrsm_kernel_LTTS, ctrsm_kernel_LRTS, ctrsm_kernel_LCTS,
  288. ctrsm_kernel_RNTS, ctrsm_kernel_RTTS, ctrsm_kernel_RRTS, ctrsm_kernel_RCTS,
  289. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  290. ctrsm_iunucopyTS, ctrsm_iunncopyTS, ctrsm_iutucopyTS, ctrsm_iutncopyTS,
  291. ctrsm_ilnucopyTS, ctrsm_ilnncopyTS, ctrsm_iltucopyTS, ctrsm_iltncopyTS,
  292. #else
  293. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  294. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  295. #endif
  296. ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
  297. ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
  298. ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
  299. ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
  300. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  301. ctrmm_iunucopyTS, ctrmm_iunncopyTS, ctrmm_iutucopyTS, ctrmm_iutncopyTS,
  302. ctrmm_ilnucopyTS, ctrmm_ilnncopyTS, ctrmm_iltucopyTS, ctrmm_iltncopyTS,
  303. #else
  304. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  305. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  306. #endif
  307. ctrmm_ounucopyTS, ctrmm_ounncopyTS, ctrmm_outucopyTS, ctrmm_outncopyTS,
  308. ctrmm_olnucopyTS, ctrmm_olnncopyTS, ctrmm_oltucopyTS, ctrmm_oltncopyTS,
  309. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  310. csymm_iutcopyTS, csymm_iltcopyTS,
  311. #else
  312. csymm_outcopyTS, csymm_oltcopyTS,
  313. #endif
  314. csymm_outcopyTS, csymm_oltcopyTS,
  315. #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  316. chemm_iutcopyTS, chemm_iltcopyTS,
  317. #else
  318. chemm_outcopyTS, chemm_oltcopyTS,
  319. #endif
  320. chemm_outcopyTS, chemm_oltcopyTS,
  /*
   * ---- cgemm3m section ----
   * With USE_GEMM3M: three unroll values (CGEMM3M defaults if defined,
   * otherwise the SGEMM defaults) followed by 37 routine entries.
   * Without it: three zeros and 37 NULLs, so both branches emit the same
   * number of initializers.
   */
  321. 0, 0, 0,
  322. #if defined(USE_GEMM3M)
  323. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  324. CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
  325. #else
  326. SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
  327. #endif
  328. cgemm3m_kernelTS,
  329. cgemm3m_incopybTS, cgemm3m_incopyrTS,
  330. cgemm3m_incopyiTS, cgemm3m_itcopybTS,
  331. cgemm3m_itcopyrTS, cgemm3m_itcopyiTS,
  332. cgemm3m_oncopybTS, cgemm3m_oncopyrTS,
  333. cgemm3m_oncopyiTS, cgemm3m_otcopybTS,
  334. cgemm3m_otcopyrTS, cgemm3m_otcopyiTS,
  335. csymm3m_iucopybTS, csymm3m_ilcopybTS,
  336. csymm3m_iucopyrTS, csymm3m_ilcopyrTS,
  337. csymm3m_iucopyiTS, csymm3m_ilcopyiTS,
  338. csymm3m_oucopybTS, csymm3m_olcopybTS,
  339. csymm3m_oucopyrTS, csymm3m_olcopyrTS,
  340. csymm3m_oucopyiTS, csymm3m_olcopyiTS,
  341. chemm3m_iucopybTS, chemm3m_ilcopybTS,
  342. chemm3m_iucopyrTS, chemm3m_ilcopyrTS,
  343. chemm3m_iucopyiTS, chemm3m_ilcopyiTS,
  344. chemm3m_oucopybTS, chemm3m_olcopybTS,
  345. chemm3m_oucopyrTS, chemm3m_olcopyrTS,
  346. chemm3m_oucopyiTS, chemm3m_olcopyiTS,
  347. #else
  348. 0, 0, 0,
  349. NULL,
  350. NULL, NULL,
  351. NULL, NULL,
  352. NULL, NULL,
  353. NULL, NULL,
  354. NULL, NULL,
  355. NULL, NULL,
  356. NULL, NULL,
  357. NULL, NULL,
  358. NULL, NULL,
  359. NULL, NULL,
  360. NULL, NULL,
  361. NULL, NULL,
  362. NULL, NULL,
  363. NULL, NULL,
  364. NULL, NULL,
  365. NULL, NULL,
  366. NULL, NULL,
  367. NULL, NULL,
  368. #endif
  369. #ifndef NO_LAPACK
  370. cneg_tcopyTS, claswp_ncopyTS,
  371. #else
  372. NULL, NULL,
  373. #endif
  /* ---- Double-precision complex section (z-prefixed routines) ---- */
  374. 0, 0, 0,
  375. ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
  376. #ifdef ZGEMM_DEFAULT_UNROLL_MN
  377. ZGEMM_DEFAULT_UNROLL_MN,
  378. #else
  379. MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
  380. #endif
  381. zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS,
  382. znrm2_kTS, zasum_kTS, zsum_kTS, zcopy_kTS,
  383. zdotu_kTS, zdotc_kTS, zdrot_kTS,
  384. zaxpy_kTS, zaxpyc_kTS, zscal_kTS, zswap_kTS,
  385. zgemv_nTS, zgemv_tTS, zgemv_rTS, zgemv_cTS,
  386. zgemv_oTS, zgemv_uTS, zgemv_sTS, zgemv_dTS,
  387. zgeru_kTS, zgerc_kTS, zgerv_kTS, zgerd_kTS,
  388. zsymv_LTS, zsymv_UTS,
  389. zhemv_LTS, zhemv_UTS, zhemv_MTS, zhemv_VTS,
  390. zgemm_kernel_nTS, zgemm_kernel_lTS, zgemm_kernel_rTS, zgemm_kernel_bTS,
  391. zgemm_betaTS,
  392. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  393. zgemm_incopyTS, zgemm_itcopyTS,
  394. #else
  395. zgemm_oncopyTS, zgemm_otcopyTS,
  396. #endif
  397. zgemm_oncopyTS, zgemm_otcopyTS,
  398. ztrsm_kernel_LNTS, ztrsm_kernel_LTTS, ztrsm_kernel_LRTS, ztrsm_kernel_LCTS,
  399. ztrsm_kernel_RNTS, ztrsm_kernel_RTTS, ztrsm_kernel_RRTS, ztrsm_kernel_RCTS,
  400. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  401. ztrsm_iunucopyTS, ztrsm_iunncopyTS, ztrsm_iutucopyTS, ztrsm_iutncopyTS,
  402. ztrsm_ilnucopyTS, ztrsm_ilnncopyTS, ztrsm_iltucopyTS, ztrsm_iltncopyTS,
  403. #else
  404. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  405. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  406. #endif
  407. ztrsm_ounucopyTS, ztrsm_ounncopyTS, ztrsm_outucopyTS, ztrsm_outncopyTS,
  408. ztrsm_olnucopyTS, ztrsm_olnncopyTS, ztrsm_oltucopyTS, ztrsm_oltncopyTS,
  409. ztrmm_kernel_RNTS, ztrmm_kernel_RTTS, ztrmm_kernel_RRTS, ztrmm_kernel_RCTS,
  410. ztrmm_kernel_LNTS, ztrmm_kernel_LTTS, ztrmm_kernel_LRTS, ztrmm_kernel_LCTS,
  411. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  412. ztrmm_iunucopyTS, ztrmm_iunncopyTS, ztrmm_iutucopyTS, ztrmm_iutncopyTS,
  413. ztrmm_ilnucopyTS, ztrmm_ilnncopyTS, ztrmm_iltucopyTS, ztrmm_iltncopyTS,
  414. #else
  415. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  416. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  417. #endif
  418. ztrmm_ounucopyTS, ztrmm_ounncopyTS, ztrmm_outucopyTS, ztrmm_outncopyTS,
  419. ztrmm_olnucopyTS, ztrmm_olnncopyTS, ztrmm_oltucopyTS, ztrmm_oltncopyTS,
  420. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  421. zsymm_iutcopyTS, zsymm_iltcopyTS,
  422. #else
  423. zsymm_outcopyTS, zsymm_oltcopyTS,
  424. #endif
  425. zsymm_outcopyTS, zsymm_oltcopyTS,
  426. #if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  427. zhemm_iutcopyTS, zhemm_iltcopyTS,
  428. #else
  429. zhemm_outcopyTS, zhemm_oltcopyTS,
  430. #endif
  431. zhemm_outcopyTS, zhemm_oltcopyTS,
  /*
   * ---- zgemm3m section ----
   * Same layout as the cgemm3m section; the fallback unroll values are the
   * DGEMM defaults, and the disabled branch emits three zeros and 37 NULLs.
   */
  432. 0, 0, 0,
  433. #if defined(USE_GEMM3M)
  434. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  435. ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
  436. #else
  437. DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
  438. #endif
  439. zgemm3m_kernelTS,
  440. zgemm3m_incopybTS, zgemm3m_incopyrTS,
  441. zgemm3m_incopyiTS, zgemm3m_itcopybTS,
  442. zgemm3m_itcopyrTS, zgemm3m_itcopyiTS,
  443. zgemm3m_oncopybTS, zgemm3m_oncopyrTS,
  444. zgemm3m_oncopyiTS, zgemm3m_otcopybTS,
  445. zgemm3m_otcopyrTS, zgemm3m_otcopyiTS,
  446. zsymm3m_iucopybTS, zsymm3m_ilcopybTS,
  447. zsymm3m_iucopyrTS, zsymm3m_ilcopyrTS,
  448. zsymm3m_iucopyiTS, zsymm3m_ilcopyiTS,
  449. zsymm3m_oucopybTS, zsymm3m_olcopybTS,
  450. zsymm3m_oucopyrTS, zsymm3m_olcopyrTS,
  451. zsymm3m_oucopyiTS, zsymm3m_olcopyiTS,
  452. zhemm3m_iucopybTS, zhemm3m_ilcopybTS,
  453. zhemm3m_iucopyrTS, zhemm3m_ilcopyrTS,
  454. zhemm3m_iucopyiTS, zhemm3m_ilcopyiTS,
  455. zhemm3m_oucopybTS, zhemm3m_olcopybTS,
  456. zhemm3m_oucopyrTS, zhemm3m_olcopyrTS,
  457. zhemm3m_oucopyiTS, zhemm3m_olcopyiTS,
  458. #else
  459. 0, 0, 0,
  460. NULL,
  461. NULL, NULL,
  462. NULL, NULL,
  463. NULL, NULL,
  464. NULL, NULL,
  465. NULL, NULL,
  466. NULL, NULL,
  467. NULL, NULL,
  468. NULL, NULL,
  469. NULL, NULL,
  470. NULL, NULL,
  471. NULL, NULL,
  472. NULL, NULL,
  473. NULL, NULL,
  474. NULL, NULL,
  475. NULL, NULL,
  476. NULL, NULL,
  477. NULL, NULL,
  478. NULL, NULL,
  479. #endif
  480. #ifndef NO_LAPACK
  481. zneg_tcopyTS, zlaswp_ncopyTS,
  482. #else
  483. NULL, NULL,
  484. #endif
  /* ---- x-prefixed complex section: emitted only when EXPRECISION is defined ---- */
  485. #ifdef EXPRECISION
  486. 0, 0, 0,
  487. XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),
  488. xamax_kTS, xamin_kTS, ixamax_kTS, ixamin_kTS,
  489. xnrm2_kTS, xasum_kTS, xsum_kTS, xcopy_kTS,
  490. xdotu_kTS, xdotc_kTS, xqrot_kTS,
  491. xaxpy_kTS, xaxpyc_kTS, xscal_kTS, xswap_kTS,
  492. xgemv_nTS, xgemv_tTS, xgemv_rTS, xgemv_cTS,
  493. xgemv_oTS, xgemv_uTS, xgemv_sTS, xgemv_dTS,
  494. xgeru_kTS, xgerc_kTS, xgerv_kTS, xgerd_kTS,
  495. xsymv_LTS, xsymv_UTS,
  496. xhemv_LTS, xhemv_UTS, xhemv_MTS, xhemv_VTS,
  497. xgemm_kernel_nTS, xgemm_kernel_lTS, xgemm_kernel_rTS, xgemm_kernel_bTS,
  498. xgemm_betaTS,
  499. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  500. xgemm_incopyTS, xgemm_itcopyTS,
  501. #else
  502. xgemm_oncopyTS, xgemm_otcopyTS,
  503. #endif
  504. xgemm_oncopyTS, xgemm_otcopyTS,
  505. xtrsm_kernel_LNTS, xtrsm_kernel_LTTS, xtrsm_kernel_LRTS, xtrsm_kernel_LCTS,
  506. xtrsm_kernel_RNTS, xtrsm_kernel_RTTS, xtrsm_kernel_RRTS, xtrsm_kernel_RCTS,
  507. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  508. xtrsm_iunucopyTS, xtrsm_iunncopyTS, xtrsm_iutucopyTS, xtrsm_iutncopyTS,
  509. xtrsm_ilnucopyTS, xtrsm_ilnncopyTS, xtrsm_iltucopyTS, xtrsm_iltncopyTS,
  510. #else
  511. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  512. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  513. #endif
  514. xtrsm_ounucopyTS, xtrsm_ounncopyTS, xtrsm_outucopyTS, xtrsm_outncopyTS,
  515. xtrsm_olnucopyTS, xtrsm_olnncopyTS, xtrsm_oltucopyTS, xtrsm_oltncopyTS,
  516. xtrmm_kernel_RNTS, xtrmm_kernel_RTTS, xtrmm_kernel_RRTS, xtrmm_kernel_RCTS,
  517. xtrmm_kernel_LNTS, xtrmm_kernel_LTTS, xtrmm_kernel_LRTS, xtrmm_kernel_LCTS,
  518. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  519. xtrmm_iunucopyTS, xtrmm_iunncopyTS, xtrmm_iutucopyTS, xtrmm_iutncopyTS,
  520. xtrmm_ilnucopyTS, xtrmm_ilnncopyTS, xtrmm_iltucopyTS, xtrmm_iltncopyTS,
  521. #else
  522. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  523. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  524. #endif
  525. xtrmm_ounucopyTS, xtrmm_ounncopyTS, xtrmm_outucopyTS, xtrmm_outncopyTS,
  526. xtrmm_olnucopyTS, xtrmm_olnncopyTS, xtrmm_oltucopyTS, xtrmm_oltncopyTS,
  527. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  528. xsymm_iutcopyTS, xsymm_iltcopyTS,
  529. #else
  530. xsymm_outcopyTS, xsymm_oltcopyTS,
  531. #endif
  532. xsymm_outcopyTS, xsymm_oltcopyTS,
  533. #if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  534. xhemm_iutcopyTS, xhemm_iltcopyTS,
  535. #else
  536. xhemm_outcopyTS, xhemm_oltcopyTS,
  537. #endif
  538. xhemm_outcopyTS, xhemm_oltcopyTS,
  /*
   * ---- xgemm3m section ----
   * With USE_GEMM3M the unroll values are taken from the QGEMM defaults;
   * the disabled branch emits three zeros and 37 NULLs.
   */
  539. 0, 0, 0,
  540. #if defined(USE_GEMM3M)
  541. QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
  542. xgemm3m_kernelTS,
  543. xgemm3m_incopybTS, xgemm3m_incopyrTS,
  544. xgemm3m_incopyiTS, xgemm3m_itcopybTS,
  545. xgemm3m_itcopyrTS, xgemm3m_itcopyiTS,
  546. xgemm3m_oncopybTS, xgemm3m_oncopyrTS,
  547. xgemm3m_oncopyiTS, xgemm3m_otcopybTS,
  548. xgemm3m_otcopyrTS, xgemm3m_otcopyiTS,
  549. xsymm3m_iucopybTS, xsymm3m_ilcopybTS,
  550. xsymm3m_iucopyrTS, xsymm3m_ilcopyrTS,
  551. xsymm3m_iucopyiTS, xsymm3m_ilcopyiTS,
  552. xsymm3m_oucopybTS, xsymm3m_olcopybTS,
  553. xsymm3m_oucopyrTS, xsymm3m_olcopyrTS,
  554. xsymm3m_oucopyiTS, xsymm3m_olcopyiTS,
  555. xhemm3m_iucopybTS, xhemm3m_ilcopybTS,
  556. xhemm3m_iucopyrTS, xhemm3m_ilcopyrTS,
  557. xhemm3m_iucopyiTS, xhemm3m_ilcopyiTS,
  558. xhemm3m_oucopybTS, xhemm3m_olcopybTS,
  559. xhemm3m_oucopyrTS, xhemm3m_olcopyrTS,
  560. xhemm3m_oucopyiTS, xhemm3m_olcopyiTS,
  561. #else
  562. 0, 0, 0,
  563. NULL,
  564. NULL, NULL,
  565. NULL, NULL,
  566. NULL, NULL,
  567. NULL, NULL,
  568. NULL, NULL,
  569. NULL, NULL,
  570. NULL, NULL,
  571. NULL, NULL,
  572. NULL, NULL,
  573. NULL, NULL,
  574. NULL, NULL,
  575. NULL, NULL,
  576. NULL, NULL,
  577. NULL, NULL,
  578. NULL, NULL,
  579. NULL, NULL,
  580. NULL, NULL,
  581. NULL, NULL,
  582. #endif
  583. #ifndef NO_LAPACK
  584. xneg_tcopyTS, xlaswp_ncopyTS,
  585. #else
  586. NULL, NULL,
  587. #endif
  588. #endif
  /*
   * ---- Tail of the table ----
   * Pointer to the init_parameter function declared above, then the
   * SNUMOPT/DNUMOPT/QNUMOPT values, then the axpby, out-of-place (omatcopy)
   * and in-place (imatcopy) matrix-copy entries and the geadd entries for
   * the s/d/c/z variants.
   */
  589. init_parameter,
  590. SNUMOPT, DNUMOPT, QNUMOPT,
  591. saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,
  592. somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
  593. domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
  594. comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
  595. comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
  596. zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
  597. zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
  598. simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
  599. dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
  600. cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
  601. cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
  602. zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
  603. zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
  604. sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS
  605. };
  606. #if defined(ARCH_ARM64)
  /*
   * init_parameter -- ARCH_ARM64 variant.
   *
   * Assigns the p/q/r members of TABLE_NAME for each enabled precision from
   * the matching *_DEFAULT_P / *_DEFAULT_Q / *_DEFAULT_R macros (defined
   * outside this file view).  Takes no arguments and returns nothing; the
   * visible statements only write TABLE_NAME members.
   *
   * Statement order matters: the 3M assignments near the end read
   * sgemm_/dgemm_/qgemm_ members that are written earlier in this function.
   */
  607. static void init_parameter(void) {
  /* --- "p" members: shgemm only when BUILD_HALF is defined --- */
  608. #if defined(BUILD_HALF)
  609. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  610. #endif
  611. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  612. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  613. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  614. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  /* --- "q" members --- */
  615. #if defined(BUILD_HALF)
  616. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  617. #endif
  618. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  619. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  620. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  621. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  /* --- "r" members --- */
  622. #if defined(BUILD_HALF)
  623. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  624. #endif
  625. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  626. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  627. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  628. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  /* --- q/x-prefixed members, only when EXPRECISION is defined --- */
  629. #ifdef EXPRECISION
  630. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  631. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  632. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  633. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  634. TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R;
  635. TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
  636. #endif
  /*
   * --- 3M members, only when USE_GEMM3M is defined ---
   * Each cgemm3m_/zgemm3m_ member takes its dedicated *3M_DEFAULT_* macro
   * when that macro is defined; otherwise it copies the value already
   * stored in the sgemm_ (for c) or dgemm_ (for z) member above.
   */
  637. #if defined(USE_GEMM3M)
  638. #ifdef CGEMM3M_DEFAULT_P
  639. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  640. #else
  641. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  642. #endif
  643. #ifdef ZGEMM3M_DEFAULT_P
  644. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  645. #else
  646. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  647. #endif
  648. #ifdef CGEMM3M_DEFAULT_Q
  649. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  650. #else
  651. TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q;
  652. #endif
  653. #ifdef ZGEMM3M_DEFAULT_Q
  654. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  655. #else
  656. TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q;
  657. #endif
  658. #ifdef CGEMM3M_DEFAULT_R
  659. TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R;
  660. #else
  661. TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r;
  662. #endif
  663. #ifdef ZGEMM3M_DEFAULT_R
  664. TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R;
  665. #else
  666. TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r;
  667. #endif
  /* xgemm3m has no dedicated macros here: it always copies the qgemm_ values. */
  668. #ifdef EXPRECISION
  669. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  670. TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q;
  671. TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
  672. #endif
  673. #endif
  674. }
  675. #else // defined(ARCH_ARM64)
  676. #if defined(ARCH_POWER)
  677. static void init_parameter(void) {
  678. #ifdef BUILD_HALF
  679. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  680. #endif
  681. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  682. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  683. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  684. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  685. #ifdef BUILD_HALF
  686. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  687. #endif
  688. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  689. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  690. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  691. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  692. #ifdef BUILD_HALF
  693. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  694. #endif
  695. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  696. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  697. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  698. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  699. }
  700. #else //POWER
  701. #if defined(ARCH_ZARCH)
  702. static void init_parameter(void) {
  703. #ifdef BUILD_HALF
  704. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  705. #endif
  706. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  707. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  708. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  709. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  710. #ifdef BUILD_HALF
  711. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  712. #endif
  713. TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
  714. TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
  715. TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
  716. TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
  717. #ifdef BUILD_HALF
  718. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  719. #endif
  720. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  721. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  722. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  723. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  724. }
  725. #else //ZARCH
  #ifdef ARCH_X86
  /*
   * Legacy L2 cache size lookup for 32-bit x86, based on the cache descriptor
   * bytes returned by CPUID leaf 2.
   *
   * The 15 descriptor bytes (every byte of EAX..EDX except the low byte of
   * EAX, which is not a descriptor) are scanned in order and the first one
   * that matches a known L2 descriptor decides the result.
   *
   * Returns the L2 size in KB.  If no descriptor matches, a warning is printed
   * to stderr and 256 is returned.
   */
  static int get_l2_size_old(void){
    int i, eax, ebx, ecx, edx;
    int info[15];

    cpuid(2, &eax, &ebx, &ecx, &edx);

    /*
     * For leaf 2, bit 31 of a register being set means that register holds no
     * valid descriptors.  Clear such registers so that their bytes decode as
     * the null descriptor (0x00), which matches none of the cases below.
     */
    if (eax < 0) eax = 0;
    if (ebx < 0) ebx = 0;
    if (ecx < 0) ecx = 0;
    if (edx < 0) edx = 0;

    info[ 0] = BITMASK(eax,  8, 0xff);
    info[ 1] = BITMASK(eax, 16, 0xff);
    info[ 2] = BITMASK(eax, 24, 0xff);

    info[ 3] = BITMASK(ebx,  0, 0xff);
    info[ 4] = BITMASK(ebx,  8, 0xff);
    info[ 5] = BITMASK(ebx, 16, 0xff);
    info[ 6] = BITMASK(ebx, 24, 0xff);

    info[ 7] = BITMASK(ecx,  0, 0xff);
    info[ 8] = BITMASK(ecx,  8, 0xff);
    info[ 9] = BITMASK(ecx, 16, 0xff);
    info[10] = BITMASK(ecx, 24, 0xff);

    info[11] = BITMASK(edx,  0, 0xff);
    info[12] = BITMASK(edx,  8, 0xff);
    info[13] = BITMASK(edx, 16, 0xff);
    info[14] = BITMASK(edx, 24, 0xff);

    for (i = 0; i < 15; i++){
      switch (info[i]){
        /* This table is from http://www.sandpile.org/ia32/cpuid.htm */
        case 0x1a :
          return 96;

        case 0x39 :
        case 0x3b :
        case 0x41 :
        case 0x79 :
        case 0x81 :
          return 128;

        case 0x3a :
          return 192;

        case 0x21 :
        case 0x3c :
        case 0x42 :
        case 0x7a :
        case 0x7e :
        case 0x82 :
          return 256;

        case 0x3d :
          return 384;

        case 0x3e :
        case 0x43 :
        case 0x7b :
        case 0x7f :
        case 0x83 :
        case 0x86 :
          return 512;

        case 0x44 :
        case 0x78 :
        case 0x7c :
        case 0x84 :
        case 0x87 :
          return 1024;

        case 0x45 :
        case 0x7d :
        case 0x85 :
          return 2048;

        case 0x48 :
          /* 3 MB = 3072 KB (previously returned 3184 by mistake). */
          return 3072;

        case 0x49 :
          return 4096;

        case 0x4e :
          return 6144;
      }
    }

    /* No known L2 descriptor found: warn and fall back to a safe guess. */
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  }
  #endif
  /*
   * Returns the L2 cache size as reported in bits 16..31 of ECX by CPUID leaf
   * 0x80000006.
   *
   * When that field is not positive:
   *   - on ARCH_X86 builds the descriptor-based get_l2_size_old() is used;
   *   - otherwise a warning is printed to stderr and 256 is returned.
   */
  static __inline__ int get_l2_size(void){
    int eax, ebx, ecx, edx;
    int reported;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    reported = BITMASK(ecx, 16, 0xffff);

    /* Common case: the extended leaf gave a usable value. */
    if (reported > 0) return reported;

  #ifdef ARCH_X86
    return get_l2_size_old();
  #else
    fprintf (stderr,"OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k\n");
    return 256;
  #endif
  }
  /*
   * Returns the 14-bit field at bits 18..31 of EDX from CPUID leaf 0x80000006,
   * multiplied by 512.
   * NOTE(review): presumably this is the L3 size field counted in 512 KB
   * units, making the result KB - confirm against the vendor CPUID manual.
   */
  static __inline__ int get_l3_size(void){
    int eax, ebx, ecx, edx;
    int l3_field;

    cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
    l3_field = BITMASK(edx, 18, 0x3fff);

    return l3_field * 512;
  }
  818. static void init_parameter(void) {
  819. int l2 = get_l2_size();
  820. (void) l2; /* dirty trick to suppress unused variable warning for targets */
  821. /* where the GEMM unrolling parameters do not depend on l2 */
  822. #ifdef BUILD_HALF
  823. TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
  824. TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
  825. TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
  826. #endif
  827. TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  828. TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  829. TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  830. TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  831. #ifdef CGEMM3M_DEFAULT_Q
  832. TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
  833. #else
  834. TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
  835. #endif
  836. #ifdef ZGEMM3M_DEFAULT_Q
  837. TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
  838. #else
  839. TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
  840. #endif
  841. #ifdef EXPRECISION
  842. TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  843. TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  844. TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
  845. #endif
  846. #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
  847. #ifdef DEBUG
  848. fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
  849. #endif
  850. TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
  851. TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
  852. TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
  853. TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
  854. #ifdef EXPRECISION
  855. TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
  856. TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
  857. #endif
  858. #endif
  859. #ifdef CORE_NORTHWOOD
  860. #ifdef DEBUG
  861. fprintf(stderr, "Northwood\n");
  862. #endif
  863. TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
  864. TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
  865. TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
  866. TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
  867. #ifdef EXPRECISION
  868. TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
  869. TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
  870. #endif
  871. #endif
  872. #ifdef ATOM
  873. #ifdef DEBUG
  874. fprintf(stderr, "Atom\n");
  875. #endif
  876. TABLE_NAME.sgemm_p = 256;
  877. TABLE_NAME.dgemm_p = 128;
  878. TABLE_NAME.cgemm_p = 128;
  879. TABLE_NAME.zgemm_p = 64;
  880. #ifdef EXPRECISION
  881. TABLE_NAME.qgemm_p = 64;
  882. TABLE_NAME.xgemm_p = 32;
  883. #endif
  884. #endif
  885. #ifdef CORE_PRESCOTT
  886. #ifdef DEBUG
  887. fprintf(stderr, "Prescott\n");
  888. #endif
  889. TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
  890. TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
  891. TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
  892. TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
  893. #ifdef EXPRECISION
  894. TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
  895. TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
  896. #endif
  897. #endif
  898. #ifdef CORE2
  899. #ifdef DEBUG
  900. fprintf(stderr, "Core2\n");
  901. #endif
  902. TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
  903. TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
  904. TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
  905. TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
  906. #ifdef EXPRECISION
  907. TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
  908. TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
  909. #endif
  910. #endif
  911. #ifdef PENRYN
  912. #ifdef DEBUG
  913. fprintf(stderr, "Penryn\n");
  914. #endif
  915. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  916. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  917. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  918. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  919. #ifdef EXPRECISION
  920. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  921. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  922. #endif
  923. #endif
  924. #ifdef DUNNINGTON
  925. #ifdef DEBUG
  926. fprintf(stderr, "Dunnington\n");
  927. #endif
  928. TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
  929. TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
  930. TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
  931. TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
  932. #ifdef EXPRECISION
  933. TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
  934. TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
  935. #endif
  936. #endif
  937. #ifdef NEHALEM
  938. #ifdef DEBUG
  939. fprintf(stderr, "Nehalem\n");
  940. #endif
  941. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  942. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  943. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  944. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  945. #ifdef EXPRECISION
  946. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  947. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  948. #endif
  949. #endif
  950. #ifdef SANDYBRIDGE
  951. #ifdef DEBUG
  952. fprintf(stderr, "Sandybridge\n");
  953. #endif
  954. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  955. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  956. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  957. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  958. #ifdef EXPRECISION
  959. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  960. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  961. #endif
  962. #endif
  963. #ifdef HASWELL
  964. #ifdef DEBUG
  965. fprintf(stderr, "Haswell\n");
  966. #endif
  967. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  968. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  969. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  970. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  971. #ifdef EXPRECISION
  972. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  973. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  974. #endif
  975. #endif
  976. #if defined (SKYLAKEX) || defined (COOPERLAKE)
  977. #ifdef DEBUG
  978. fprintf(stderr, "SkylakeX\n");
  979. #endif
  980. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  981. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  982. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  983. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  984. #ifdef EXPRECISION
  985. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  986. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  987. #endif
  988. #endif
  989. #ifdef OPTERON
  990. #ifdef DEBUG
  991. fprintf(stderr, "Opteron\n");
  992. #endif
  993. TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
  994. TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
  995. TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
  996. TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
  997. #ifdef EXPRECISION
  998. TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
  999. TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
  1000. #endif
  1001. #endif
  1002. #ifdef BARCELONA
  1003. #ifdef DEBUG
  1004. fprintf(stderr, "Barcelona\n");
  1005. #endif
  1006. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1007. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1008. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1009. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1010. #ifdef EXPRECISION
  1011. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1012. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1013. #endif
  1014. #endif
  1015. #ifdef BOBCAT
  1016. #ifdef DEBUG
  1017. fprintf(stderr, "Bobcate\n");
  1018. #endif
  1019. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1020. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1021. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1022. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1023. #ifdef EXPRECISION
  1024. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1025. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1026. #endif
  1027. #endif
  1028. #ifdef BULLDOZER
  1029. #ifdef DEBUG
  1030. fprintf(stderr, "Bulldozer\n");
  1031. #endif
  1032. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1033. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1034. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1035. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1036. #ifdef EXPRECISION
  1037. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1038. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1039. #endif
  1040. #endif
  1041. #ifdef EXCAVATOR
  1042. #ifdef DEBUG
  1043. fprintf(stderr, "Excavator\n");
  1044. #endif
  1045. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1046. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1047. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1048. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1049. #ifdef EXPRECISION
  1050. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1051. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1052. #endif
  1053. #endif
  1054. #ifdef PILEDRIVER
  1055. #ifdef DEBUG
  1056. fprintf(stderr, "Piledriver\n");
  1057. #endif
  1058. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1059. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1060. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1061. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1062. #ifdef EXPRECISION
  1063. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1064. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1065. #endif
  1066. #endif
  1067. #ifdef STEAMROLLER
  1068. #ifdef DEBUG
  1069. fprintf(stderr, "Steamroller\n");
  1070. #endif
  1071. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1072. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1073. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1074. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1075. #ifdef EXPRECISION
  1076. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1077. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1078. #endif
  1079. #endif
  1080. #ifdef ZEN
  1081. #ifdef DEBUG
  1082. fprintf(stderr, "Zen\n");
  1083. #endif
  1084. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1085. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1086. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1087. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1088. #ifdef EXPRECISION
  1089. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1090. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1091. #endif
  1092. #endif
  1093. #ifdef NANO
  1094. #ifdef DEBUG
  1095. fprintf(stderr, "NANO\n");
  1096. #endif
  1097. TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  1098. TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  1099. TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  1100. TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  1101. #ifdef EXPRECISION
  1102. TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  1103. TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
  1104. #endif
  1105. #endif
  1106. #ifdef CGEMM3M_DEFAULT_P
  1107. TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
  1108. #else
  1109. TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
  1110. #endif
  1111. #ifdef ZGEMM3M_DEFAULT_P
  1112. TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
  1113. #else
  1114. TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
  1115. #endif
  1116. #ifdef EXPRECISION
  1117. TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
  1118. #endif
  1119. TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1120. TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1121. TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  1122. TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
  1123. #ifdef CGEMM3M_DEFAULT_UNROLL_M
  1124. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
  1125. #else
  1126. TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  1127. #endif
  1128. #ifdef ZGEMM3M_DEFAULT_UNROLL_M
  1129. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
  1130. #else
  1131. TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  1132. #endif
  1133. #ifdef QUAD_PRECISION
  1134. TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1135. TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1)/XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  1136. TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  1137. #endif
  1138. #ifdef DEBUG
  1139. fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
  1140. #endif
  1141. TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
  1142. ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
  1143. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1144. ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
  1145. TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
  1146. ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
  1147. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1148. ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
  1149. #ifdef EXPRECISION
  1150. TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
  1151. ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
  1152. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1153. ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
  1154. #endif
  1155. TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
  1156. ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
  1157. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1158. ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
  1159. TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
  1160. ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
  1161. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1162. ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
  1163. TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
  1164. ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
  1165. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1166. ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
  1167. TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
  1168. ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
  1169. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1170. ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
  1171. #ifdef EXPRECISION
  1172. TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
  1173. ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
  1174. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1175. ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);
  1176. TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
  1177. ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
  1178. + TABLE_NAME.align) & ~TABLE_NAME.align)
  1179. ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);
  1180. #endif
  1181. }
  1182. #endif //ZARCH
  1183. #endif //POWER
  1184. #endif //defined(ARCH_ARM64)