You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common_level3.h 166 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef ASSEMBLER
  39. #ifdef __CUDACC__
  40. __global__ void cuda_sgemm_kernel(int, int, int, float *, float *, float *);
  41. __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
  42. #endif
  43. #ifdef __CUDACC__
  44. extern "C" {
  45. #endif
  46. void sgemm_direct(BLASLONG M, BLASLONG N, BLASLONG K,
  47. float * A, BLASLONG strideA,
  48. float * B, BLASLONG strideB,
  49. float * R, BLASLONG strideR);
  50. int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
  51. int sbgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
  52. bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
  53. int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
  54. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
  55. int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
  56. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
  57. int cgemm_beta(BLASLONG, BLASLONG, BLASLONG, float, float,
  58. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
  59. int zgemm_beta(BLASLONG, BLASLONG, BLASLONG, double, double,
  60. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
  61. #ifdef EXPRECISION
  62. int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble,
  63. xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
  64. int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
  65. xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
  66. #else
  67. int qgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
  68. xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
  69. int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
  70. xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
  71. #endif
  72. int sbgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
  73. int sbgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
  74. int sbgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
  75. int sbgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
  76. int sgemm_incopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  77. int sgemm_itcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  78. int sgemm_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  79. int sgemm_otcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  80. int dgemm_incopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  81. int dgemm_itcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  82. int dgemm_oncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  83. int dgemm_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  84. int cgemm_incopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  85. int cgemm_itcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  86. int cgemm_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  87. int cgemm_otcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  88. int zgemm_incopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  89. int zgemm_itcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  90. int zgemm_oncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  91. int zgemm_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  92. #ifdef QUAD_PRECISION
  93. int qgemm_incopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  94. int qgemm_itcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  95. int qgemm_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  96. int qgemm_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  97. int xgemm_incopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  98. int xgemm_itcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  99. int xgemm_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  100. int xgemm_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xidouble *b);
  101. #else
  102. int qgemm_incopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  103. int qgemm_itcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  104. int qgemm_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  105. int qgemm_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  106. int xgemm_incopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  107. int xgemm_itcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  108. int xgemm_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  109. int xgemm_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  110. #endif
  111. int strsm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  112. int strsm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  113. int strsm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  114. int strsm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  115. int dtrsm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  116. int dtrsm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  117. int dtrsm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  118. int dtrsm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  119. int qtrsm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  120. int qtrsm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  121. int qtrsm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  122. int qtrsm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  123. int ctrsm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  124. int ctrsm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  125. int ctrsm_kernel_LR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  126. int ctrsm_kernel_LC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  127. int ctrsm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  128. int ctrsm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  129. int ctrsm_kernel_RR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  130. int ctrsm_kernel_RC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  131. int ztrsm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  132. int ztrsm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  133. int ztrsm_kernel_LR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  134. int ztrsm_kernel_LC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  135. int ztrsm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  136. int ztrsm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  137. int ztrsm_kernel_RR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  138. int ztrsm_kernel_RC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  139. int xtrsm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  140. int xtrsm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  141. int xtrsm_kernel_LR(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  142. int xtrsm_kernel_LC(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  143. int xtrsm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  144. int xtrsm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  145. int xtrsm_kernel_RR(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  146. int xtrsm_kernel_RC(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  147. int strmm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  148. int strmm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  149. int strmm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  150. int strmm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
  151. int dtrmm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  152. int dtrmm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  153. int dtrmm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  154. int dtrmm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
  155. int qtrmm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  156. int qtrmm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  157. int qtrmm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  158. int qtrmm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  159. int ctrmm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  160. int ctrmm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  161. int ctrmm_kernel_RR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  162. int ctrmm_kernel_RC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  163. int ctrmm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  164. int ctrmm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  165. int ctrmm_kernel_LR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  166. int ctrmm_kernel_LC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
  167. int ztrmm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  168. int ztrmm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  169. int ztrmm_kernel_RR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  170. int ztrmm_kernel_RC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  171. int ztrmm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  172. int ztrmm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  173. int ztrmm_kernel_LR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  174. int ztrmm_kernel_LC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
  175. int xtrmm_kernel_RN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  176. int xtrmm_kernel_RT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  177. int xtrmm_kernel_RR(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  178. int xtrmm_kernel_RC(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  179. int xtrmm_kernel_LN(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  180. int xtrmm_kernel_LT(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  181. int xtrmm_kernel_LR(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  182. int xtrmm_kernel_LC(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
  183. int strmm_iunucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  184. int strmm_iunncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  185. int strmm_iutucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  186. int strmm_iutncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  187. int strmm_ounucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  188. int strmm_ounncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  189. int strmm_outucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  190. int strmm_outncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  191. int strmm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  192. int strmm_ilnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  193. int strmm_iltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  194. int strmm_iltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  195. int strmm_olnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  196. int strmm_olnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  197. int strmm_oltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  198. int strmm_oltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  199. int dtrmm_iunucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  200. int dtrmm_iunncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  201. int dtrmm_iutucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  202. int dtrmm_iutncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  203. int dtrmm_ounucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  204. int dtrmm_ounncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  205. int dtrmm_outucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  206. int dtrmm_outncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  207. int dtrmm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  208. int dtrmm_ilnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  209. int dtrmm_iltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  210. int dtrmm_iltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  211. int dtrmm_olnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  212. int dtrmm_olnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  213. int dtrmm_oltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  214. int dtrmm_oltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  215. int qtrmm_iunucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  216. int qtrmm_iunncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  217. int qtrmm_iutucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  218. int qtrmm_iutncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  219. int qtrmm_ounucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  220. int qtrmm_ounncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  221. int qtrmm_outucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  222. int qtrmm_outncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  223. int qtrmm_ilnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  224. int qtrmm_ilnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  225. int qtrmm_iltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  226. int qtrmm_iltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  227. int qtrmm_olnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  228. int qtrmm_olnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  229. int qtrmm_oltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  230. int qtrmm_oltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  231. int ctrmm_iunucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  232. int ctrmm_iunncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  233. int ctrmm_iutucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  234. int ctrmm_iutncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  235. int ctrmm_ounucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  236. int ctrmm_ounncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  237. int ctrmm_outucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  238. int ctrmm_outncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  239. int ctrmm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  240. int ctrmm_ilnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  241. int ctrmm_iltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  242. int ctrmm_iltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  243. int ctrmm_olnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  244. int ctrmm_olnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  245. int ctrmm_oltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  246. int ctrmm_oltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  247. int ztrmm_iunucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  248. int ztrmm_iunncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  249. int ztrmm_iutucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  250. int ztrmm_iutncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  251. int ztrmm_ounucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  252. int ztrmm_ounncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  253. int ztrmm_outucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  254. int ztrmm_outncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  255. int ztrmm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  256. int ztrmm_ilnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  257. int ztrmm_iltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  258. int ztrmm_iltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  259. int ztrmm_olnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  260. int ztrmm_olnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  261. int ztrmm_oltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  262. int ztrmm_oltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  263. int xtrmm_iunucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  264. int xtrmm_iunncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  265. int xtrmm_iutucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  266. int xtrmm_iutncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  267. int xtrmm_ounucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  268. int xtrmm_ounncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  269. int xtrmm_outucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  270. int xtrmm_outncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  271. int xtrmm_ilnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  272. int xtrmm_ilnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  273. int xtrmm_iltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  274. int xtrmm_iltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  275. int xtrmm_olnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  276. int xtrmm_olnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  277. int xtrmm_oltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  278. int xtrmm_oltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  279. int strsm_iunucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  280. int strsm_iunncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  281. int strsm_iutucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  282. int strsm_iutncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  283. int strsm_ounucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  284. int strsm_ounncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  285. int strsm_outucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  286. int strsm_outncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  287. int strsm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  288. int strsm_ilnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  289. int strsm_iltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  290. int strsm_iltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  291. int strsm_olnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  292. int strsm_olnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  293. int strsm_oltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  294. int strsm_oltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  295. int dtrsm_iunucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  296. int dtrsm_iunncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  297. int dtrsm_iutucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  298. int dtrsm_iutncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  299. int dtrsm_ounucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  300. int dtrsm_ounncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  301. int dtrsm_outucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  302. int dtrsm_outncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  303. int dtrsm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  304. int dtrsm_ilnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  305. int dtrsm_iltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  306. int dtrsm_iltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  307. int dtrsm_olnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  308. int dtrsm_olnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  309. int dtrsm_oltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  310. int dtrsm_oltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  311. int qtrsm_iunucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  312. int qtrsm_iunncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  313. int qtrsm_iutucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  314. int qtrsm_iutncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  315. int qtrsm_ounucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  316. int qtrsm_ounncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  317. int qtrsm_outucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  318. int qtrsm_outncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  319. int qtrsm_ilnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  320. int qtrsm_ilnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  321. int qtrsm_iltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  322. int qtrsm_iltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  323. int qtrsm_olnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  324. int qtrsm_olnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  325. int qtrsm_oltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  326. int qtrsm_oltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  327. int ctrsm_iunucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  328. int ctrsm_iunncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  329. int ctrsm_iutucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  330. int ctrsm_iutncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  331. int ctrsm_ounucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  332. int ctrsm_ounncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  333. int ctrsm_outucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  334. int ctrsm_outncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  335. int ctrsm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  336. int ctrsm_ilnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  337. int ctrsm_iltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  338. int ctrsm_iltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  339. int ctrsm_olnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  340. int ctrsm_olnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  341. int ctrsm_oltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  342. int ctrsm_oltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG offset, float *b);
  343. int ztrsm_iunucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  344. int ztrsm_iunncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  345. int ztrsm_iutucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  346. int ztrsm_iutncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  347. int ztrsm_ounucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  348. int ztrsm_ounncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  349. int ztrsm_outucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  350. int ztrsm_outncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  351. int ztrsm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  352. int ztrsm_ilnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  353. int ztrsm_iltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  354. int ztrsm_iltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  355. int ztrsm_olnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  356. int ztrsm_olnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  357. int ztrsm_oltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  358. int ztrsm_oltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG offset, double *b);
  359. int xtrsm_iunucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  360. int xtrsm_iunncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  361. int xtrsm_iutucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  362. int xtrsm_iutncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  363. int xtrsm_ounucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  364. int xtrsm_ounncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  365. int xtrsm_outucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  366. int xtrsm_outncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  367. int xtrsm_ilnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  368. int xtrsm_ilnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  369. int xtrsm_iltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  370. int xtrsm_iltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  371. int xtrsm_olnucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  372. int xtrsm_olnncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  373. int xtrsm_oltucopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  374. int xtrsm_oltncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG offset, xdouble *b);
  375. int ssymm_iutcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  376. int ssymm_outcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  377. int ssymm_iltcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  378. int ssymm_oltcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  379. int dsymm_iutcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  380. int dsymm_outcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  381. int dsymm_iltcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  382. int dsymm_oltcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  383. int qsymm_iutcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  384. int qsymm_outcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  385. int qsymm_iltcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  386. int qsymm_oltcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  387. int csymm_iutcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  388. int csymm_outcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  389. int csymm_iltcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  390. int csymm_oltcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  391. int zsymm_iutcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  392. int zsymm_outcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  393. int zsymm_iltcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  394. int zsymm_oltcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  395. int xsymm_iutcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  396. int xsymm_outcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  397. int xsymm_iltcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  398. int xsymm_oltcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  399. int chemm_iutcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  400. int chemm_outcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  401. int chemm_iltcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  402. int chemm_oltcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
  403. int zhemm_iutcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  404. int zhemm_outcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  405. int zhemm_iltcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  406. int zhemm_oltcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
  407. int xhemm_iutcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  408. int xhemm_outcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  409. int xhemm_iltcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  410. int xhemm_oltcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, xdouble *b);
  411. int ssyrk_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  412. int ssyrk_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  413. int dsyrk_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  414. int dsyrk_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  415. int qsyrk_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  416. int qsyrk_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  417. int csyrk_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  418. int csyrk_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  419. int zsyrk_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  420. int zsyrk_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  421. int xsyrk_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  422. int xsyrk_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  423. int ssyr2k_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  424. int ssyr2k_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  425. int dsyr2k_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  426. int dsyr2k_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  427. int qsyr2k_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  428. int qsyr2k_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  429. int csyr2k_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  430. int csyr2k_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  431. int zsyr2k_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  432. int zsyr2k_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  433. int xsyr2k_kernel_U(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  434. int xsyr2k_kernel_L(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  435. int cherk_kernel_UN(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  436. int cherk_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  437. int cherk_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  438. int cherk_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset);
  439. int zherk_kernel_UN(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  440. int zherk_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  441. int zherk_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  442. int zherk_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset);
  443. int xherk_kernel_UN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  444. int xherk_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  445. int xherk_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  446. int xherk_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset);
  447. int cher2k_kernel_UN(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  448. int cher2k_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  449. int cher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  450. int cher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, float alpha_r, float alpha_i, float *a, float *b, float *c, BLASLONG ldc, BLASLONG offset, int flag);
  451. int zher2k_kernel_UN(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  452. int zher2k_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  453. int zher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  454. int zher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, double alpha_r, double alpha_i, double *a, double *b, double *c, BLASLONG ldc, BLASLONG offset, int flag);
  455. int xher2k_kernel_UN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  456. int xher2k_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  457. int xher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  458. int xher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
  459. int sbgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
  460. int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
  461. int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
  462. #ifdef QUAD_PRECISION
  463. int qgemm_kernel(BLASLONG, BLASLONG, BLASLONG, xidouble *, xidouble *, xidouble *, xdouble *, BLASLONG);
  464. #else
  465. int qgemm_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
  466. #endif
  467. #ifdef SMALL_MATRIX_OPT
  468. int sbgemm_small_matrix_permit(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
  469. int sbgemm_small_kernel_nn(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  470. int sbgemm_small_kernel_nt(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  471. int sbgemm_small_kernel_tn(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  472. int sbgemm_small_kernel_tt(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  473. int sgemm_small_matrix_permit(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
  474. int sgemm_small_kernel_nn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  475. int sgemm_small_kernel_nt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  476. int sgemm_small_kernel_tn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  477. int sgemm_small_kernel_tt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
  478. int dgemm_small_matrix_permit(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double beta);
  479. int dgemm_small_kernel_nn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
  480. int dgemm_small_kernel_nt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
  481. int dgemm_small_kernel_tn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
  482. int dgemm_small_kernel_tt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
  483. int sbgemm_small_kernel_b0_nn(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
  484. int sbgemm_small_kernel_b0_nt(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
  485. int sbgemm_small_kernel_b0_tn(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
  486. int sbgemm_small_kernel_b0_tt(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
  487. int sgemm_small_kernel_b0_nn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  488. int sgemm_small_kernel_b0_nt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  489. int sgemm_small_kernel_b0_tn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  490. int sgemm_small_kernel_b0_tt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  491. int dgemm_small_kernel_b0_nn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  492. int dgemm_small_kernel_b0_nt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  493. int dgemm_small_kernel_b0_tn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  494. int dgemm_small_kernel_b0_tt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  495. int cgemm_small_matrix_permit(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha0, float alpha1, float beta0, float beta1);
  496. int cgemm_small_kernel_nn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  497. int cgemm_small_kernel_nt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  498. int cgemm_small_kernel_nr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  499. int cgemm_small_kernel_nc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  500. int cgemm_small_kernel_tn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  501. int cgemm_small_kernel_tt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  502. int cgemm_small_kernel_tr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  503. int cgemm_small_kernel_tc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  504. int cgemm_small_kernel_rn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  505. int cgemm_small_kernel_rt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  506. int cgemm_small_kernel_rr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  507. int cgemm_small_kernel_rc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  508. int cgemm_small_kernel_cn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  509. int cgemm_small_kernel_ct(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  510. int cgemm_small_kernel_cr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  511. int cgemm_small_kernel_cc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
  512. int zgemm_small_matrix_permit(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha0, double alpha1, double beta0, double beta1);
  513. int zgemm_small_kernel_nn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  514. int zgemm_small_kernel_nt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  515. int zgemm_small_kernel_nr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  516. int zgemm_small_kernel_nc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  517. int zgemm_small_kernel_tn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  518. int zgemm_small_kernel_tt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  519. int zgemm_small_kernel_tr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  520. int zgemm_small_kernel_tc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  521. int zgemm_small_kernel_rn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  522. int zgemm_small_kernel_rt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  523. int zgemm_small_kernel_rr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  524. int zgemm_small_kernel_rc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  525. int zgemm_small_kernel_cn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  526. int zgemm_small_kernel_ct(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  527. int zgemm_small_kernel_cr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  528. int zgemm_small_kernel_cc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
  529. int cgemm_small_kernel_b0_nn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  530. int cgemm_small_kernel_b0_nt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  531. int cgemm_small_kernel_b0_nr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  532. int cgemm_small_kernel_b0_nc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  533. int cgemm_small_kernel_b0_tn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  534. int cgemm_small_kernel_b0_tt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  535. int cgemm_small_kernel_b0_tr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  536. int cgemm_small_kernel_b0_tc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  537. int cgemm_small_kernel_b0_rn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  538. int cgemm_small_kernel_b0_rt(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  539. int cgemm_small_kernel_b0_rr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  540. int cgemm_small_kernel_b0_rc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  541. int cgemm_small_kernel_b0_cn(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  542. int cgemm_small_kernel_b0_ct(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  543. int cgemm_small_kernel_b0_cr(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  544. int cgemm_small_kernel_b0_cc(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
  545. int zgemm_small_kernel_b0_nn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  546. int zgemm_small_kernel_b0_nt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  547. int zgemm_small_kernel_b0_nr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  548. int zgemm_small_kernel_b0_nc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  549. int zgemm_small_kernel_b0_tn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  550. int zgemm_small_kernel_b0_tt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  551. int zgemm_small_kernel_b0_tr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  552. int zgemm_small_kernel_b0_tc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  553. int zgemm_small_kernel_b0_rn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  554. int zgemm_small_kernel_b0_rt(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  555. int zgemm_small_kernel_b0_rr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  556. int zgemm_small_kernel_b0_rc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  557. int zgemm_small_kernel_b0_cn(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  558. int zgemm_small_kernel_b0_ct(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  559. int zgemm_small_kernel_b0_cr(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  560. int zgemm_small_kernel_b0_cc(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
  561. #endif
  562. int cgemm_kernel_n(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
  563. int cgemm_kernel_l(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
  564. int cgemm_kernel_r(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
  565. int cgemm_kernel_b(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
  566. int zgemm_kernel_n(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
  567. int zgemm_kernel_l(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
  568. int zgemm_kernel_r(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
  569. int zgemm_kernel_b(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
  570. int xgemm_kernel_n(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
  571. int xgemm_kernel_l(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
  572. int xgemm_kernel_r(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
  573. int xgemm_kernel_b(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
  574. int cgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
  575. int zgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
  576. int xgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
  577. int sbgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  578. int sbgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  579. int sbgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  580. int sbgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  581. int sgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  582. int sgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  583. int sgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  584. int sgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  585. int dgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  586. int dgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  587. int dgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  588. int dgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  589. #ifdef QUAD_PRECISION
  590. int qgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  591. int qgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  592. int qgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  593. int qgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  594. #else
  595. int qgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  596. int qgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  597. int qgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  598. int qgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  599. #endif
  600. int cgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  601. int cgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  602. int cgemm_nr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  603. int cgemm_nc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  604. int cgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  605. int cgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  606. int cgemm_tr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  607. int cgemm_tc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  608. int cgemm_rn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  609. int cgemm_rt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  610. int cgemm_rr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  611. int cgemm_rc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  612. int cgemm_cn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  613. int cgemm_ct(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  614. int cgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  615. int cgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  616. int zgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  617. int zgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  618. int zgemm_nr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  619. int zgemm_nc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  620. int zgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  621. int zgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  622. int zgemm_tr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  623. int zgemm_tc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  624. int zgemm_rn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  625. int zgemm_rt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  626. int zgemm_rr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  627. int zgemm_rc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  628. int zgemm_cn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  629. int zgemm_ct(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  630. int zgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  631. int zgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  632. #ifdef QUAD_PRECISION
  633. int xgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  634. int xgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  635. int xgemm_nr(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  636. int xgemm_nc(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  637. int xgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  638. int xgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  639. int xgemm_tr(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  640. int xgemm_tc(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  641. int xgemm_rn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  642. int xgemm_rt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  643. int xgemm_rr(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  644. int xgemm_rc(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  645. int xgemm_cn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  646. int xgemm_ct(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  647. int xgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  648. int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  649. #else
  650. int xgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  651. int xgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  652. int xgemm_nr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  653. int xgemm_nc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  654. int xgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  655. int xgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  656. int xgemm_tr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  657. int xgemm_tc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  658. int xgemm_rn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  659. int xgemm_rt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  660. int xgemm_rr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  661. int xgemm_rc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  662. int xgemm_cn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  663. int xgemm_ct(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  664. int xgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  665. int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  666. #endif
  667. int sbgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  668. int sbgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  669. int sbgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  670. int sbgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
  671. int sgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  672. int sgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  673. int sgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  674. int sgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  675. int dgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  676. int dgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  677. int dgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  678. int dgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  679. #ifdef QUAD_PRECISION
  680. int qgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  681. int qgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  682. int qgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  683. int qgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xidouble *, xidouble *, BLASLONG);
  684. #else
  685. int qgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  686. int qgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  687. int qgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  688. int qgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  689. #endif
  690. int cgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  691. int cgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  692. int cgemm_thread_nr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  693. int cgemm_thread_nc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  694. int cgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  695. int cgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  696. int cgemm_thread_tr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  697. int cgemm_thread_tc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  698. int cgemm_thread_rn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  699. int cgemm_thread_rt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  700. int cgemm_thread_rr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  701. int cgemm_thread_rc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  702. int cgemm_thread_cn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  703. int cgemm_thread_ct(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  704. int cgemm_thread_cr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  705. int cgemm_thread_cc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  706. int zgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  707. int zgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  708. int zgemm_thread_nr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  709. int zgemm_thread_nc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  710. int zgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  711. int zgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  712. int zgemm_thread_tr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  713. int zgemm_thread_tc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  714. int zgemm_thread_rn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  715. int zgemm_thread_rt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  716. int zgemm_thread_rr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  717. int zgemm_thread_rc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  718. int zgemm_thread_cn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  719. int zgemm_thread_ct(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  720. int zgemm_thread_cr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  721. int zgemm_thread_cc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  722. int xgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  723. int xgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  724. int xgemm_thread_nr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  725. int xgemm_thread_nc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  726. int xgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  727. int xgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  728. int xgemm_thread_tr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  729. int xgemm_thread_tc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  730. int xgemm_thread_rn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  731. int xgemm_thread_rt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  732. int xgemm_thread_rr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  733. int xgemm_thread_rc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  734. int xgemm_thread_cn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  735. int xgemm_thread_ct(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  736. int xgemm_thread_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  737. int xgemm_thread_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  738. int cgemm3m_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  739. int cgemm3m_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  740. int cgemm3m_nr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  741. int cgemm3m_nc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  742. int cgemm3m_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  743. int cgemm3m_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  744. int cgemm3m_tr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  745. int cgemm3m_tc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  746. int cgemm3m_rn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  747. int cgemm3m_rt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  748. int cgemm3m_rr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  749. int cgemm3m_rc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  750. int cgemm3m_cn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  751. int cgemm3m_ct(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  752. int cgemm3m_cr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  753. int cgemm3m_cc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  754. int zgemm3m_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  755. int zgemm3m_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  756. int zgemm3m_nr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  757. int zgemm3m_nc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  758. int zgemm3m_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  759. int zgemm3m_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  760. int zgemm3m_tr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  761. int zgemm3m_tc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  762. int zgemm3m_rn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  763. int zgemm3m_rt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  764. int zgemm3m_rr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  765. int zgemm3m_rc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  766. int zgemm3m_cn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  767. int zgemm3m_ct(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  768. int zgemm3m_cr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  769. int zgemm3m_cc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  770. int xgemm3m_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  771. int xgemm3m_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  772. int xgemm3m_nr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  773. int xgemm3m_nc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  774. int xgemm3m_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  775. int xgemm3m_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  776. int xgemm3m_tr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  777. int xgemm3m_tc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  778. int xgemm3m_rn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  779. int xgemm3m_rt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  780. int xgemm3m_rr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  781. int xgemm3m_rc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  782. int xgemm3m_cn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  783. int xgemm3m_ct(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  784. int xgemm3m_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  785. int xgemm3m_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  786. int cgemm3m_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  787. int cgemm3m_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  788. int cgemm3m_thread_nr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  789. int cgemm3m_thread_nc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  790. int cgemm3m_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  791. int cgemm3m_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  792. int cgemm3m_thread_tr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  793. int cgemm3m_thread_tc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  794. int cgemm3m_thread_rn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  795. int cgemm3m_thread_rt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  796. int cgemm3m_thread_rr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  797. int cgemm3m_thread_rc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  798. int cgemm3m_thread_cn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  799. int cgemm3m_thread_ct(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  800. int cgemm3m_thread_cr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  801. int cgemm3m_thread_cc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  802. int zgemm3m_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  803. int zgemm3m_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  804. int zgemm3m_thread_nr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  805. int zgemm3m_thread_nc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  806. int zgemm3m_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  807. int zgemm3m_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  808. int zgemm3m_thread_tr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  809. int zgemm3m_thread_tc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  810. int zgemm3m_thread_rn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  811. int zgemm3m_thread_rt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  812. int zgemm3m_thread_rr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  813. int zgemm3m_thread_rc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  814. int zgemm3m_thread_cn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  815. int zgemm3m_thread_ct(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  816. int zgemm3m_thread_cr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  817. int zgemm3m_thread_cc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  818. int xgemm3m_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  819. int xgemm3m_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  820. int xgemm3m_thread_nr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  821. int xgemm3m_thread_nc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  822. int xgemm3m_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  823. int xgemm3m_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  824. int xgemm3m_thread_tr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  825. int xgemm3m_thread_tc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  826. int xgemm3m_thread_rn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  827. int xgemm3m_thread_rt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  828. int xgemm3m_thread_rr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  829. int xgemm3m_thread_rc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  830. int xgemm3m_thread_cn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  831. int xgemm3m_thread_ct(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  832. int xgemm3m_thread_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  833. int xgemm3m_thread_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  834. int cher2m_LNN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  835. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  836. int cher2m_LNT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  837. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  838. int cher2m_LNR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  839. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  840. int cher2m_LNC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  841. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  842. int cher2m_LTN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  843. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  844. int cher2m_LTT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  845. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  846. int cher2m_LTR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  847. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  848. int cher2m_LTC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  849. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  850. int cher2m_LRN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  851. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  852. int cher2m_LRT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  853. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  854. int cher2m_LRR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  855. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  856. int cher2m_LRC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  857. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  858. int cher2m_LCN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  859. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  860. int cher2m_LCT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  861. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  862. int cher2m_LCR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  863. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  864. int cher2m_LCC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  865. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  866. int cher2m_UNN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  867. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  868. int cher2m_UNT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  869. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  870. int cher2m_UNR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  871. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  872. int cher2m_UNC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  873. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  874. int cher2m_UTN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  875. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  876. int cher2m_UTT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  877. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  878. int cher2m_UTR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  879. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  880. int cher2m_UTC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  881. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  882. int cher2m_URN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  883. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  884. int cher2m_URT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  885. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  886. int cher2m_URR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  887. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  888. int cher2m_URC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  889. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  890. int cher2m_UCN(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  891. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  892. int cher2m_UCT(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  893. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  894. int cher2m_UCR(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  895. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  896. int cher2m_UCC(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG,
  897. float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, BLASLONG);
  898. int zher2m_LNN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  899. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  900. int zher2m_LNT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  901. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  902. int zher2m_LNR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  903. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  904. int zher2m_LNC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  905. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  906. int zher2m_LTN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  907. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  908. int zher2m_LTT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  909. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  910. int zher2m_LTR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  911. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  912. int zher2m_LTC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  913. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  914. int zher2m_LRN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  915. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  916. int zher2m_LRT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  917. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  918. int zher2m_LRR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  919. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  920. int zher2m_LRC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  921. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  922. int zher2m_LCN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  923. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  924. int zher2m_LCT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  925. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  926. int zher2m_LCR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  927. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  928. int zher2m_LCC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  929. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  930. int zher2m_UNN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  931. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  932. int zher2m_UNT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  933. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  934. int zher2m_UNR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  935. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  936. int zher2m_UNC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  937. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  938. int zher2m_UTN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  939. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  940. int zher2m_UTT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  941. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  942. int zher2m_UTR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  943. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  944. int zher2m_UTC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  945. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  946. int zher2m_URN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  947. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  948. int zher2m_URT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  949. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  950. int zher2m_URR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  951. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  952. int zher2m_URC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  953. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  954. int zher2m_UCN(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  955. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  956. int zher2m_UCT(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  957. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  958. int zher2m_UCR(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  959. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  960. int zher2m_UCC(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG,
  961. double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, BLASLONG);
  962. int strsm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  963. int strsm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  964. int strsm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  965. int strsm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  966. int strsm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  967. int strsm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  968. int strsm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  969. int strsm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  970. int strsm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  971. int strsm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  972. int strsm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  973. int strsm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  974. int strsm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  975. int strsm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  976. int strsm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  977. int strsm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  978. int dtrsm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  979. int dtrsm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  980. int dtrsm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  981. int dtrsm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  982. int dtrsm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  983. int dtrsm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  984. int dtrsm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  985. int dtrsm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  986. int dtrsm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  987. int dtrsm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  988. int dtrsm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  989. int dtrsm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  990. int dtrsm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  991. int dtrsm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  992. int dtrsm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  993. int dtrsm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  994. int qtrsm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  995. int qtrsm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  996. int qtrsm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  997. int qtrsm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  998. int qtrsm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  999. int qtrsm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1000. int qtrsm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1001. int qtrsm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1002. int qtrsm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1003. int qtrsm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1004. int qtrsm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1005. int qtrsm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1006. int qtrsm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1007. int qtrsm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1008. int qtrsm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1009. int qtrsm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1010. int ctrsm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1011. int ctrsm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1012. int ctrsm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1013. int ctrsm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1014. int ctrsm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1015. int ctrsm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1016. int ctrsm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1017. int ctrsm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1018. int ctrsm_LRUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1019. int ctrsm_LRUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1020. int ctrsm_LRLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1021. int ctrsm_LRLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1022. int ctrsm_LCUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1023. int ctrsm_LCUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1024. int ctrsm_LCLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1025. int ctrsm_LCLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1026. int ctrsm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1027. int ctrsm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1028. int ctrsm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1029. int ctrsm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1030. int ctrsm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1031. int ctrsm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1032. int ctrsm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1033. int ctrsm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1034. int ctrsm_RRUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1035. int ctrsm_RRUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1036. int ctrsm_RRLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1037. int ctrsm_RRLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1038. int ctrsm_RCUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1039. int ctrsm_RCUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1040. int ctrsm_RCLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1041. int ctrsm_RCLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1042. int ztrsm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1043. int ztrsm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1044. int ztrsm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1045. int ztrsm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1046. int ztrsm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1047. int ztrsm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1048. int ztrsm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1049. int ztrsm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1050. int ztrsm_LRUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1051. int ztrsm_LRUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1052. int ztrsm_LRLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1053. int ztrsm_LRLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1054. int ztrsm_LCUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1055. int ztrsm_LCUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1056. int ztrsm_LCLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1057. int ztrsm_LCLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1058. int ztrsm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1059. int ztrsm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1060. int ztrsm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1061. int ztrsm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1062. int ztrsm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1063. int ztrsm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1064. int ztrsm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1065. int ztrsm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1066. int ztrsm_RRUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1067. int ztrsm_RRUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1068. int ztrsm_RRLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1069. int ztrsm_RRLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1070. int ztrsm_RCUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1071. int ztrsm_RCUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1072. int ztrsm_RCLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1073. int ztrsm_RCLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1074. int xtrsm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1075. int xtrsm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1076. int xtrsm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1077. int xtrsm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1078. int xtrsm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1079. int xtrsm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1080. int xtrsm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1081. int xtrsm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1082. int xtrsm_LRUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1083. int xtrsm_LRUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1084. int xtrsm_LRLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1085. int xtrsm_LRLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1086. int xtrsm_LCUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1087. int xtrsm_LCUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1088. int xtrsm_LCLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1089. int xtrsm_LCLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1090. int xtrsm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1091. int xtrsm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1092. int xtrsm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1093. int xtrsm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1094. int xtrsm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1095. int xtrsm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1096. int xtrsm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1097. int xtrsm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1098. int xtrsm_RRUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1099. int xtrsm_RRUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1100. int xtrsm_RRLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1101. int xtrsm_RRLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1102. int xtrsm_RCUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1103. int xtrsm_RCUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1104. int xtrsm_RCLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1105. int xtrsm_RCLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1106. int strmm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1107. int strmm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1108. int strmm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1109. int strmm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1110. int strmm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1111. int strmm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1112. int strmm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1113. int strmm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1114. int strmm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1115. int strmm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1116. int strmm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1117. int strmm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1118. int strmm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1119. int strmm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1120. int strmm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1121. int strmm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1122. int dtrmm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1123. int dtrmm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1124. int dtrmm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1125. int dtrmm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1126. int dtrmm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1127. int dtrmm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1128. int dtrmm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1129. int dtrmm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1130. int dtrmm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1131. int dtrmm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1132. int dtrmm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1133. int dtrmm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1134. int dtrmm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1135. int dtrmm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1136. int dtrmm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1137. int dtrmm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1138. int qtrmm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1139. int qtrmm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1140. int qtrmm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1141. int qtrmm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1142. int qtrmm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1143. int qtrmm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1144. int qtrmm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1145. int qtrmm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1146. int qtrmm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1147. int qtrmm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1148. int qtrmm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1149. int qtrmm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1150. int qtrmm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1151. int qtrmm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1152. int qtrmm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1153. int qtrmm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1154. int ctrmm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1155. int ctrmm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1156. int ctrmm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1157. int ctrmm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1158. int ctrmm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1159. int ctrmm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1160. int ctrmm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1161. int ctrmm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1162. int ctrmm_LRUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1163. int ctrmm_LRUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1164. int ctrmm_LRLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1165. int ctrmm_LRLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1166. int ctrmm_LCUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1167. int ctrmm_LCUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1168. int ctrmm_LCLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1169. int ctrmm_LCLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1170. int ctrmm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1171. int ctrmm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1172. int ctrmm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1173. int ctrmm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1174. int ctrmm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1175. int ctrmm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1176. int ctrmm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1177. int ctrmm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1178. int ctrmm_RRUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1179. int ctrmm_RRUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1180. int ctrmm_RRLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1181. int ctrmm_RRLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1182. int ctrmm_RCUU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1183. int ctrmm_RCUN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1184. int ctrmm_RCLU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1185. int ctrmm_RCLN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1186. int ztrmm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1187. int ztrmm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1188. int ztrmm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1189. int ztrmm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1190. int ztrmm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1191. int ztrmm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1192. int ztrmm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1193. int ztrmm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1194. int ztrmm_LRUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1195. int ztrmm_LRUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1196. int ztrmm_LRLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1197. int ztrmm_LRLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1198. int ztrmm_LCUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1199. int ztrmm_LCUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1200. int ztrmm_LCLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1201. int ztrmm_LCLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1202. int ztrmm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1203. int ztrmm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1204. int ztrmm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1205. int ztrmm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1206. int ztrmm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1207. int ztrmm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1208. int ztrmm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1209. int ztrmm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1210. int ztrmm_RRUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1211. int ztrmm_RRUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1212. int ztrmm_RRLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1213. int ztrmm_RRLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1214. int ztrmm_RCUU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1215. int ztrmm_RCUN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1216. int ztrmm_RCLU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1217. int ztrmm_RCLN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1218. int xtrmm_LNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1219. int xtrmm_LNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1220. int xtrmm_LNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1221. int xtrmm_LNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1222. int xtrmm_LTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1223. int xtrmm_LTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1224. int xtrmm_LTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1225. int xtrmm_LTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1226. int xtrmm_LRUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1227. int xtrmm_LRUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1228. int xtrmm_LRLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1229. int xtrmm_LRLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1230. int xtrmm_LCUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1231. int xtrmm_LCUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1232. int xtrmm_LCLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1233. int xtrmm_LCLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1234. int xtrmm_RNUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1235. int xtrmm_RNUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1236. int xtrmm_RNLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1237. int xtrmm_RNLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1238. int xtrmm_RTUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1239. int xtrmm_RTUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1240. int xtrmm_RTLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1241. int xtrmm_RTLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1242. int xtrmm_RRUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1243. int xtrmm_RRUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1244. int xtrmm_RRLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1245. int xtrmm_RRLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1246. int xtrmm_RCUU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1247. int xtrmm_RCUN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1248. int xtrmm_RCLU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1249. int xtrmm_RCLN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1250. int ssymm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1251. int ssymm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1252. int ssymm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1253. int ssymm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1254. int dsymm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1255. int dsymm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1256. int dsymm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1257. int dsymm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1258. int qsymm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1259. int qsymm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1260. int qsymm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1261. int qsymm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1262. int csymm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1263. int csymm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1264. int csymm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1265. int csymm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1266. int zsymm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1267. int zsymm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1268. int zsymm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1269. int zsymm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1270. int xsymm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1271. int xsymm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1272. int xsymm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1273. int xsymm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1274. int csymm3m_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1275. int csymm3m_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1276. int csymm3m_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1277. int csymm3m_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1278. int zsymm3m_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1279. int zsymm3m_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1280. int zsymm3m_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1281. int zsymm3m_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1282. int xsymm3m_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1283. int xsymm3m_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1284. int xsymm3m_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1285. int xsymm3m_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1286. int csymm3m_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1287. int csymm3m_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1288. int csymm3m_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1289. int csymm3m_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1290. int zsymm3m_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1291. int zsymm3m_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1292. int zsymm3m_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1293. int zsymm3m_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1294. int xsymm3m_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1295. int xsymm3m_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1296. int xsymm3m_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1297. int xsymm3m_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1298. int chemm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1299. int chemm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1300. int chemm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1301. int chemm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1302. int zhemm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1303. int zhemm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1304. int zhemm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1305. int zhemm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1306. int xhemm_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1307. int xhemm_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1308. int xhemm_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1309. int xhemm_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1310. int chemm3m_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1311. int chemm3m_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1312. int chemm3m_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1313. int chemm3m_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1314. int zhemm3m_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1315. int zhemm3m_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1316. int zhemm3m_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1317. int zhemm3m_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1318. int xhemm3m_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1319. int xhemm3m_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1320. int xhemm3m_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1321. int xhemm3m_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1322. int chemm3m_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1323. int chemm3m_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1324. int chemm3m_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1325. int chemm3m_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1326. int zhemm3m_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1327. int zhemm3m_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1328. int zhemm3m_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1329. int zhemm3m_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1330. int xhemm3m_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1331. int xhemm3m_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1332. int xhemm3m_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1333. int xhemm3m_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1334. int ssymm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1335. int ssymm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1336. int ssymm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1337. int ssymm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1338. int dsymm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1339. int dsymm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1340. int dsymm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1341. int dsymm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1342. int qsymm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1343. int qsymm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1344. int qsymm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1345. int qsymm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1346. int csymm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1347. int csymm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1348. int csymm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1349. int csymm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1350. int zsymm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1351. int zsymm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1352. int zsymm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1353. int zsymm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1354. int xsymm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1355. int xsymm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1356. int xsymm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1357. int xsymm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1358. int chemm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1359. int chemm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1360. int chemm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1361. int chemm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1362. int zhemm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1363. int zhemm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1364. int zhemm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1365. int zhemm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1366. int xhemm_thread_LU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1367. int xhemm_thread_LL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1368. int xhemm_thread_RU(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1369. int xhemm_thread_RL(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1370. int ssyrk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1371. int ssyrk_UT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1372. int ssyrk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1373. int ssyrk_LT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1374. int dsyrk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1375. int dsyrk_UT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1376. int dsyrk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1377. int dsyrk_LT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1378. int qsyrk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1379. int qsyrk_UT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1380. int qsyrk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1381. int qsyrk_LT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1382. int csyrk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1383. int csyrk_UT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1384. int csyrk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1385. int csyrk_LT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1386. int zsyrk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1387. int zsyrk_UT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1388. int zsyrk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1389. int zsyrk_LT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1390. int xsyrk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1391. int xsyrk_UT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1392. int xsyrk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1393. int xsyrk_LT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1394. int ssyrk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1395. int ssyrk_thread_UT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1396. int ssyrk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1397. int ssyrk_thread_LT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1398. int dsyrk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1399. int dsyrk_thread_UT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1400. int dsyrk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1401. int dsyrk_thread_LT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1402. int qsyrk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1403. int qsyrk_thread_UT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1404. int qsyrk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1405. int qsyrk_thread_LT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1406. int csyrk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1407. int csyrk_thread_UT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1408. int csyrk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1409. int csyrk_thread_LT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1410. int zsyrk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1411. int zsyrk_thread_UT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1412. int zsyrk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1413. int zsyrk_thread_LT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1414. int xsyrk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1415. int xsyrk_thread_UT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1416. int xsyrk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1417. int xsyrk_thread_LT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1418. int ssyr2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1419. int ssyr2k_UT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1420. int ssyr2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1421. int ssyr2k_LT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1422. int dsyr2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1423. int dsyr2k_UT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1424. int dsyr2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1425. int dsyr2k_LT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1426. int qsyr2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1427. int qsyr2k_UT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1428. int qsyr2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1429. int qsyr2k_LT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1430. int csyr2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1431. int csyr2k_UT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1432. int csyr2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1433. int csyr2k_LT(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1434. int zsyr2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1435. int zsyr2k_UT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1436. int zsyr2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1437. int zsyr2k_LT(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1438. int xsyr2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1439. int xsyr2k_UT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1440. int xsyr2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1441. int xsyr2k_LT(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1442. int cherk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1443. int cherk_UC(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1444. int cherk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1445. int cherk_LC(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1446. int zherk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1447. int zherk_UC(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1448. int zherk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1449. int zherk_LC(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1450. int xherk_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1451. int xherk_UC(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1452. int xherk_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1453. int xherk_LC(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1454. int cherk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1455. int cherk_thread_UC(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1456. int cherk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1457. int cherk_thread_LC(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1458. int zherk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1459. int zherk_thread_UC(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1460. int zherk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1461. int zherk_thread_LC(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1462. int xherk_thread_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1463. int xherk_thread_UC(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1464. int xherk_thread_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1465. int xherk_thread_LC(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1466. int cher2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1467. int cher2k_UC(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1468. int cher2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1469. int cher2k_LC(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1470. int zher2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1471. int zher2k_UC(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1472. int zher2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1473. int zher2k_LC(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1474. int xher2k_UN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1475. int xher2k_UC(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1476. int xher2k_LN(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1477. int xher2k_LC(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1478. int sgemt_n(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, int);
  1479. int sgemt_t(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, int);
  1480. int dgemt_n(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, int);
  1481. int dgemt_t(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, int);
  1482. int cgemt_n(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, int);
  1483. int cgemt_t(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, int);
  1484. int cgemt_r(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, int);
  1485. int cgemt_c(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, int);
  1486. int zgemt_n(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, int);
  1487. int zgemt_t(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, int);
  1488. int zgemt_r(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, int);
  1489. int zgemt_c(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, int);
  1490. int sgema_n(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
  1491. int sgema_t(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
  1492. int dgema_n(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
  1493. int dgema_t(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
  1494. int cgema_n(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1495. int cgema_t(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1496. int cgema_r(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1497. int cgema_c(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1498. int zgema_n(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1499. int zgema_t(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1500. int zgema_r(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1501. int zgema_c(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1502. int cgemm3m_incopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  1503. int cgemm3m_incopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  1504. int cgemm3m_incopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  1505. int cgemm3m_itcopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  1506. int cgemm3m_itcopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  1507. int cgemm3m_itcopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
  1508. int cgemm3m_oncopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float alpha_r, float alpha_i, float *b);
  1509. int cgemm3m_oncopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float alpha_r, float alpha_i, float *b);
  1510. int cgemm3m_oncopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float alpha_r, float alpha_i, float *b);
  1511. int cgemm3m_otcopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float alpha_r, float alpha_i, float *b);
  1512. int cgemm3m_otcopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float alpha_r, float alpha_i, float *b);
  1513. int cgemm3m_otcopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float alpha_r, float alpha_i, float *b);
  1514. int zgemm3m_incopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  1515. int zgemm3m_incopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  1516. int zgemm3m_incopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  1517. int zgemm3m_itcopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  1518. int zgemm3m_itcopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  1519. int zgemm3m_itcopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
  1520. int zgemm3m_oncopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double alpha_r, double alpha_i, double *b);
  1521. int zgemm3m_oncopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double alpha_r, double alpha_i, double *b);
  1522. int zgemm3m_oncopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double alpha_r, double alpha_i, double *b);
  1523. int zgemm3m_otcopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double alpha_r, double alpha_i, double *b);
  1524. int zgemm3m_otcopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double alpha_r, double alpha_i, double *b);
  1525. int zgemm3m_otcopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double alpha_r, double alpha_i, double *b);
  1526. int xgemm3m_incopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  1527. int xgemm3m_incopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  1528. int xgemm3m_incopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  1529. int xgemm3m_itcopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  1530. int xgemm3m_itcopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  1531. int xgemm3m_itcopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b);
  1532. int xgemm3m_oncopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1533. int xgemm3m_oncopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1534. int xgemm3m_oncopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1535. int xgemm3m_otcopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1536. int xgemm3m_otcopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1537. int xgemm3m_otcopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1538. int csymm3m_iucopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1539. int csymm3m_ilcopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1540. int csymm3m_iucopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1541. int csymm3m_ilcopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1542. int csymm3m_iucopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1543. int csymm3m_ilcopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1544. int csymm3m_oucopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1545. int csymm3m_olcopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1546. int csymm3m_oucopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1547. int csymm3m_olcopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1548. int csymm3m_oucopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1549. int csymm3m_olcopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1550. int zsymm3m_iucopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1551. int zsymm3m_ilcopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1552. int zsymm3m_iucopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1553. int zsymm3m_ilcopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1554. int zsymm3m_iucopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1555. int zsymm3m_ilcopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1556. int zsymm3m_oucopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1557. int zsymm3m_olcopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1558. int zsymm3m_oucopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1559. int zsymm3m_olcopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1560. int zsymm3m_oucopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1561. int zsymm3m_olcopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1562. int xsymm3m_iucopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1563. int xsymm3m_ilcopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1564. int xsymm3m_iucopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1565. int xsymm3m_ilcopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1566. int xsymm3m_iucopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1567. int xsymm3m_ilcopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1568. int xsymm3m_oucopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1569. int xsymm3m_olcopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1570. int xsymm3m_oucopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1571. int xsymm3m_olcopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1572. int xsymm3m_oucopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1573. int xsymm3m_olcopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1574. int chemm3m_iucopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1575. int chemm3m_ilcopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1576. int chemm3m_iucopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1577. int chemm3m_ilcopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1578. int chemm3m_iucopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1579. int chemm3m_ilcopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float *b);
  1580. int chemm3m_oucopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1581. int chemm3m_olcopyb(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1582. int chemm3m_oucopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1583. int chemm3m_olcopyr(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1584. int chemm3m_oucopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1585. int chemm3m_olcopyi(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG x, BLASLONG y, float alpha_r, float alpha_i, float *b);
  1586. int zhemm3m_iucopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1587. int zhemm3m_ilcopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1588. int zhemm3m_iucopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1589. int zhemm3m_ilcopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1590. int zhemm3m_iucopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1591. int zhemm3m_ilcopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double *b);
  1592. int zhemm3m_oucopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1593. int zhemm3m_olcopyb(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1594. int zhemm3m_oucopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1595. int zhemm3m_olcopyr(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1596. int zhemm3m_oucopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1597. int zhemm3m_olcopyi(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG x, BLASLONG y, double alpha_r, double alpha_i, double *b);
  1598. int xhemm3m_iucopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1599. int xhemm3m_ilcopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1600. int xhemm3m_iucopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1601. int xhemm3m_ilcopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1602. int xhemm3m_iucopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1603. int xhemm3m_ilcopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble *b);
  1604. int xhemm3m_oucopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1605. int xhemm3m_olcopyb(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1606. int xhemm3m_oucopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1607. int xhemm3m_olcopyr(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1608. int xhemm3m_oucopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1609. int xhemm3m_olcopyi(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, BLASLONG x, BLASLONG y, xdouble alpha_r, xdouble alpha_i, xdouble *b);
  1610. int sgemc_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1611. int sgemc_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1612. int sgemc_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1613. int sgemc_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1614. int dgemc_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1615. int dgemc_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1616. int dgemc_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1617. int dgemc_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1618. int qgemc_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1619. int qgemc_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1620. int qgemc_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1621. int qgemc_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1622. int cgemc_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1623. int cgemc_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1624. int cgemc_nr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1625. int cgemc_nc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1626. int cgemc_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1627. int cgemc_tt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1628. int cgemc_tr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1629. int cgemc_tc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1630. int cgemc_rn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1631. int cgemc_rt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1632. int cgemc_rr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1633. int cgemc_rc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1634. int cgemc_cn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1635. int cgemc_ct(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1636. int cgemc_cr(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1637. int cgemc_cc(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
  1638. int zgemc_nn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1639. int zgemc_nt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1640. int zgemc_nr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1641. int zgemc_nc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1642. int zgemc_tn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1643. int zgemc_tt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1644. int zgemc_tr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1645. int zgemc_tc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1646. int zgemc_rn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1647. int zgemc_rt(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1648. int zgemc_rr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1649. int zgemc_rc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1650. int zgemc_cn(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1651. int zgemc_ct(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1652. int zgemc_cr(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1653. int zgemc_cc(blas_arg_t *, BLASLONG *, BLASLONG *, double *, double *, BLASLONG);
  1654. int xgemc_nn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1655. int xgemc_nt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1656. int xgemc_nr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1657. int xgemc_nc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1658. int xgemc_tn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1659. int xgemc_tt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1660. int xgemc_tr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1661. int xgemc_tc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1662. int xgemc_rn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1663. int xgemc_rt(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1664. int xgemc_rr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1665. int xgemc_rc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1666. int xgemc_cn(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1667. int xgemc_ct(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1668. int xgemc_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1669. int xgemc_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
  1670. int sgemc_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b, BLASLONG ldb, float *c);
  1671. int sgemc_otcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b, BLASLONG ldb, float *c);
  1672. int dgemc_oncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLASLONG ldb, double *c);
  1673. int dgemc_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLASLONG ldb, double *c);
  1674. int qgemc_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
  1675. int qgemc_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
  1676. int cgemc_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b, BLASLONG ldb, float *c);
  1677. int cgemc_otcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b, BLASLONG ldb, float *c);
  1678. int zgemc_oncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLASLONG ldb, double *c);
  1679. int zgemc_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLASLONG ldb, double *c);
  1680. int xgemc_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
  1681. int xgemc_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c);
  1682. int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
  1683. int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
  1684. int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
  1685. int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG);
  1686. int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG);
  1687. int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG);
  1688. int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG);
  1689. int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG);
  1690. int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
  1691. int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
  1692. int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
  1693. int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
  1694. int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG);
  1695. int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG);
  1696. int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG);
  1697. int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG);
  1698. int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1699. int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1700. int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1701. int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1702. int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1703. int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1704. int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1705. int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1706. int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1707. int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1708. int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1709. int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
  1710. int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1711. int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1712. int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1713. int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG);
  1714. int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1715. int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1716. int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1717. int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1718. int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1719. int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1720. int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1721. int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1722. int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1723. int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1724. int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1725. int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
  1726. int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1727. int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1728. int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1729. int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG);
  1730. int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
  1731. int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
  1732. int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
  1733. int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
  1734. int sgemm_batch_thread(blas_arg_t * queue, BLASLONG nums);
  1735. int dgemm_batch_thread(blas_arg_t * queue, BLASLONG nums);
  1736. int cgemm_batch_thread(blas_arg_t * queue, BLASLONG nums);
  1737. int zgemm_batch_thread(blas_arg_t * queue, BLASLONG nums);
  1738. int sbgemm_batch_thread(blas_arg_t * queue, BLASLONG nums);
  1739. #ifdef __CUDACC__
  1740. }
  1741. #endif
  1742. #endif