You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

sgemm_tcopy_macros_8_power8.S 8.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /***************************************************************************
  2. Copyright (c) 2013-2016, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. *****************************************************************************/
  27. /**************************************************************************************
  28. * 2016/04/23 Werner Saar (wernsaar@googlemail.com)
  29. * BLASTEST : OK
  30. * CTEST : OK
  31. * TEST : OK
  32. * LAPACK-TEST : OK
  33. **************************************************************************************/
  /**********************************************************************************************
  * Macros for N=4 and M=8
  *
  * COPY_4x8 loads two vectors (lxvw4x, 16 bytes each) through every one of the four
  * source pointers A0..A3 and stores the eight vectors through T1, a scratch copy of BO.
  *
  *   reads    : A0, A1, A2, A3, BO, index operands o0 / o16 / o32 / o48
  *   clobbers : vs32..vs39, T1 (ends as BO + 64)
  *
  * NOTE(review): o0 / o16 / o32 / o48 are not defined in this section; they are presumably
  * the byte offsets 0 / 16 / 32 / 48, which would make the output 128 contiguous bytes.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x8', `
  #else
  .macro COPY_4x8
  #endif

          /* loads: first vector of each row, then second vector of each row */
          lxvw4x    vs32,   o0,     A0
          lxvw4x    vs34,   o0,     A1
          lxvw4x    vs36,   o0,     A2
          lxvw4x    vs38,   o0,     A3
          lxvw4x    vs33,   o16,    A0
          lxvw4x    vs35,   o16,    A1
          lxvw4x    vs37,   o16,    A2
          lxvw4x    vs39,   o16,    A3

          /* T1 walks the destination so that BO itself is left untouched */
          mr        T1,     BO

          /* rows A0 and A1 */
          stxvw4x   vs32,   o0,     T1
          stxvw4x   vs33,   o16,    T1
          stxvw4x   vs34,   o32,    T1
          stxvw4x   vs35,   o48,    T1

          addi      T1,     T1,     64

          /* rows A2 and A3 */
          stxvw4x   vs36,   o0,     T1
          stxvw4x   vs37,   o16,    T1
          stxvw4x   vs38,   o32,    T1
          stxvw4x   vs39,   o48,    T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=4
  *
  * COPY_4x4 loads one vector (lxvw4x, 16 bytes) through each of A0..A3 and stores the
  * four vectors through T1, a scratch copy of BO.
  *
  *   reads    : A0, A1, A2, A3, BO, index operands o0 / o16 / o32 / o48
  *   clobbers : vs32..vs35, T1 (ends equal to BO)
  *
  * NOTE(review): o0 / o16 / o32 / o48 are not defined in this section; presumably byte
  * offsets 0 / 16 / 32 / 48. Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x4', `
  #else
  .macro COPY_4x4
  #endif

          /* one vector per source row */
          lxvw4x    vs32,   o0,     A0
          lxvw4x    vs33,   o0,     A1
          lxvw4x    vs34,   o0,     A2
          lxvw4x    vs35,   o0,     A3

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxvw4x   vs32,   o0,     T1
          stxvw4x   vs33,   o16,    T1
          stxvw4x   vs34,   o32,    T1
          stxvw4x   vs35,   o48,    T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=2
  *
  * COPY_4x2 loads two single-precision scalars (lxsspx) through each of A0..A3 and
  * stores the eight scalars (stxsspx) through T1, a scratch copy of BO that is advanced
  * by 8 bytes after every pair.
  *
  *   reads    : A0, A1, A2, A3, BO, index operands o0 / o4
  *   clobbers : vs32..vs39, T1 (ends as BO + 24)
  *
  * NOTE(review): o0 / o4 are not defined in this section; presumably byte offsets 0 / 4.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x2', `
  #else
  .macro COPY_4x2
  #endif

          /* loads: first element of each row, then second element of each row */
          lxsspx    vs32,   o0,     A0
          lxsspx    vs34,   o0,     A1
          lxsspx    vs36,   o0,     A2
          lxsspx    vs38,   o0,     A3
          lxsspx    vs33,   o4,     A0
          lxsspx    vs35,   o4,     A1
          lxsspx    vs37,   o4,     A2
          lxsspx    vs39,   o4,     A3

          /* T1 walks the destination so that BO itself is left untouched */
          mr        T1,     BO

          /* row A0 */
          stxsspx   vs32,   o0,     T1
          stxsspx   vs33,   o4,     T1

          addi      T1,     T1,     8

          /* row A1 */
          stxsspx   vs34,   o0,     T1
          stxsspx   vs35,   o4,     T1

          addi      T1,     T1,     8

          /* row A2 */
          stxsspx   vs36,   o0,     T1
          stxsspx   vs37,   o4,     T1

          addi      T1,     T1,     8

          /* row A3 */
          stxsspx   vs38,   o0,     T1
          stxsspx   vs39,   o4,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=1
  *
  * COPY_4x1 loads one single-precision scalar (lxsspx) through each of A0..A3 and
  * stores the four scalars (stxsspx) through T1, a scratch copy of BO that is advanced
  * by 8 bytes after the first pair.
  *
  *   reads    : A0, A1, A2, A3, BO, index operands o0 / o4
  *   clobbers : vs32..vs35, T1 (ends as BO + 8)
  *
  * NOTE(review): o0 / o4 are not defined in this section; presumably byte offsets 0 / 4.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x1', `
  #else
  .macro COPY_4x1
  #endif

          /* one element per source row */
          lxsspx    vs32,   o0,     A0
          lxsspx    vs33,   o0,     A1
          lxsspx    vs34,   o0,     A2
          lxsspx    vs35,   o0,     A3

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          /* elements taken from A0 and A1 */
          stxsspx   vs32,   o0,     T1
          stxsspx   vs33,   o4,     T1

          addi      T1,     T1,     8

          /* elements taken from A2 and A3 */
          stxsspx   vs34,   o0,     T1
          stxsspx   vs35,   o4,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=8
  *
  * COPY_2x8 loads two vectors (lxvw4x, 16 bytes each) through each of A0 and A1 and
  * stores the four vectors through T1, a scratch copy of BO.
  *
  *   reads    : A0, A1, BO, index operands o0 / o16 / o32 / o48
  *   clobbers : vs32..vs35, T1 (ends equal to BO)
  *
  * NOTE(review): o0 / o16 / o32 / o48 are not defined in this section; presumably byte
  * offsets 0 / 16 / 32 / 48. Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x8', `
  #else
  .macro COPY_2x8
  #endif

          /* loads: first vector of each row, then second vector of each row */
          lxvw4x    vs32,   o0,     A0
          lxvw4x    vs34,   o0,     A1
          lxvw4x    vs33,   o16,    A0
          lxvw4x    vs35,   o16,    A1

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          /* row A0 followed by row A1 */
          stxvw4x   vs32,   o0,     T1
          stxvw4x   vs33,   o16,    T1
          stxvw4x   vs34,   o32,    T1
          stxvw4x   vs35,   o48,    T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=4
  *
  * COPY_2x4 loads one vector (lxvw4x, 16 bytes) through each of A0 and A1 and stores
  * the two vectors through T1, a scratch copy of BO.
  *
  *   reads    : A0, A1, BO, index operands o0 / o16
  *   clobbers : vs32, vs33, T1 (ends equal to BO)
  *
  * NOTE(review): o0 / o16 are not defined in this section; presumably byte offsets
  * 0 / 16. Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x4', `
  #else
  .macro COPY_2x4
  #endif

          /* one vector per source row */
          lxvw4x    vs32,   o0,     A0
          lxvw4x    vs33,   o0,     A1

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxvw4x   vs32,   o0,     T1
          stxvw4x   vs33,   o16,    T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=2
  *
  * COPY_2x2 loads two single-precision scalars (lxsspx) through each of A0 and A1 and
  * stores the four scalars (stxsspx) through T1, a scratch copy of BO that is advanced
  * by 8 bytes after the first pair.
  *
  *   reads    : A0, A1, BO, index operands o0 / o4
  *   clobbers : vs32..vs35, T1 (ends as BO + 8)
  *
  * NOTE(review): o0 / o4 are not defined in this section; presumably byte offsets 0 / 4.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x2', `
  #else
  .macro COPY_2x2
  #endif

          /* loads: first element of each row, then second element of each row */
          lxsspx    vs32,   o0,     A0
          lxsspx    vs34,   o0,     A1
          lxsspx    vs33,   o4,     A0
          lxsspx    vs35,   o4,     A1

          /* T1 walks the destination so that BO itself is left untouched */
          mr        T1,     BO

          /* row A0 */
          stxsspx   vs32,   o0,     T1
          stxsspx   vs33,   o4,     T1

          addi      T1,     T1,     8

          /* row A1 */
          stxsspx   vs34,   o0,     T1
          stxsspx   vs35,   o4,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=1
  *
  * COPY_2x1 loads one single-precision scalar (lxsspx) through each of A0 and A1 and
  * stores the two scalars (stxsspx) through T1, a scratch copy of BO.
  *
  *   reads    : A0, A1, BO, index operands o0 / o4
  *   clobbers : vs32, vs33, T1 (ends equal to BO)
  *
  * NOTE(review): o0 / o4 are not defined in this section; presumably byte offsets 0 / 4.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x1', `
  #else
  .macro COPY_2x1
  #endif

          /* one element per source row */
          lxsspx    vs32,   o0,     A0
          lxsspx    vs33,   o0,     A1

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxsspx   vs32,   o0,     T1
          stxsspx   vs33,   o4,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=8
  *
  * COPY_1x8 loads two vectors (lxvw4x, 16 bytes each) through A0 and stores them
  * through T1, a scratch copy of BO.
  *
  *   reads    : A0, BO, index operands o0 / o16
  *   clobbers : vs32, vs33, T1 (ends equal to BO)
  *
  * NOTE(review): o0 / o16 are not defined in this section; presumably byte offsets
  * 0 / 16. Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x8', `
  #else
  .macro COPY_1x8
  #endif

          /* both vectors of the single source row */
          lxvw4x    vs32,   o0,     A0
          lxvw4x    vs33,   o16,    A0

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxvw4x   vs32,   o0,     T1
          stxvw4x   vs33,   o16,    T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=4
  *
  * COPY_1x4 loads one vector (lxvw4x, 16 bytes) through A0 and stores it through T1,
  * a scratch copy of BO.
  *
  *   reads    : A0, BO, index operand o0
  *   clobbers : vs32, T1 (ends equal to BO)
  *
  * NOTE(review): o0 is not defined in this section; presumably byte offset 0.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x4', `
  #else
  .macro COPY_1x4
  #endif

          lxvw4x    vs32,   o0,     A0

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxvw4x   vs32,   o0,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=2
  *
  * COPY_1x2 loads two single-precision scalars (lxsspx) through A0 and stores them
  * (stxsspx) through T1, a scratch copy of BO.
  *
  *   reads    : A0, BO, index operands o0 / o4
  *   clobbers : vs32, vs33, T1 (ends equal to BO)
  *
  * NOTE(review): o0 / o4 are not defined in this section; presumably byte offsets 0 / 4.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x2', `
  #else
  .macro COPY_1x2
  #endif

          /* both elements of the single source row */
          lxsspx    vs32,   o0,     A0
          lxsspx    vs33,   o4,     A0

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxsspx   vs32,   o0,     T1
          stxsspx   vs33,   o4,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=1
  *
  * COPY_1x1 loads one single-precision scalar (lxsspx) through A0 and stores it
  * (stxsspx) through T1, a scratch copy of BO.
  *
  *   reads    : A0, BO, index operand o0
  *   clobbers : vs32, T1 (ends equal to BO)
  *
  * NOTE(review): o0 is not defined in this section; presumably byte offset 0.
  * Confirm against the including file.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x1', `
  #else
  .macro COPY_1x1
  #endif

          lxsspx    vs32,   o0,     A0

          /* store through a scratch pointer; BO is not modified here */
          mr        T1,     BO

          stxsspx   vs32,   o0,     T1

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif