You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dgemm_ncopy_macros_4_power8.S 16 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822
  /***************************************************************************
  Copyright (c) 2013-2016, The OpenBLAS Project
  All rights reserved.
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are
  met:
  1. Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
  3. Neither the name of the OpenBLAS project nor the names of
  its contributors may be used to endorse or promote products
  derived from this software without specific prior written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *****************************************************************************/
  /**************************************************************************************
  * 2016/04/28 Werner Saar (wernsaar@googlemail.com)
  * BLASTEST : OK
  * CTEST : OK
  * TEST : OK
  * LAPACK-TEST : OK
  **************************************************************************************/
  /**********************************************************************************************
  * Macros for N=4 and M=16
  *
  * COPY_4x16 reads sixteen 64-bit elements through each of the source pointers
  * A0, A1, A2 and A3 and stores them at BO so that the four values belonging to one
  * element position (taken from A0, A1, A2, A3 in that order) are adjacent, followed
  * by the four values of the next position, and so on.
  *
  * Effects: A0-A3 each advance by 128 bytes, BO advances by 512 bytes.
  * Registers written: vs0-vs13 and vs32-vs51.
  * A0-A3, BO, PREB and o0-o112 are not declared in this view; the o-names are
  * assumed to supply the byte offsets 0-112 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x16', `
  #else
  .macro COPY_4x16
  #endif

      /* positions 0-3: one vector at o0 and one at o16 from every source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o0, A1
      lxvd2x    vs2, o0, A2
      lxvd2x    vs3, o0, A3
      lxvd2x    vs4, o16, A0
      lxvd2x    vs5, o16, A1
      lxvd2x    vs6, o16, A2
      lxvd2x    vs7, o16, A3

      /* selector 0 pairs the first doublewords, selector 3 pairs the second ones */
      xxpermdi  vs32, vs0, vs1, 0
      xxpermdi  vs33, vs2, vs3, 0
      xxpermdi  vs34, vs0, vs1, 3
      xxpermdi  vs35, vs2, vs3, 3
      xxpermdi  vs36, vs4, vs5, 0
      xxpermdi  vs37, vs6, vs7, 0
      xxpermdi  vs38, vs4, vs5, 3
      xxpermdi  vs39, vs6, vs7, 3

      /* positions 4-7 */
      lxvd2x    vs0, o32, A0
      lxvd2x    vs1, o32, A1
      lxvd2x    vs2, o32, A2
      lxvd2x    vs3, o32, A3
      lxvd2x    vs4, o48, A0
      lxvd2x    vs5, o48, A1
      lxvd2x    vs6, o48, A2
      lxvd2x    vs7, o48, A3

      xxpermdi  vs40, vs0, vs1, 0
      xxpermdi  vs41, vs2, vs3, 0
      xxpermdi  vs42, vs0, vs1, 3
      xxpermdi  vs43, vs2, vs3, 3
      xxpermdi  vs44, vs4, vs5, 0
      xxpermdi  vs45, vs6, vs7, 0
      xxpermdi  vs46, vs4, vs5, 3
      xxpermdi  vs47, vs6, vs7, 3

      /* positions 8-11 */
      lxvd2x    vs0, o64, A0
      lxvd2x    vs1, o64, A1
      lxvd2x    vs2, o64, A2
      lxvd2x    vs3, o64, A3
      lxvd2x    vs4, o80, A0
      lxvd2x    vs5, o80, A1
      lxvd2x    vs6, o80, A2
      lxvd2x    vs7, o80, A3

      xxpermdi  vs48, vs0, vs1, 0
      xxpermdi  vs49, vs2, vs3, 0
      xxpermdi  vs50, vs0, vs1, 3
      xxpermdi  vs51, vs2, vs3, 3
      xxpermdi  vs8, vs4, vs5, 0
      xxpermdi  vs9, vs6, vs7, 0
      xxpermdi  vs10, vs4, vs5, 3
      xxpermdi  vs11, vs6, vs7, 3

      /* positions 12-15 */
      lxvd2x    vs0, o96, A0
      lxvd2x    vs1, o96, A1
      lxvd2x    vs2, o96, A2
      lxvd2x    vs3, o96, A3
      lxvd2x    vs6, o112, A0
      lxvd2x    vs7, o112, A1
      lxvd2x    vs12, o112, A2
      lxvd2x    vs13, o112, A3

      /* results reuse the input registers, so the order below matters:
         vs0 and vs2 are overwritten only after vs4 and vs5 have read them */
      xxpermdi  vs4, vs0, vs1, 0
      xxpermdi  vs5, vs2, vs3, 0
      xxpermdi  vs0, vs0, vs1, 3
      xxpermdi  vs2, vs2, vs3, 3

      addi      A0, A0, 128
      addi      A1, A1, 128

      /* same in-place pattern: vs1, vs3 first, then vs6 and vs12 over their inputs */
      xxpermdi  vs1, vs6, vs7, 0
      xxpermdi  vs3, vs12, vs13, 0
      xxpermdi  vs6, vs6, vs7, 3
      xxpermdi  vs12, vs12, vs13, 3

      /* cache-touch hint for the address BO + PREB */
      dcbt      BO, PREB

      addi      A3, A3, 128
      addi      A2, A2, 128

      /* output bytes 0-127: positions 0-3 */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      stxvd2x   vs36, o64, BO
      stxvd2x   vs37, o80, BO
      stxvd2x   vs38, o96, BO
      stxvd2x   vs39, o112, BO
      addi      BO, BO, 128

      /* output bytes 128-255: positions 4-7 */
      dcbt      BO, PREB
      stxvd2x   vs40, o0, BO
      stxvd2x   vs41, o16, BO
      stxvd2x   vs42, o32, BO
      stxvd2x   vs43, o48, BO
      stxvd2x   vs44, o64, BO
      stxvd2x   vs45, o80, BO
      stxvd2x   vs46, o96, BO
      stxvd2x   vs47, o112, BO
      addi      BO, BO, 128

      /* output bytes 256-383: positions 8-11 */
      dcbt      BO, PREB
      stxvd2x   vs48, o0, BO
      stxvd2x   vs49, o16, BO
      stxvd2x   vs50, o32, BO
      stxvd2x   vs51, o48, BO
      stxvd2x   vs8, o64, BO
      stxvd2x   vs9, o80, BO
      stxvd2x   vs10, o96, BO
      stxvd2x   vs11, o112, BO
      addi      BO, BO, 128

      /* output bytes 384-511: positions 12-15 */
      dcbt      BO, PREB
      stxvd2x   vs4, o0, BO
      stxvd2x   vs5, o16, BO
      stxvd2x   vs0, o32, BO
      stxvd2x   vs2, o48, BO
      stxvd2x   vs1, o64, BO
      stxvd2x   vs3, o80, BO
      stxvd2x   vs6, o96, BO
      stxvd2x   vs12, o112, BO
      addi      BO, BO, 128

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=8
  *
  * COPY_4x8 reads eight 64-bit elements through each of A0, A1, A2 and A3 and stores
  * them at BO grouped by element position: for every position the values from
  * A0, A1, A2 and A3 are written next to each other, in that order.
  *
  * Effects: A0-A3 each advance by 64 bytes, BO advances by 256 bytes.
  * Registers written: vs0-vs13, vs32-vs47, vs50 and vs51.
  * A0-A3, BO and o0-o112 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-112 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x8', `
  #else
  .macro COPY_4x8
  #endif

      /* load phase: four vectors per source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0
      lxvd2x    vs2, o32, A0
      lxvd2x    vs3, o48, A0

      lxvd2x    vs8, o0, A1
      lxvd2x    vs9, o16, A1
      lxvd2x    vs10, o32, A1
      lxvd2x    vs11, o48, A1

      lxvd2x    vs4, o0, A2
      lxvd2x    vs5, o16, A2
      lxvd2x    vs6, o32, A2
      lxvd2x    vs7, o48, A2

      lxvd2x    vs12, o0, A3
      lxvd2x    vs13, o16, A3
      lxvd2x    vs50, o32, A3
      lxvd2x    vs51, o48, A3

      /* all loads are issued, step every source past the 64 bytes just read */
      addi      A0, A0, 64
      addi      A1, A1, 64
      addi      A2, A2, 64
      addi      A3, A3, 64

      /* interleave phase: selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs4, vs12, 0
      xxpermdi  vs34, vs0, vs8, 3
      xxpermdi  vs35, vs4, vs12, 3

      xxpermdi  vs36, vs1, vs9, 0
      xxpermdi  vs37, vs5, vs13, 0
      xxpermdi  vs38, vs1, vs9, 3
      xxpermdi  vs39, vs5, vs13, 3

      xxpermdi  vs40, vs2, vs10, 0
      xxpermdi  vs41, vs6, vs50, 0
      xxpermdi  vs42, vs2, vs10, 3
      xxpermdi  vs43, vs6, vs50, 3

      xxpermdi  vs44, vs3, vs11, 0
      xxpermdi  vs45, vs7, vs51, 0
      xxpermdi  vs46, vs3, vs11, 3
      xxpermdi  vs47, vs7, vs51, 3

      /* store phase: positions 0-3 */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      stxvd2x   vs36, o64, BO
      stxvd2x   vs37, o80, BO
      stxvd2x   vs38, o96, BO
      stxvd2x   vs39, o112, BO
      addi      BO, BO, 128

      /* store phase: positions 4-7 */
      stxvd2x   vs40, o0, BO
      stxvd2x   vs41, o16, BO
      stxvd2x   vs42, o32, BO
      stxvd2x   vs43, o48, BO
      stxvd2x   vs44, o64, BO
      stxvd2x   vs45, o80, BO
      stxvd2x   vs46, o96, BO
      stxvd2x   vs47, o112, BO
      addi      BO, BO, 128

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=4
  *
  * COPY_4x4 reads four 64-bit elements through each of A0, A1, A2 and A3 and stores
  * them at BO grouped by element position, each group holding the values from
  * A0, A1, A2 and A3 in that order.
  *
  * Effects: A0-A3 each advance by 32 bytes, BO advances by 128 bytes.
  * Registers written: vs0, vs1, vs8-vs13 and vs32-vs39.
  * A0-A3, BO and o0-o112 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-112 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x4', `
  #else
  .macro COPY_4x4
  #endif

      /* load phase: two vectors per source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0

      lxvd2x    vs8, o0, A1
      lxvd2x    vs9, o16, A1

      lxvd2x    vs10, o0, A2
      lxvd2x    vs11, o16, A2

      lxvd2x    vs12, o0, A3
      lxvd2x    vs13, o16, A3

      /* step every source past the 32 bytes just read */
      addi      A0, A0, 32
      addi      A1, A1, 32
      addi      A2, A2, 32
      addi      A3, A3, 32

      /* interleave phase: selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs10, vs12, 0
      xxpermdi  vs34, vs0, vs8, 3
      xxpermdi  vs35, vs10, vs12, 3

      xxpermdi  vs36, vs1, vs9, 0
      xxpermdi  vs37, vs11, vs13, 0
      xxpermdi  vs38, vs1, vs9, 3
      xxpermdi  vs39, vs11, vs13, 3

      /* store phase */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      stxvd2x   vs36, o64, BO
      stxvd2x   vs37, o80, BO
      stxvd2x   vs38, o96, BO
      stxvd2x   vs39, o112, BO
      addi      BO, BO, 128

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=2
  *
  * COPY_4x2 reads two 64-bit elements through each of A0, A1, A2 and A3 and stores
  * them at BO as two groups, each holding the values from A0, A1, A2 and A3 in
  * that order.
  *
  * Effects: A0-A3 each advance by 16 bytes, BO advances by 64 bytes.
  * Registers written: vs0, vs8-vs10 and vs32-vs35.
  * A0-A3, BO and o0-o48 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-48 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x2', `
  #else
  .macro COPY_4x2
  #endif

      /* load phase: one vector per source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs8, o0, A1
      lxvd2x    vs9, o0, A2
      lxvd2x    vs10, o0, A3

      /* step every source past the 16 bytes just read */
      addi      A0, A0, 16
      addi      A1, A1, 16
      addi      A2, A2, 16
      addi      A3, A3, 16

      /* interleave phase: selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs9, vs10, 0
      xxpermdi  vs34, vs0, vs8, 3
      xxpermdi  vs35, vs9, vs10, 3

      /* store phase */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      addi      BO, BO, 64

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=4 and M=1
  *
  * COPY_4x1 reads one 64-bit element through each of A0, A1, A2 and A3 with scalar
  * loads and stores the four values at BO in that order.
  *
  * Effects: A0-A3 each advance by 8 bytes, BO advances by 32 bytes.
  * Registers written: vs0, vs8-vs10, vs32 and vs33.
  * A0-A3, BO, o0 and o16 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0 and 16 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_4x1', `
  #else
  .macro COPY_4x1
  #endif

      /* load phase: a single doubleword per source */
      lxsdx     vs0, o0, A0
      lxsdx     vs8, o0, A1
      lxsdx     vs9, o0, A2
      lxsdx     vs10, o0, A3

      /* step every source past the 8 bytes just read */
      addi      A0, A0, 8
      addi      A1, A1, 8
      addi      A2, A2, 8
      addi      A3, A3, 8

      /* selector 0 joins the first doubleword of each input into one vector */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs9, vs10, 0

      /* store phase */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      addi      BO, BO, 32

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=16
  *
  * COPY_2x16 reads sixteen 64-bit elements through each of A0 and A1 and stores them
  * at BO as sixteen pairs; every pair holds the value from A0 followed by the value
  * from A1 for the same element position.
  *
  * Effects: A0 and A1 each advance by 128 bytes, BO advances by 256 bytes.
  * Registers written: vs0-vs13 and vs32-vs49.
  * A0, A1, BO and o0-o112 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-112 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x16', `
  #else
  .macro COPY_2x16
  #endif

      /* load phase: eight vectors from A0 */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0
      lxvd2x    vs2, o32, A0
      lxvd2x    vs3, o48, A0
      lxvd2x    vs4, o64, A0
      lxvd2x    vs5, o80, A0
      lxvd2x    vs6, o96, A0
      lxvd2x    vs7, o112, A0

      /* load phase: eight vectors from A1 */
      lxvd2x    vs8, o0, A1
      lxvd2x    vs9, o16, A1
      lxvd2x    vs10, o32, A1
      lxvd2x    vs11, o48, A1
      lxvd2x    vs12, o64, A1
      lxvd2x    vs13, o80, A1
      lxvd2x    vs48, o96, A1
      lxvd2x    vs49, o112, A1

      /* step both sources past the 128 bytes just read */
      addi      A0, A0, 128
      addi      A1, A1, 128

      /* interleave phase: selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs0, vs8, 3
      xxpermdi  vs34, vs1, vs9, 0
      xxpermdi  vs35, vs1, vs9, 3
      xxpermdi  vs36, vs2, vs10, 0
      xxpermdi  vs37, vs2, vs10, 3
      xxpermdi  vs38, vs3, vs11, 0
      xxpermdi  vs39, vs3, vs11, 3
      xxpermdi  vs40, vs4, vs12, 0
      xxpermdi  vs41, vs4, vs12, 3
      xxpermdi  vs42, vs5, vs13, 0
      xxpermdi  vs43, vs5, vs13, 3
      xxpermdi  vs44, vs6, vs48, 0
      xxpermdi  vs45, vs6, vs48, 3
      xxpermdi  vs46, vs7, vs49, 0
      xxpermdi  vs47, vs7, vs49, 3

      /* store phase: pairs 0-7 */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      stxvd2x   vs36, o64, BO
      stxvd2x   vs37, o80, BO
      stxvd2x   vs38, o96, BO
      stxvd2x   vs39, o112, BO
      addi      BO, BO, 128

      /* store phase: pairs 8-15 */
      stxvd2x   vs40, o0, BO
      stxvd2x   vs41, o16, BO
      stxvd2x   vs42, o32, BO
      stxvd2x   vs43, o48, BO
      stxvd2x   vs44, o64, BO
      stxvd2x   vs45, o80, BO
      stxvd2x   vs46, o96, BO
      stxvd2x   vs47, o112, BO
      addi      BO, BO, 128

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=8
  *
  * COPY_2x8 reads eight 64-bit elements through each of A0 and A1 and stores them at
  * BO as eight pairs; every pair holds the value from A0 followed by the value from
  * A1 for the same element position.
  *
  * Effects: A0 and A1 each advance by 64 bytes, BO advances by 128 bytes.
  * Registers written: vs0-vs3, vs8-vs11 and vs32-vs39.
  * A0, A1, BO and o0-o112 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-112 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x8', `
  #else
  .macro COPY_2x8
  #endif

      /* load phase: four vectors per source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0
      lxvd2x    vs2, o32, A0
      lxvd2x    vs3, o48, A0

      lxvd2x    vs8, o0, A1
      lxvd2x    vs9, o16, A1
      lxvd2x    vs10, o32, A1
      lxvd2x    vs11, o48, A1

      /* step both sources past the 64 bytes just read */
      addi      A0, A0, 64
      addi      A1, A1, 64

      /* interleave phase: selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs0, vs8, 3
      xxpermdi  vs34, vs1, vs9, 0
      xxpermdi  vs35, vs1, vs9, 3
      xxpermdi  vs36, vs2, vs10, 0
      xxpermdi  vs37, vs2, vs10, 3
      xxpermdi  vs38, vs3, vs11, 0
      xxpermdi  vs39, vs3, vs11, 3

      /* store phase */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      stxvd2x   vs36, o64, BO
      stxvd2x   vs37, o80, BO
      stxvd2x   vs38, o96, BO
      stxvd2x   vs39, o112, BO
      addi      BO, BO, 128

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=4
  *
  * COPY_2x4 reads four 64-bit elements through each of A0 and A1 and stores them at
  * BO as four pairs; every pair holds the value from A0 followed by the value from
  * A1 for the same element position.
  *
  * Effects: A0 and A1 each advance by 32 bytes, BO advances by 64 bytes.
  * Registers written: vs0, vs1, vs8, vs9 and vs32-vs35.
  * A0, A1, BO and o0-o48 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-48 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x4', `
  #else
  .macro COPY_2x4
  #endif

      /* load phase: two vectors per source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0

      lxvd2x    vs8, o0, A1
      lxvd2x    vs9, o16, A1

      /* step both sources past the 32 bytes just read */
      addi      A0, A0, 32
      addi      A1, A1, 32

      /* interleave phase: selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs0, vs8, 3
      xxpermdi  vs34, vs1, vs9, 0
      xxpermdi  vs35, vs1, vs9, 3

      /* store phase */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      stxvd2x   vs34, o32, BO
      stxvd2x   vs35, o48, BO
      addi      BO, BO, 64

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=2
  *
  * COPY_2x2 reads two 64-bit elements through each of A0 and A1 and stores them at
  * BO as two pairs; every pair holds the value from A0 followed by the value from
  * A1 for the same element position.
  *
  * Effects: A0 and A1 each advance by 16 bytes, BO advances by 32 bytes.
  * Registers written: vs0, vs8, vs32 and vs33.
  * A0, A1, BO, o0 and o16 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0 and 16 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x2', `
  #else
  .macro COPY_2x2
  #endif

      /* load phase: one vector per source */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs8, o0, A1

      /* step both sources past the 16 bytes just read */
      addi      A0, A0, 16
      addi      A1, A1, 16

      /* selector 0 pairs first doublewords, selector 3 the second */
      xxpermdi  vs32, vs0, vs8, 0
      xxpermdi  vs33, vs0, vs8, 3

      /* store phase */
      stxvd2x   vs32, o0, BO
      stxvd2x   vs33, o16, BO
      addi      BO, BO, 32

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=2 and M=1
  *
  * COPY_2x1 reads one 64-bit element through each of A0 and A1 with scalar loads and
  * stores the two values at BO, the one from A0 first.
  *
  * Effects: A0 and A1 each advance by 8 bytes, BO advances by 16 bytes.
  * Registers written: vs0, vs8 and vs32.
  * A0, A1, BO and o0 are not declared in this view; o0 is assumed to supply a byte
  * offset of 0 as its name suggests.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_2x1', `
  #else
  .macro COPY_2x1
  #endif

      /* load phase: a single doubleword per source */
      lxsdx     vs0, o0, A0
      lxsdx     vs8, o0, A1

      /* step both sources past the 8 bytes just read */
      addi      A0, A0, 8
      addi      A1, A1, 8

      /* selector 0 joins the first doubleword of each input into one vector */
      xxpermdi  vs32, vs0, vs8, 0

      /* store phase */
      stxvd2x   vs32, o0, BO
      addi      BO, BO, 16

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=16
  *
  * COPY_1x16 copies sixteen 64-bit elements (128 bytes) from A0 to BO unchanged.
  * The data is written as two 64-byte halves, with BO stepped after each half.
  *
  * Effects: A0 and BO each advance by 128 bytes.
  * Registers written: vs0-vs7.
  * A0, BO and o0-o112 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-112 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x16', `
  #else
  .macro COPY_1x16
  #endif

      /* load phase: eight vectors from A0 */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0
      lxvd2x    vs2, o32, A0
      lxvd2x    vs3, o48, A0
      lxvd2x    vs4, o64, A0
      lxvd2x    vs5, o80, A0
      lxvd2x    vs6, o96, A0
      lxvd2x    vs7, o112, A0

      /* store phase: first 64 bytes */
      stxvd2x   vs0, o0, BO
      stxvd2x   vs1, o16, BO
      stxvd2x   vs2, o32, BO
      stxvd2x   vs3, o48, BO
      addi      BO, BO, 64

      /* store phase: second 64 bytes */
      stxvd2x   vs4, o0, BO
      stxvd2x   vs5, o16, BO
      stxvd2x   vs6, o32, BO
      stxvd2x   vs7, o48, BO
      addi      BO, BO, 64

      /* step the source past the 128 bytes consumed above */
      addi      A0, A0, 128

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=8
  *
  * COPY_1x8 copies eight 64-bit elements (64 bytes) from A0 to BO unchanged.
  *
  * Effects: A0 and BO each advance by 64 bytes.
  * Registers written: vs0-vs3.
  * A0, BO and o0-o48 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0-48 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x8', `
  #else
  .macro COPY_1x8
  #endif

      /* load phase */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0
      lxvd2x    vs2, o32, A0
      lxvd2x    vs3, o48, A0

      /* store phase */
      stxvd2x   vs0, o0, BO
      stxvd2x   vs1, o16, BO
      stxvd2x   vs2, o32, BO
      stxvd2x   vs3, o48, BO

      /* step both pointers past the 64 bytes handled above */
      addi      A0, A0, 64
      addi      BO, BO, 64

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=4
  *
  * COPY_1x4 copies four 64-bit elements (32 bytes) from A0 to BO unchanged.
  *
  * Effects: A0 and BO each advance by 32 bytes.
  * Registers written: vs0 and vs1.
  * A0, BO, o0 and o16 are not declared in this view; the o-names are assumed to
  * supply the byte offsets 0 and 16 that their names suggest.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x4', `
  #else
  .macro COPY_1x4
  #endif

      /* load phase */
      lxvd2x    vs0, o0, A0
      lxvd2x    vs1, o16, A0

      /* store phase */
      stxvd2x   vs0, o0, BO
      stxvd2x   vs1, o16, BO

      /* step both pointers past the 32 bytes handled above */
      addi      A0, A0, 32
      addi      BO, BO, 32

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=2
  *
  * COPY_1x2 copies two 64-bit elements (16 bytes) from A0 to BO unchanged.
  *
  * Effects: A0 and BO each advance by 16 bytes.
  * Registers written: vs0.
  * A0, BO and o0 are not declared in this view; o0 is assumed to supply a byte
  * offset of 0 as its name suggests.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x2', `
  #else
  .macro COPY_1x2
  #endif

      /* one vector in, the same vector out */
      lxvd2x    vs0, o0, A0
      stxvd2x   vs0, o0, BO

      /* step both pointers past the 16 bytes handled above */
      addi      A0, A0, 16
      addi      BO, BO, 16

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif
  /**********************************************************************************************
  * Macros for N=1 and M=1
  *
  * COPY_1x1 copies a single 64-bit element from A0 to BO using the scalar
  * doubleword load and store forms.
  *
  * Effects: A0 and BO each advance by 8 bytes.
  * Registers written: vs0.
  * A0, BO and o0 are not declared in this view; o0 is assumed to supply a byte
  * offset of 0 as its name suggests.
  **********************************************************************************************/
  #if defined(_AIX)
  define(`COPY_1x1', `
  #else
  .macro COPY_1x1
  #endif

      /* one doubleword in, the same doubleword out */
      lxsdx     vs0, o0, A0
      stxsdx    vs0, o0, BO

      /* step both pointers past the 8 bytes handled above */
      addi      A0, A0, 8
      addi      BO, BO, 8

  #if defined(_AIX)
  ')
  #else
  .endm
  #endif