You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zlaswp_ncopy_4.c 15 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include "common.h"
  /* a2, a4, a6 and a8 address the location two FLOATs past a1, a3, a5 and a7.
     The function below reads every element as the FLOAT pair at offsets +0 and
     +1 and advances a1/a3/a5/a7 by 4 per pair of rows, so these macros name the
     row that immediately follows a1/a3/a5/a7 within the same column. */
  40. #define a2 (a1 + 2)
  41. #define a4 (a3 + 2)
  42. #define a6 (a5 + 2)
  43. #define a8 (a7 + 2)
  /*
   * Apply the row interchanges listed in ipiv to rows k1..k2 of the n-column
   * matrix a, and write the resulting rows k1..k2 to buffer.
   *
   * Every matrix element occupies two consecutive FLOATs: the code always
   * accesses offsets +0 and +1 and scales row indices and lda by 2.
   * (Presumably real and imaginary parts -- confirm against the callers.)
   *
   * Parameters
   *   n       number of columns to process; n <= 0 returns immediately.
   *   k1, k2  first and last row to process, 1-based and inclusive (k1 is
   *           decremented and a is moved back one element to compensate).
   *   a       matrix storage; consecutive columns are lda elements apart.
   *   lda     column stride in elements (converted to FLOATs internally).
   *   ipiv    1-based pivot row for each row; the entry for row k1 is at
   *           0-based index k1 - 1.
   *   buffer  output area, written sequentially.
   *
   * Traversal: columns are taken in groups of 4, then 2, then 1. Within a
   * group, rows are taken in pairs, plus one final single row when the row
   * count is odd. For each row the buffer receives that row's elements for
   * every column of the group, one column after another.
   *
   * Stores into a go only through the pivot pointers b1..b8 (the value
   * displaced from the current row is saved in the pivot row). The final
   * contents of the current rows are written to buffer only, not back to
   * a1/a2.
   *
   * NOTE(review): the pivot read-ahead inside the row-pair loops executes on
   * every iteration, so one or two ipiv entries past the entry for row k2 are
   * read. Confirm that callers always provide that slack.
   *
   * Returns 0 in all cases.
   */
  44. int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint *ipiv, FLOAT *buffer){
  45. BLASLONG i, j, ip1, ip2;
  46. blasint *piv;
  47. FLOAT *a1, *a3, *a5, *a7;
  48. FLOAT *b1, *b2, *b3, *b4;
  49. FLOAT *b5, *b6, *b7, *b8;
  50. FLOAT A1, A2, A3, A4, A5, A6, A7, A8;
  51. FLOAT B1, B2, B3, B4, B5, B6, B7, B8;
  52. FLOAT A9, A10, A11, A12, A13, A14, A15, A16;
  53. FLOAT B9, B10, B11, B12, B13, B14, B15, B16;
  /* Move a back one element (2 FLOATs) so that a + 2 * row addresses a
     1-based row, and convert lda from elements to FLOATs. */
  54. a -= 2;
  55. lda *= 2;
  /* Make k1 0-based and point ipiv at the pivot entry of the first row. */
  56. k1 --;
  57. ipiv += k1;
  58. if (n <= 0) return 0;
  /* ---- Groups of four columns ---- */
  59. j = (n >> 2);
  60. if (j > 0) {
  61. do {
  62. piv = ipiv;
  /* a1/a3/a5/a7: current row in columns 0..3 of this group. */
  63. a1 = a + (k1 + 1) * 2;
  64. a3 = a1 + 1 * lda;
  65. a5 = a1 + 2 * lda;
  66. a7 = a1 + 3 * lda;
  /* Pivot rows for the first pair of rows, as FLOAT offsets. */
  67. ip1 = *(piv + 0) * 2;
  68. ip2 = *(piv + 1) * 2;
  69. piv += 2;
  /* b1/b3/b5/b7: pivot row of the first row of the pair, columns 0..3;
     b2/b4/b6/b8: pivot row of the second row of the pair, columns 0..3. */
  70. b1 = a + ip1;
  71. b2 = a + ip2;
  72. b3 = b1 + 1 * lda;
  73. b4 = b2 + 1 * lda;
  74. b5 = b1 + 2 * lda;
  75. b6 = b2 + 2 * lda;
  76. b7 = b1 + 3 * lda;
  77. b8 = b2 + 3 * lda;
  /* Number of complete row pairs (k2 - k1 is the row count here). */
  78. i = ((k2 - k1) >> 1);
  79. if (i > 0) {
  80. do {
  /* Load everything before storing anything: the current rows and the pivot
     rows may be the same memory. A1..A8 hold the FLOAT at offset +0 of
     a1..a8, A9..A16 the FLOAT at offset +1; B1..B16 likewise for b1..b8. */
  81. A1 = *(a1 + 0);
  82. A9 = *(a1 + 1);
  83. A2 = *(a2 + 0);
  84. A10 = *(a2 + 1);
  85. A3 = *(a3 + 0);
  86. A11 = *(a3 + 1);
  87. A4 = *(a4 + 0);
  88. A12 = *(a4 + 1);
  89. A5 = *(a5 + 0);
  90. A13 = *(a5 + 1);
  91. A6 = *(a6 + 0);
  92. A14 = *(a6 + 1);
  93. A7 = *(a7 + 0);
  94. A15 = *(a7 + 1);
  95. A8 = *(a8 + 0);
  96. A16 = *(a8 + 1);
  97. B1 = *(b1 + 0);
  98. B9 = *(b1 + 1);
  99. B2 = *(b2 + 0);
  100. B10 = *(b2 + 1);
  101. B3 = *(b3 + 0);
  102. B11 = *(b3 + 1);
  103. B4 = *(b4 + 0);
  104. B12 = *(b4 + 1);
  105. B5 = *(b5 + 0);
  106. B13 = *(b5 + 1);
  107. B6 = *(b6 + 0);
  108. B14 = *(b6 + 1);
  109. B7 = *(b7 + 0);
  110. B15 = *(b7 + 1);
  111. B8 = *(b8 + 0);
  112. B16 = *(b8 + 1);
  /* Read ahead the pivots of the next pair (performed on the last iteration
     too; see the NOTE(review) in the function header). */
  113. ip1 = *(piv + 0) * 2;
  114. ip2 = *(piv + 1) * 2;
  115. piv += 2;
  /* Case analysis on how the two pivot rows relate to the two current rows.
     buffer[0..7] receives the first row, buffer[8..15] the second row. */
  116. if (b1 == a1) {
  117. if (b2 == a2) {
  /* Neither row moves: emit both current rows unchanged. */
  118. *(buffer + 0) = A1;
  119. *(buffer + 1) = A9;
  120. *(buffer + 2) = A3;
  121. *(buffer + 3) = A11;
  122. *(buffer + 4) = A5;
  123. *(buffer + 5) = A13;
  124. *(buffer + 6) = A7;
  125. *(buffer + 7) = A15;
  126. *(buffer + 8) = A2;
  127. *(buffer + 9) = A10;
  128. *(buffer + 10) = A4;
  129. *(buffer + 11) = A12;
  130. *(buffer + 12) = A6;
  131. *(buffer + 13) = A14;
  132. *(buffer + 14) = A8;
  133. *(buffer + 15) = A16;
  134. } else {
  /* First row stays; second row takes its pivot row's values, and the pivot
     row receives the old second row. */
  135. *(buffer + 0) = A1;
  136. *(buffer + 1) = A9;
  137. *(buffer + 2) = A3;
  138. *(buffer + 3) = A11;
  139. *(buffer + 4) = A5;
  140. *(buffer + 5) = A13;
  141. *(buffer + 6) = A7;
  142. *(buffer + 7) = A15;
  143. *(buffer + 8) = B2;
  144. *(buffer + 9) = B10;
  145. *(buffer + 10) = B4;
  146. *(buffer + 11) = B12;
  147. *(buffer + 12) = B6;
  148. *(buffer + 13) = B14;
  149. *(buffer + 14) = B8;
  150. *(buffer + 15) = B16;
  151. *(b2 + 0) = A2;
  152. *(b2 + 1) = A10;
  153. *(b4 + 0) = A4;
  154. *(b4 + 1) = A12;
  155. *(b6 + 0) = A6;
  156. *(b6 + 1) = A14;
  157. *(b8 + 0) = A8;
  158. *(b8 + 1) = A16;
  159. }
  160. } else
  /* The first row's pivot is the second row of the pair. */
  161. if (b1 == a2) {
  162. if (b2 == a2) {
  /* Second row pivots to itself: the two current rows simply trade places. */
  163. *(buffer + 0) = A2;
  164. *(buffer + 1) = A10;
  165. *(buffer + 2) = A4;
  166. *(buffer + 3) = A12;
  167. *(buffer + 4) = A6;
  168. *(buffer + 5) = A14;
  169. *(buffer + 6) = A8;
  170. *(buffer + 7) = A16;
  171. *(buffer + 8) = A1;
  172. *(buffer + 9) = A9;
  173. *(buffer + 10) = A3;
  174. *(buffer + 11) = A11;
  175. *(buffer + 12) = A5;
  176. *(buffer + 13) = A13;
  177. *(buffer + 14) = A7;
  178. *(buffer + 15) = A15;
  179. } else {
  /* After the first swap the second row holds the old first row; that value
     is what gets moved into the second pivot row b2. */
  180. *(buffer + 0) = A2;
  181. *(buffer + 1) = A10;
  182. *(buffer + 2) = A4;
  183. *(buffer + 3) = A12;
  184. *(buffer + 4) = A6;
  185. *(buffer + 5) = A14;
  186. *(buffer + 6) = A8;
  187. *(buffer + 7) = A16;
  188. *(buffer + 8) = B2;
  189. *(buffer + 9) = B10;
  190. *(buffer + 10) = B4;
  191. *(buffer + 11) = B12;
  192. *(buffer + 12) = B6;
  193. *(buffer + 13) = B14;
  194. *(buffer + 14) = B8;
  195. *(buffer + 15) = B16;
  196. *(b2 + 0) = A1;
  197. *(b2 + 1) = A9;
  198. *(b4 + 0) = A3;
  199. *(b4 + 1) = A11;
  200. *(b6 + 0) = A5;
  201. *(b6 + 1) = A13;
  202. *(b8 + 0) = A7;
  203. *(b8 + 1) = A15;
  204. }
  205. } else {
  /* The first row's pivot is neither of the two current rows. */
  206. if (b2 == a2) {
  /* Only the first row is exchanged with its pivot row. */
  207. *(buffer + 0) = B1;
  208. *(buffer + 1) = B9;
  209. *(buffer + 2) = B3;
  210. *(buffer + 3) = B11;
  211. *(buffer + 4) = B5;
  212. *(buffer + 5) = B13;
  213. *(buffer + 6) = B7;
  214. *(buffer + 7) = B15;
  215. *(buffer + 8) = A2;
  216. *(buffer + 9) = A10;
  217. *(buffer + 10) = A4;
  218. *(buffer + 11) = A12;
  219. *(buffer + 12) = A6;
  220. *(buffer + 13) = A14;
  221. *(buffer + 14) = A8;
  222. *(buffer + 15) = A16;
  223. *(b1 + 0) = A1;
  224. *(b1 + 1) = A9;
  225. *(b3 + 0) = A3;
  226. *(b3 + 1) = A11;
  227. *(b5 + 0) = A5;
  228. *(b5 + 1) = A13;
  229. *(b7 + 0) = A7;
  230. *(b7 + 1) = A15;
  231. } else
  232. if (b2 == b1) {
  /* Both rows share one pivot row: the first row gets the pivot row's value,
     the second row gets the old first row, and the pivot row ends up with the
     old second row. */
  233. *(buffer + 0) = B1;
  234. *(buffer + 1) = B9;
  235. *(buffer + 2) = B3;
  236. *(buffer + 3) = B11;
  237. *(buffer + 4) = B5;
  238. *(buffer + 5) = B13;
  239. *(buffer + 6) = B7;
  240. *(buffer + 7) = B15;
  241. *(buffer + 8) = A1;
  242. *(buffer + 9) = A9;
  243. *(buffer + 10) = A3;
  244. *(buffer + 11) = A11;
  245. *(buffer + 12) = A5;
  246. *(buffer + 13) = A13;
  247. *(buffer + 14) = A7;
  248. *(buffer + 15) = A15;
  249. *(b1 + 0) = A2;
  250. *(b1 + 1) = A10;
  251. *(b3 + 0) = A4;
  252. *(b3 + 1) = A12;
  253. *(b5 + 0) = A6;
  254. *(b5 + 1) = A14;
  255. *(b7 + 0) = A8;
  256. *(b7 + 1) = A16;
  257. } else {
  /* General case: each row is exchanged with its own pivot row. */
  258. *(buffer + 0) = B1;
  259. *(buffer + 1) = B9;
  260. *(buffer + 2) = B3;
  261. *(buffer + 3) = B11;
  262. *(buffer + 4) = B5;
  263. *(buffer + 5) = B13;
  264. *(buffer + 6) = B7;
  265. *(buffer + 7) = B15;
  266. *(buffer + 8) = B2;
  267. *(buffer + 9) = B10;
  268. *(buffer + 10) = B4;
  269. *(buffer + 11) = B12;
  270. *(buffer + 12) = B6;
  271. *(buffer + 13) = B14;
  272. *(buffer + 14) = B8;
  273. *(buffer + 15) = B16;
  274. *(b1 + 0) = A1;
  275. *(b1 + 1) = A9;
  276. *(b2 + 0) = A2;
  277. *(b2 + 1) = A10;
  278. *(b3 + 0) = A3;
  279. *(b3 + 1) = A11;
  280. *(b4 + 0) = A4;
  281. *(b4 + 1) = A12;
  282. *(b5 + 0) = A5;
  283. *(b5 + 1) = A13;
  284. *(b6 + 0) = A6;
  285. *(b6 + 1) = A14;
  286. *(b7 + 0) = A7;
  287. *(b7 + 1) = A15;
  288. *(b8 + 0) = A8;
  289. *(b8 + 1) = A16;
  290. }
  291. }
  /* 2 rows x 4 columns x 2 FLOATs were emitted. */
  292. buffer += 16;
  /* Set up the pivot pointers for the next pair from the read-ahead values. */
  293. b1 = a + ip1;
  294. b2 = a + ip2;
  295. b3 = b1 + 1 * lda;
  296. b4 = b2 + 1 * lda;
  297. b5 = b1 + 2 * lda;
  298. b6 = b2 + 2 * lda;
  299. b7 = b1 + 3 * lda;
  300. b8 = b2 + 3 * lda;
  /* Advance two rows (2 FLOATs each) in every column. */
  301. a1 += 4;
  302. a3 += 4;
  303. a5 += 4;
  304. a7 += 4;
  305. i --;
  306. } while (i > 0);
  307. }
  /* Odd row count: one last row, exchanged with its pivot row b1. */
  308. i = ((k2 - k1) & 1);
  309. if (i > 0) {
  310. A1 = *(a1 + 0);
  311. A9 = *(a1 + 1);
  312. B1 = *(b1 + 0);
  313. B9 = *(b1 + 1);
  314. A3 = *(a3 + 0);
  315. A11 = *(a3 + 1);
  316. B3 = *(b3 + 0);
  317. B11 = *(b3 + 1);
  318. A5 = *(a5 + 0);
  319. A13 = *(a5 + 1);
  320. B5 = *(b5 + 0);
  321. B13 = *(b5 + 1);
  322. A7 = *(a7 + 0);
  323. A15 = *(a7 + 1);
  324. B7 = *(b7 + 0);
  325. B15 = *(b7 + 1);
  326. if (a1 == b1) {
  /* Pivot is the row itself: emit it unchanged. */
  327. *(buffer + 0) = A1;
  328. *(buffer + 1) = A9;
  329. *(buffer + 2) = A3;
  330. *(buffer + 3) = A11;
  331. *(buffer + 4) = A5;
  332. *(buffer + 5) = A13;
  333. *(buffer + 6) = A7;
  334. *(buffer + 7) = A15;
  335. } else {
  /* Emit the pivot row and store the current row in its place. */
  336. *(buffer + 0) = B1;
  337. *(buffer + 1) = B9;
  338. *(buffer + 2) = B3;
  339. *(buffer + 3) = B11;
  340. *(buffer + 4) = B5;
  341. *(buffer + 5) = B13;
  342. *(buffer + 6) = B7;
  343. *(buffer + 7) = B15;
  344. *(b1 + 0) = A1;
  345. *(b1 + 1) = A9;
  346. *(b3 + 0) = A3;
  347. *(b3 + 1) = A11;
  348. *(b5 + 0) = A5;
  349. *(b5 + 1) = A13;
  350. *(b7 + 0) = A7;
  351. *(b7 + 1) = A15;
  352. }
  353. buffer += 8;
  354. }
  /* Move on to the next group of four columns. */
  355. a += 4 * lda;
  356. j --;
  357. } while (j > 0);
  358. }
  /* ---- Group of two columns (bit 1 of n set) ----
     Same traversal and case analysis as the four-column group. Naming differs
     here: A1/A2 = a1, A3/A4 = a2, A5/A6 = a3, A7/A8 = a4 (offsets +0/+1), and
     B1..B8 map to b1, b2, b3, b4 in the same way. */
  359. if (n & 2) {
  360. piv = ipiv;
  361. a1 = a + (k1 + 1) * 2;
  362. a3 = a1 + lda;
  363. ip1 = *(piv + 0) * 2;
  364. ip2 = *(piv + 1) * 2;
  365. piv += 2;
  366. b1 = a + ip1;
  367. b2 = a + ip2;
  368. b3 = b1 + lda;
  369. b4 = b2 + lda;
  370. i = ((k2 - k1) >> 1);
  371. if (i > 0) {
  372. do {
  /* Load current rows and pivot rows before any store (they may alias). */
  373. A1 = *(a1 + 0);
  374. A2 = *(a1 + 1);
  375. A3 = *(a2 + 0);
  376. A4 = *(a2 + 1);
  377. A5 = *(a3 + 0);
  378. A6 = *(a3 + 1);
  379. A7 = *(a4 + 0);
  380. A8 = *(a4 + 1);
  381. B1 = *(b1 + 0);
  382. B2 = *(b1 + 1);
  383. B3 = *(b2 + 0);
  384. B4 = *(b2 + 1);
  385. B5 = *(b3 + 0);
  386. B6 = *(b3 + 1);
  387. B7 = *(b4 + 0);
  388. B8 = *(b4 + 1);
  /* Read ahead the pivots of the next pair. */
  389. ip1 = *(piv + 0) * 2;
  390. ip2 = *(piv + 1) * 2;
  391. piv += 2;
  /* buffer[0..3] receives the first row, buffer[4..7] the second row. */
  392. if (b1 == a1) {
  393. if (b2 == a2) {
  /* Neither row moves. */
  394. *(buffer + 0) = A1;
  395. *(buffer + 1) = A2;
  396. *(buffer + 2) = A5;
  397. *(buffer + 3) = A6;
  398. *(buffer + 4) = A3;
  399. *(buffer + 5) = A4;
  400. *(buffer + 6) = A7;
  401. *(buffer + 7) = A8;
  402. } else {
  /* Only the second row is exchanged with its pivot row. */
  403. *(buffer + 0) = A1;
  404. *(buffer + 1) = A2;
  405. *(buffer + 2) = A5;
  406. *(buffer + 3) = A6;
  407. *(buffer + 4) = B3;
  408. *(buffer + 5) = B4;
  409. *(buffer + 6) = B7;
  410. *(buffer + 7) = B8;
  411. *(b2 + 0) = A3;
  412. *(b2 + 1) = A4;
  413. *(b4 + 0) = A7;
  414. *(b4 + 1) = A8;
  415. }
  416. } else
  417. if (b1 == a2) {
  418. if (b2 == a2) {
  /* The two current rows trade places. */
  419. *(buffer + 0) = A3;
  420. *(buffer + 1) = A4;
  421. *(buffer + 2) = A7;
  422. *(buffer + 3) = A8;
  423. *(buffer + 4) = A1;
  424. *(buffer + 5) = A2;
  425. *(buffer + 6) = A5;
  426. *(buffer + 7) = A6;
  427. } else {
  /* The old first row, now in the second row, is moved on to b2. */
  428. *(buffer + 0) = A3;
  429. *(buffer + 1) = A4;
  430. *(buffer + 2) = A7;
  431. *(buffer + 3) = A8;
  432. *(buffer + 4) = B3;
  433. *(buffer + 5) = B4;
  434. *(buffer + 6) = B7;
  435. *(buffer + 7) = B8;
  436. *(b2 + 0) = A1;
  437. *(b2 + 1) = A2;
  438. *(b4 + 0) = A5;
  439. *(b4 + 1) = A6;
  440. }
  441. } else {
  442. if (b2 == a2) {
  /* Only the first row is exchanged with its pivot row. */
  443. *(buffer + 0) = B1;
  444. *(buffer + 1) = B2;
  445. *(buffer + 2) = B5;
  446. *(buffer + 3) = B6;
  447. *(buffer + 4) = A3;
  448. *(buffer + 5) = A4;
  449. *(buffer + 6) = A7;
  450. *(buffer + 7) = A8;
  451. *(b1 + 0) = A1;
  452. *(b1 + 1) = A2;
  453. *(b3 + 0) = A5;
  454. *(b3 + 1) = A6;
  455. } else
  456. if (b2 == b1) {
  /* Both rows share one pivot row, which ends up with the old second row. */
  457. *(buffer + 0) = B1;
  458. *(buffer + 1) = B2;
  459. *(buffer + 2) = B5;
  460. *(buffer + 3) = B6;
  461. *(buffer + 4) = A1;
  462. *(buffer + 5) = A2;
  463. *(buffer + 6) = A5;
  464. *(buffer + 7) = A6;
  465. *(b1 + 0) = A3;
  466. *(b1 + 1) = A4;
  467. *(b3 + 0) = A7;
  468. *(b3 + 1) = A8;
  469. } else {
  /* General case: each row is exchanged with its own pivot row. */
  470. *(buffer + 0) = B1;
  471. *(buffer + 1) = B2;
  472. *(buffer + 2) = B5;
  473. *(buffer + 3) = B6;
  474. *(buffer + 4) = B3;
  475. *(buffer + 5) = B4;
  476. *(buffer + 6) = B7;
  477. *(buffer + 7) = B8;
  478. *(b1 + 0) = A1;
  479. *(b1 + 1) = A2;
  480. *(b2 + 0) = A3;
  481. *(b2 + 1) = A4;
  482. *(b3 + 0) = A5;
  483. *(b3 + 1) = A6;
  484. *(b4 + 0) = A7;
  485. *(b4 + 1) = A8;
  486. }
  487. }
  /* 2 rows x 2 columns x 2 FLOATs were emitted. */
  488. buffer += 8;
  489. b1 = a + ip1;
  490. b2 = a + ip2;
  491. b3 = b1 + lda;
  492. b4 = b2 + lda;
  493. a1 += 4;
  494. a3 += 4;
  495. i --;
  496. } while (i > 0);
  497. }
  /* Odd row count: one last row, exchanged with its pivot row b1. */
  498. i = ((k2 - k1) & 1);
  499. if (i > 0) {
  500. A1 = *(a1 + 0);
  501. A2 = *(a1 + 1);
  502. B1 = *(b1 + 0);
  503. B2 = *(b1 + 1);
  504. A3 = *(a3 + 0);
  505. A4 = *(a3 + 1);
  506. B3 = *(b3 + 0);
  507. B4 = *(b3 + 1);
  508. if (a1 == b1) {
  509. *(buffer + 0) = A1;
  510. *(buffer + 1) = A2;
  511. *(buffer + 2) = A3;
  512. *(buffer + 3) = A4;
  513. } else {
  514. *(buffer + 0) = B1;
  515. *(buffer + 1) = B2;
  516. *(buffer + 2) = B3;
  517. *(buffer + 3) = B4;
  518. *(b1 + 0) = A1;
  519. *(b1 + 1) = A2;
  520. *(b3 + 0) = A3;
  521. *(b3 + 1) = A4;
  522. }
  523. buffer += 4;
  524. }
  /* Skip past the two columns just handled. */
  525. a += 2 * lda;
  526. }
  /* ---- Final single column (bit 0 of n set) ----
     Same case analysis again; A1/A2 = a1, A3/A4 = a2, B1/B2 = b1, B3/B4 = b2
     (offsets +0/+1). */
  527. if (n & 1) {
  528. piv = ipiv;
  529. a1 = a + (k1 + 1) * 2;
  530. ip1 = *(piv + 0) * 2;
  531. ip2 = *(piv + 1) * 2;
  532. piv += 2;
  533. b1 = a + ip1;
  534. b2 = a + ip2;
  535. i = ((k2 - k1) >> 1);
  536. if (i > 0) {
  537. do {
  /* Load current rows and pivot rows before any store (they may alias). */
  538. A1 = *(a1 + 0);
  539. A2 = *(a1 + 1);
  540. A3 = *(a2 + 0);
  541. A4 = *(a2 + 1);
  542. B1 = *(b1 + 0);
  543. B2 = *(b1 + 1);
  544. B3 = *(b2 + 0);
  545. B4 = *(b2 + 1);
  /* Read ahead the pivots of the next pair. */
  546. ip1 = *(piv + 0) * 2;
  547. ip2 = *(piv + 1) * 2;
  548. piv += 2;
  /* buffer[0..1] receives the first row, buffer[2..3] the second row. */
  549. if (b1 == a1) {
  550. if (b2 == a2) {
  /* Neither row moves. */
  551. *(buffer + 0) = A1;
  552. *(buffer + 1) = A2;
  553. *(buffer + 2) = A3;
  554. *(buffer + 3) = A4;
  555. } else {
  /* Only the second row is exchanged with its pivot row. */
  556. *(buffer + 0) = A1;
  557. *(buffer + 1) = A2;
  558. *(buffer + 2) = B3;
  559. *(buffer + 3) = B4;
  560. *(b2 + 0) = A3;
  561. *(b2 + 1) = A4;
  562. }
  563. } else
  564. if (b1 == a2) {
  565. if (b2 == a2) {
  /* The two current rows trade places. */
  566. *(buffer + 0) = A3;
  567. *(buffer + 1) = A4;
  568. *(buffer + 2) = A1;
  569. *(buffer + 3) = A2;
  570. } else {
  /* The old first row, now in the second row, is moved on to b2. */
  571. *(buffer + 0) = A3;
  572. *(buffer + 1) = A4;
  573. *(buffer + 2) = B3;
  574. *(buffer + 3) = B4;
  575. *(b2 + 0) = A1;
  576. *(b2 + 1) = A2;
  577. }
  578. } else {
  579. if (b2 == a2) {
  /* Only the first row is exchanged with its pivot row. */
  580. *(buffer + 0) = B1;
  581. *(buffer + 1) = B2;
  582. *(buffer + 2) = A3;
  583. *(buffer + 3) = A4;
  584. *(b1 + 0) = A1;
  585. *(b1 + 1) = A2;
  586. } else
  587. if (b2 == b1) {
  /* Both rows share one pivot row, which ends up with the old second row. */
  588. *(buffer + 0) = B1;
  589. *(buffer + 1) = B2;
  590. *(buffer + 2) = A1;
  591. *(buffer + 3) = A2;
  592. *(b1 + 0) = A3;
  593. *(b1 + 1) = A4;
  594. } else {
  /* General case: each row is exchanged with its own pivot row. */
  595. *(buffer + 0) = B1;
  596. *(buffer + 1) = B2;
  597. *(buffer + 2) = B3;
  598. *(buffer + 3) = B4;
  599. *(b1 + 0) = A1;
  600. *(b1 + 1) = A2;
  601. *(b2 + 0) = A3;
  602. *(b2 + 1) = A4;
  603. }
  604. }
  /* 2 rows x 1 column x 2 FLOATs were emitted. */
  605. buffer += 4;
  606. b1 = a + ip1;
  607. b2 = a + ip2;
  608. a1 += 4;
  609. i --;
  610. } while (i > 0);
  611. }
  /* Odd row count: one last row, exchanged with its pivot row b1. */
  612. i = ((k2 - k1) & 1);
  613. if (i > 0) {
  614. A1 = *(a1 + 0);
  615. A2 = *(a1 + 1);
  616. B1 = *(b1 + 0);
  617. B2 = *(b1 + 1);
  618. if (a1 == b1) {
  619. *(buffer + 0) = A1;
  620. *(buffer + 1) = A2;
  621. } else {
  622. *(buffer + 0) = B1;
  623. *(buffer + 1) = B2;
  624. *(b1 + 0) = A1;
  625. *(b1 + 1) = A2;
  626. }
  627. buffer += 2;
  628. }
  629. }
  630. return 0;
  631. }