You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trsm_lncopy_4.c 6.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include "common.h"
  40. #ifndef UNIT
  41. #define INV(a) (ONE / (a))
  42. #else
  43. #define INV(a) (ONE)
  44. #endif
  45. int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *b){
  46. BLASLONG i, ii, j, jj;
  47. FLOAT data01, data02, data03, data04, data05, data06, data07, data08;
  48. FLOAT data09, data10, data11, data12, data13, data14, data15, data16;
  49. FLOAT *a1, *a2, *a3, *a4;
  50. jj = offset;
  51. j = (n >> 2);
  52. while (j > 0){
  53. a1 = a + 0 * lda;
  54. a2 = a + 1 * lda;
  55. a3 = a + 2 * lda;
  56. a4 = a + 3 * lda;
  57. i = (m >> 2);
  58. ii = 0;
  59. while (i > 0) {
  60. if (ii == jj) {
  61. #ifndef UNIT
  62. data01 = *(a1 + 0);
  63. #endif
  64. data02 = *(a1 + 1);
  65. data03 = *(a1 + 2);
  66. data04 = *(a1 + 3);
  67. #ifndef UNIT
  68. data06 = *(a2 + 1);
  69. #endif
  70. data07 = *(a2 + 2);
  71. data08 = *(a2 + 3);
  72. #ifndef UNIT
  73. data11 = *(a3 + 2);
  74. #endif
  75. data12 = *(a3 + 3);
  76. #ifndef UNIT
  77. data16 = *(a4 + 3);
  78. #endif
  79. *(b + 0) = INV(data01);
  80. *(b + 4) = data02;
  81. *(b + 5) = INV(data06);
  82. *(b + 8) = data03;
  83. *(b + 9) = data07;
  84. *(b + 10) = INV(data11);
  85. *(b + 12) = data04;
  86. *(b + 13) = data08;
  87. *(b + 14) = data12;
  88. *(b + 15) = INV(data16);
  89. }
  90. if (ii > jj) {
  91. data01 = *(a1 + 0);
  92. data02 = *(a1 + 1);
  93. data03 = *(a1 + 2);
  94. data04 = *(a1 + 3);
  95. data05 = *(a2 + 0);
  96. data06 = *(a2 + 1);
  97. data07 = *(a2 + 2);
  98. data08 = *(a2 + 3);
  99. data09 = *(a3 + 0);
  100. data10 = *(a3 + 1);
  101. data11 = *(a3 + 2);
  102. data12 = *(a3 + 3);
  103. data13 = *(a4 + 0);
  104. data14 = *(a4 + 1);
  105. data15 = *(a4 + 2);
  106. data16 = *(a4 + 3);
  107. *(b + 0) = data01;
  108. *(b + 1) = data05;
  109. *(b + 2) = data09;
  110. *(b + 3) = data13;
  111. *(b + 4) = data02;
  112. *(b + 5) = data06;
  113. *(b + 6) = data10;
  114. *(b + 7) = data14;
  115. *(b + 8) = data03;
  116. *(b + 9) = data07;
  117. *(b + 10) = data11;
  118. *(b + 11) = data15;
  119. *(b + 12) = data04;
  120. *(b + 13) = data08;
  121. *(b + 14) = data12;
  122. *(b + 15) = data16;
  123. }
  124. a1 += 4;
  125. a2 += 4;
  126. a3 += 4;
  127. a4 += 4;
  128. b += 16;
  129. i --;
  130. ii += 4;
  131. }
  132. if ((m & 2) != 0) {
  133. if (ii== jj) {
  134. #ifndef UNIT
  135. data01 = *(a1 + 0);
  136. #endif
  137. data02 = *(a1 + 1);
  138. #ifndef UNIT
  139. data06 = *(a2 + 1);
  140. #endif
  141. *(b + 0) = INV(data01);
  142. *(b + 4) = data02;
  143. *(b + 5) = INV(data06);
  144. }
  145. if (ii > jj) {
  146. data01 = *(a1 + 0);
  147. data02 = *(a1 + 1);
  148. data03 = *(a2 + 0);
  149. data04 = *(a2 + 1);
  150. data05 = *(a3 + 0);
  151. data06 = *(a3 + 1);
  152. data07 = *(a4 + 0);
  153. data08 = *(a4 + 1);
  154. *(b + 0) = data01;
  155. *(b + 1) = data03;
  156. *(b + 2) = data05;
  157. *(b + 3) = data07;
  158. *(b + 4) = data02;
  159. *(b + 5) = data04;
  160. *(b + 6) = data06;
  161. *(b + 7) = data08;
  162. }
  163. a1 += 2;
  164. a2 += 2;
  165. a3 += 2;
  166. a4 += 2;
  167. b += 8;
  168. ii += 2;
  169. }
  170. if ((m & 1) != 0) {
  171. if (ii== jj) {
  172. #ifndef UNIT
  173. data01 = *(a1 + 0);
  174. #endif
  175. *(b + 0) = INV(data01);
  176. }
  177. if (ii > jj) {
  178. data01 = *(a1 + 0);
  179. data02 = *(a2 + 0);
  180. data03 = *(a3 + 0);
  181. data04 = *(a4 + 0);
  182. *(b + 0) = data01;
  183. *(b + 1) = data02;
  184. *(b + 2) = data03;
  185. *(b + 3) = data04;
  186. }
  187. b += 4;
  188. }
  189. a += 4 * lda;
  190. jj += 4;
  191. j --;
  192. }
  193. if (n & 2) {
  194. a1 = a + 0 * lda;
  195. a2 = a + 1 * lda;
  196. i = (m >> 1);
  197. ii = 0;
  198. while (i > 0) {
  199. if (ii == jj) {
  200. #ifndef UNIT
  201. data01 = *(a1 + 0);
  202. #endif
  203. data02 = *(a1 + 1);
  204. #ifndef UNIT
  205. data04 = *(a2 + 1);
  206. #endif
  207. *(b + 0) = INV(data01);
  208. *(b + 2) = data02;
  209. *(b + 3) = INV(data04);
  210. }
  211. if (ii > jj) {
  212. data01 = *(a1 + 0);
  213. data02 = *(a1 + 1);
  214. data03 = *(a2 + 0);
  215. data04 = *(a2 + 1);
  216. *(b + 0) = data01;
  217. *(b + 1) = data03;
  218. *(b + 2) = data02;
  219. *(b + 3) = data04;
  220. }
  221. a1 += 2;
  222. a2 += 2;
  223. b += 4;
  224. i --;
  225. ii += 2;
  226. }
  227. if ((m & 1) != 0) {
  228. if (ii== jj) {
  229. #ifndef UNIT
  230. data01 = *(a1 + 0);
  231. #endif
  232. *(b + 0) = INV(data01);
  233. }
  234. if (ii > jj) {
  235. data01 = *(a1 + 0);
  236. data02 = *(a2 + 0);
  237. *(b + 0) = data01;
  238. *(b + 1) = data02;
  239. }
  240. b += 2;
  241. }
  242. a += 2 * lda;
  243. jj += 2;
  244. }
  245. if (n & 1) {
  246. a1 = a + 0 * lda;
  247. i = m;
  248. ii = 0;
  249. while (i > 0) {
  250. if (ii == jj) {
  251. #ifndef UNIT
  252. data01 = *(a1 + 0);
  253. #endif
  254. *(b + 0) = INV(data01);
  255. }
  256. if (ii > jj) {
  257. data01 = *(a1 + 0);
  258. *(b + 0) = data01;
  259. }
  260. a1+= 1;
  261. b += 1;
  262. i --;
  263. ii += 1;
  264. }
  265. }
  266. return 0;
  267. }