You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

trsm_uncopy_6.c 7.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include "common.h"
  40. #ifndef UNIT
  41. #define INV(a) (ONE / (a))
  42. #else
  43. #define INV(a) (ONE)
  44. #endif
  45. int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *b){
  46. BLASLONG i, ii, j, jj;
  47. FLOAT data01, data02, data03, data04, data05, data06, data07, data08;
  48. FLOAT data09, data10, data11, data12, data13, data14, data15, data16;
  49. FLOAT *a1, *a2, *a3, *a4;
  50. jj = offset;
  51. j = (n >> 2);
  52. while (j > 0){
  53. a1 = a + 0 * lda;
  54. a2 = a + 1 * lda;
  55. a3 = a + 2 * lda;
  56. a4 = a + 3 * lda;
  57. i = (m >> 2);
  58. ii = 0;
  59. while (i > 0) {
  60. if (ii == jj) {
  61. #ifndef UNIT
  62. data01 = *(a1 + 0);
  63. #endif
  64. data05 = *(a2 + 0);
  65. #ifndef UNIT
  66. data06 = *(a2 + 1);
  67. #endif
  68. data09 = *(a3 + 0);
  69. data10 = *(a3 + 1);
  70. #ifndef UNIT
  71. data11 = *(a3 + 2);
  72. #endif
  73. data13 = *(a4 + 0);
  74. data14 = *(a4 + 1);
  75. data15 = *(a4 + 2);
  76. #ifndef UNIT
  77. data16 = *(a4 + 3);
  78. #endif
  79. *(b + 0) = INV(data01);
  80. *(b + 1) = data05;
  81. *(b + 2) = data09;
  82. *(b + 3) = data13;
  83. *(b + 5) = INV(data06);
  84. *(b + 6) = data10;
  85. *(b + 7) = data14;
  86. *(b + 10) = INV(data11);
  87. *(b + 11) = data15;
  88. *(b + 15) = INV(data16);
  89. }
  90. if (ii < jj) {
  91. data01 = *(a1 + 0);
  92. data02 = *(a1 + 1);
  93. data03 = *(a1 + 2);
  94. data04 = *(a1 + 3);
  95. data05 = *(a2 + 0);
  96. data06 = *(a2 + 1);
  97. data07 = *(a2 + 2);
  98. data08 = *(a2 + 3);
  99. data09 = *(a3 + 0);
  100. data10 = *(a3 + 1);
  101. data11 = *(a3 + 2);
  102. data12 = *(a3 + 3);
  103. data13 = *(a4 + 0);
  104. data14 = *(a4 + 1);
  105. data15 = *(a4 + 2);
  106. data16 = *(a4 + 3);
  107. *(b + 0) = data01;
  108. *(b + 1) = data05;
  109. *(b + 2) = data09;
  110. *(b + 3) = data13;
  111. *(b + 4) = data02;
  112. *(b + 5) = data06;
  113. *(b + 6) = data10;
  114. *(b + 7) = data14;
  115. *(b + 8) = data03;
  116. *(b + 9) = data07;
  117. *(b + 10) = data11;
  118. *(b + 11) = data15;
  119. *(b + 12) = data04;
  120. *(b + 13) = data08;
  121. *(b + 14) = data12;
  122. *(b + 15) = data16;
  123. }
  124. a1 += 4;
  125. a2 += 4;
  126. a3 += 4;
  127. a4 += 4;
  128. b += 16;
  129. i --;
  130. ii += 4;
  131. }
  132. if ((m & 2) != 0) {
  133. if (ii== jj) {
  134. #ifndef UNIT
  135. data01 = *(a1 + 0);
  136. #endif
  137. data05 = *(a2 + 0);
  138. #ifndef UNIT
  139. data06 = *(a2 + 1);
  140. #endif
  141. data09 = *(a3 + 0);
  142. data10 = *(a3 + 1);
  143. data13 = *(a4 + 0);
  144. data14 = *(a4 + 1);
  145. *(b + 0) = INV(data01);
  146. *(b + 1) = data05;
  147. *(b + 2) = data09;
  148. *(b + 3) = data13;
  149. *(b + 5) = INV(data06);
  150. *(b + 6) = data10;
  151. *(b + 7) = data14;
  152. }
  153. if (ii < jj) {
  154. data01 = *(a1 + 0);
  155. data02 = *(a1 + 1);
  156. data03 = *(a2 + 0);
  157. data04 = *(a2 + 1);
  158. data05 = *(a3 + 0);
  159. data06 = *(a3 + 1);
  160. data07 = *(a4 + 0);
  161. data08 = *(a4 + 1);
  162. *(b + 0) = data01;
  163. *(b + 1) = data02;
  164. *(b + 2) = data03;
  165. *(b + 3) = data04;
  166. *(b + 4) = data05;
  167. *(b + 5) = data06;
  168. *(b + 6) = data07;
  169. *(b + 7) = data08;
  170. }
  171. a1 += 2;
  172. a2 += 2;
  173. b += 8;
  174. ii += 2;
  175. }
  176. if ((m & 1) != 0) {
  177. if (ii== jj) {
  178. #ifndef UNIT
  179. data01 = *(a1 + 0);
  180. #endif
  181. data05 = *(a2 + 0);
  182. data09 = *(a3 + 0);
  183. data13 = *(a4 + 0);
  184. *(b + 0) = INV(data01);
  185. *(b + 1) = data05;
  186. *(b + 2) = data09;
  187. *(b + 3) = data13;
  188. }
  189. if (ii < jj) {
  190. data01 = *(a1 + 0);
  191. data02 = *(a2 + 0);
  192. data03 = *(a3 + 0);
  193. data04 = *(a4 + 0);
  194. *(b + 0) = data01;
  195. *(b + 1) = data02;
  196. *(b + 2) = data03;
  197. *(b + 3) = data04;
  198. }
  199. b += 4;
  200. }
  201. a += 4 * lda;
  202. jj += 4;
  203. j --;
  204. }
  205. if (n & 2) {
  206. a1 = a + 0 * lda;
  207. a2 = a + 1 * lda;
  208. i = (m >> 1);
  209. ii = 0;
  210. while (i > 0) {
  211. if (ii == jj) {
  212. #ifndef UNIT
  213. data01 = *(a1 + 0);
  214. #endif
  215. data03 = *(a2 + 0);
  216. #ifndef UNIT
  217. data04 = *(a2 + 1);
  218. #endif
  219. *(b + 0) = INV(data01);
  220. *(b + 1) = data03;
  221. *(b + 3) = INV(data04);
  222. }
  223. if (ii < jj) {
  224. data01 = *(a1 + 0);
  225. data02 = *(a1 + 1);
  226. data03 = *(a2 + 0);
  227. data04 = *(a2 + 1);
  228. *(b + 0) = data01;
  229. *(b + 1) = data03;
  230. *(b + 2) = data02;
  231. *(b + 3) = data04;
  232. }
  233. a1 += 2;
  234. a2 += 2;
  235. b += 4;
  236. i --;
  237. ii += 2;
  238. }
  239. if ((m & 1) != 0) {
  240. if (ii== jj) {
  241. #ifndef UNIT
  242. data01 = *(a1 + 0);
  243. #endif
  244. data03 = *(a2 + 0);
  245. *(b + 0) = INV(data01);
  246. *(b + 1) = data03;
  247. }
  248. if (ii < jj) {
  249. data01 = *(a1 + 0);
  250. data02 = *(a2 + 0);
  251. *(b + 0) = data01;
  252. *(b + 1) = data02;
  253. }
  254. b += 2;
  255. }
  256. a += 2 * lda;
  257. jj += 2;
  258. }
  259. if (n & 1) {
  260. a1 = a + 0 * lda;
  261. i = m;
  262. ii = 0;
  263. while (i > 0) {
  264. if (ii == jj) {
  265. #ifndef UNIT
  266. data01 = *(a1 + 0);
  267. #endif
  268. *(b + 0) = INV(data01);
  269. }
  270. if (ii < jj) {
  271. data01 = *(a1 + 0);
  272. *(b + 0) = data01;
  273. }
  274. a1+= 1;
  275. b += 1;
  276. i --;
  277. ii += 1;
  278. }
  279. }
  280. return 0;
  281. }