You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trsm_utcopy_6.c 7.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include "common.h"
  40. #ifndef UNIT
  41. #define INV(a) (ONE / (a))
  42. #else
  43. #define INV(a) (ONE)
  44. #endif
  45. int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT *b){
  46. BLASLONG i, ii, j, jj;
  47. FLOAT data01, data02, data03, data04, data05, data06, data07, data08;
  48. FLOAT data09, data10, data11, data12, data13, data14, data15, data16;
  49. FLOAT *a1, *a2, *a3, *a4;
  50. jj = offset;
  51. j = (n >> 2);
  52. while (j > 0){
  53. a1 = a + 0 * lda;
  54. a2 = a + 1 * lda;
  55. a3 = a + 2 * lda;
  56. a4 = a + 3 * lda;
  57. i = (m >> 2);
  58. ii = 0;
  59. while (i > 0) {
  60. if (ii == jj) {
  61. #ifndef UNIT
  62. data01 = *(a1 + 0);
  63. #endif
  64. data05 = *(a2 + 0);
  65. #ifndef UNIT
  66. data06 = *(a2 + 1);
  67. #endif
  68. data09 = *(a3 + 0);
  69. data10 = *(a3 + 1);
  70. #ifndef UNIT
  71. data11 = *(a3 + 2);
  72. #endif
  73. data13 = *(a4 + 0);
  74. data14 = *(a4 + 1);
  75. data15 = *(a4 + 2);
  76. #ifndef UNIT
  77. data16 = *(a4 + 3);
  78. #endif
  79. *(b + 0) = INV(data01);
  80. *(b + 4) = data05;
  81. *(b + 5) = INV(data06);
  82. *(b + 8) = data09;
  83. *(b + 9) = data10;
  84. *(b + 10) = INV(data11);
  85. *(b + 12) = data13;
  86. *(b + 13) = data14;
  87. *(b + 14) = data15;
  88. *(b + 15) = INV(data16);
  89. }
  90. if (ii > jj) {
  91. data01 = *(a1 + 0);
  92. data02 = *(a1 + 1);
  93. data03 = *(a1 + 2);
  94. data04 = *(a1 + 3);
  95. data05 = *(a2 + 0);
  96. data06 = *(a2 + 1);
  97. data07 = *(a2 + 2);
  98. data08 = *(a2 + 3);
  99. data09 = *(a3 + 0);
  100. data10 = *(a3 + 1);
  101. data11 = *(a3 + 2);
  102. data12 = *(a3 + 3);
  103. data13 = *(a4 + 0);
  104. data14 = *(a4 + 1);
  105. data15 = *(a4 + 2);
  106. data16 = *(a4 + 3);
  107. *(b + 0) = data01;
  108. *(b + 1) = data02;
  109. *(b + 2) = data03;
  110. *(b + 3) = data04;
  111. *(b + 4) = data05;
  112. *(b + 5) = data06;
  113. *(b + 6) = data07;
  114. *(b + 7) = data08;
  115. *(b + 8) = data09;
  116. *(b + 9) = data10;
  117. *(b + 10) = data11;
  118. *(b + 11) = data12;
  119. *(b + 12) = data13;
  120. *(b + 13) = data14;
  121. *(b + 14) = data15;
  122. *(b + 15) = data16;
  123. }
  124. a1 += 4 * lda;
  125. a2 += 4 * lda;
  126. a3 += 4 * lda;
  127. a4 += 4 * lda;
  128. b += 16;
  129. i --;
  130. ii += 4;
  131. }
  132. if ((m & 2) != 0) {
  133. if (ii== jj) {
  134. #ifndef UNIT
  135. data01 = *(a1 + 0);
  136. #endif
  137. data05 = *(a2 + 0);
  138. #ifndef UNIT
  139. data06 = *(a2 + 1);
  140. #endif
  141. *(b + 0) = INV(data01);
  142. *(b + 4) = data05;
  143. *(b + 5) = INV(data06);
  144. }
  145. if (ii > jj) {
  146. data01 = *(a1 + 0);
  147. data02 = *(a1 + 1);
  148. data03 = *(a1 + 2);
  149. data04 = *(a1 + 3);
  150. data05 = *(a2 + 0);
  151. data06 = *(a2 + 1);
  152. data07 = *(a2 + 2);
  153. data08 = *(a2 + 3);
  154. *(b + 0) = data01;
  155. *(b + 1) = data02;
  156. *(b + 2) = data03;
  157. *(b + 3) = data04;
  158. *(b + 4) = data05;
  159. *(b + 5) = data06;
  160. *(b + 6) = data07;
  161. *(b + 7) = data08;
  162. }
  163. a1 += 2 * lda;
  164. a2 += 2 * lda;
  165. b += 8;
  166. ii += 2;
  167. }
  168. if ((m & 1) != 0) {
  169. if (ii== jj) {
  170. #ifndef UNIT
  171. data01 = *(a1 + 0);
  172. #endif
  173. *(b + 0) = INV(data01);
  174. }
  175. if (ii > jj) {
  176. data01 = *(a1 + 0);
  177. data02 = *(a1 + 1);
  178. data03 = *(a1 + 2);
  179. data04 = *(a1 + 3);
  180. *(b + 0) = data01;
  181. *(b + 1) = data02;
  182. *(b + 2) = data03;
  183. *(b + 3) = data04;
  184. }
  185. b += 4;
  186. }
  187. a += 4;
  188. jj += 4;
  189. j --;
  190. }
  191. if (n & 2) {
  192. a1 = a + 0 * lda;
  193. a2 = a + 1 * lda;
  194. i = (m >> 1);
  195. ii = 0;
  196. while (i > 0) {
  197. if (ii == jj) {
  198. #ifndef UNIT
  199. data01 = *(a1 + 0);
  200. #endif
  201. data03 = *(a2 + 0);
  202. #ifndef UNIT
  203. data04 = *(a2 + 1);
  204. #endif
  205. *(b + 0) = INV(data01);
  206. *(b + 2) = data03;
  207. *(b + 3) = INV(data04);
  208. }
  209. if (ii > jj) {
  210. data01 = *(a1 + 0);
  211. data02 = *(a1 + 1);
  212. data03 = *(a2 + 0);
  213. data04 = *(a2 + 1);
  214. *(b + 0) = data01;
  215. *(b + 1) = data02;
  216. *(b + 2) = data03;
  217. *(b + 3) = data04;
  218. }
  219. a1 += 2 * lda;
  220. a2 += 2 * lda;
  221. b += 4;
  222. i --;
  223. ii += 2;
  224. }
  225. if ((m & 1) != 0) {
  226. if (ii== jj) {
  227. #ifndef UNIT
  228. data01 = *(a1 + 0);
  229. #endif
  230. *(b + 0) = INV(data01);
  231. }
  232. if (ii > jj) {
  233. data01 = *(a1 + 0);
  234. data02 = *(a1 + 1);
  235. *(b + 0) = data01;
  236. *(b + 1) = data02;
  237. }
  238. b += 2;
  239. }
  240. a += 2;
  241. jj += 2;
  242. }
  243. if (n & 1) {
  244. a1 = a + 0 * lda;
  245. i = m;
  246. ii = 0;
  247. while (i > 0) {
  248. if (ii == jj) {
  249. #ifndef UNIT
  250. data01 = *(a1 + 0);
  251. #endif
  252. *(b + 0) = INV(data01);
  253. }
  254. if (ii > jj) {
  255. data01 = *(a1 + 0);
  256. *(b + 0) = data01;
  257. }
  258. a1 += 1 * lda;
  259. b += 1;
  260. i --;
  261. ii += 1;
  262. }
  263. }
  264. return 0;
  265. }