You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

zgbmv.c 7.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include <stdio.h>
  39. #include "common.h"
  40. #ifdef FUNCTION_PROFILE
  41. #include "functable.h"
  42. #endif
  43. #ifdef XDOUBLE
  44. #define ERROR_NAME "XGBMV "
  45. #elif defined(DOUBLE)
  46. #define ERROR_NAME "ZGBMV "
  47. #else
  48. #define ERROR_NAME "CGBMV "
  49. #endif
  50. static void (*gbmv[])(BLASLONG, BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT,
  51. FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, void *) = {
  52. #ifdef XDOUBLE
  53. xgbmv_n, xgbmv_t, xgbmv_r, xgbmv_c,
  54. xgbmv_o, xgbmv_u, xgbmv_s, xgbmv_d,
  55. #elif defined(DOUBLE)
  56. zgbmv_n, zgbmv_t, zgbmv_r, zgbmv_c,
  57. zgbmv_o, zgbmv_u, zgbmv_s, zgbmv_d,
  58. #else
  59. cgbmv_n, cgbmv_t, cgbmv_r, cgbmv_c,
  60. cgbmv_o, cgbmv_u, cgbmv_s, cgbmv_d,
  61. #endif
  62. };
  63. #ifdef SMP
  64. static int (*gbmv_thread[])(BLASLONG, BLASLONG, BLASLONG, BLASLONG, FLOAT *,
  65. FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
  66. #ifdef XDOUBLE
  67. xgbmv_thread_n, xgbmv_thread_t, xgbmv_thread_r, xgbmv_thread_c,
  68. xgbmv_thread_o, xgbmv_thread_u, xgbmv_thread_s, xgbmv_thread_d,
  69. #elif defined(DOUBLE)
  70. zgbmv_thread_n, zgbmv_thread_t, zgbmv_thread_r, zgbmv_thread_c,
  71. zgbmv_thread_o, zgbmv_thread_u, zgbmv_thread_s, zgbmv_thread_d,
  72. #else
  73. cgbmv_thread_n, cgbmv_thread_t, cgbmv_thread_r, cgbmv_thread_c,
  74. cgbmv_thread_o, cgbmv_thread_u, cgbmv_thread_s, cgbmv_thread_d,
  75. #endif
  76. };
  77. #endif
  78. #ifndef CBLAS
  79. void NAME(char *TRANS, blasint *M, blasint *N,
  80. blasint *KU, blasint *KL,
  81. FLOAT *ALPHA, FLOAT *a, blasint *LDA,
  82. FLOAT *x, blasint *INCX,
  83. FLOAT *BETA, FLOAT *y, blasint *INCY){
  84. char trans = *TRANS;
  85. blasint m = *M;
  86. blasint n = *N;
  87. blasint ku = *KU;
  88. blasint kl = *KL;
  89. blasint lda = *LDA;
  90. blasint incx = *INCX;
  91. blasint incy = *INCY;
  92. FLOAT *buffer;
  93. #ifdef SMP
  94. int nthreads;
  95. #endif
  96. FLOAT alpha_r = ALPHA[0];
  97. FLOAT alpha_i = ALPHA[1];
  98. FLOAT beta_r = BETA[0];
  99. FLOAT beta_i = BETA[1];
  100. blasint info;
  101. blasint lenx, leny;
  102. blasint i;
  103. PRINT_DEBUG_NAME;
  104. TOUPPER(trans);
  105. info = 0;
  106. i = -1;
  107. if (trans == 'N') i = 0;
  108. if (trans == 'T') i = 1;
  109. if (trans == 'R') i = 2;
  110. if (trans == 'C') i = 3;
  111. if (trans == 'O') i = 4;
  112. if (trans == 'U') i = 5;
  113. if (trans == 'S') i = 6;
  114. if (trans == 'D') i = 7;
  115. if (incy == 0) info = 13;
  116. if (incx == 0) info = 10;
  117. if (lda < kl + ku + 1) info = 8;
  118. if (kl < 0) info = 5;
  119. if (ku < 0) info = 4;
  120. if (n < 0) info = 3;
  121. if (m < 0) info = 2;
  122. if (i < 0) info = 1;
  123. trans = i;
  124. if (info != 0){
  125. BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
  126. return;
  127. }
  128. #else
  129. void CNAME(enum CBLAS_ORDER order,
  130. enum CBLAS_TRANSPOSE TransA,
  131. blasint m, blasint n,
  132. blasint ku, blasint kl,
  133. void *VALPHA,
  134. void *va, blasint lda,
  135. void *vx, blasint incx,
  136. void *VBETA,
  137. void *vy, blasint incy){
  138. FLOAT* ALPHA = (FLOAT*) VALPHA;
  139. FLOAT* BETA = (FLOAT*) VBETA;
  140. FLOAT* a = (FLOAT*) va;
  141. FLOAT* x = (FLOAT*) vx;
  142. FLOAT* y = (FLOAT*) vy;
  143. FLOAT alpha_r = ALPHA[0];
  144. FLOAT alpha_i = ALPHA[1];
  145. FLOAT beta_r = BETA[0];
  146. FLOAT beta_i = BETA[1];
  147. FLOAT *buffer;
  148. blasint lenx, leny;
  149. int trans;
  150. blasint info, t;
  151. #ifdef SMP
  152. int nthreads;
  153. #endif
  154. PRINT_DEBUG_CNAME;
  155. trans = -1;
  156. info = 0;
  157. if (order == CblasColMajor) {
  158. if (TransA == CblasNoTrans) trans = 0;
  159. if (TransA == CblasTrans) trans = 1;
  160. if (TransA == CblasConjNoTrans) trans = 2;
  161. if (TransA == CblasConjTrans) trans = 3;
  162. info = -1;
  163. if (incy == 0) info = 13;
  164. if (incx == 0) info = 10;
  165. if (lda < kl + ku + 1) info = 8;
  166. if (kl < 0) info = 5;
  167. if (ku < 0) info = 4;
  168. if (n < 0) info = 3;
  169. if (m < 0) info = 2;
  170. if (trans < 0) info = 1;
  171. }
  172. if (order == CblasRowMajor) {
  173. if (TransA == CblasNoTrans) trans = 1;
  174. if (TransA == CblasTrans) trans = 0;
  175. if (TransA == CblasConjNoTrans) trans = 3;
  176. if (TransA == CblasConjTrans) trans = 2;
  177. info = -1;
  178. t = n;
  179. n = m;
  180. m = t;
  181. t = ku;
  182. ku = kl;
  183. kl = t;
  184. if (incy == 0) info = 13;
  185. if (incx == 0) info = 10;
  186. if (lda < kl + ku + 1) info = 8;
  187. if (kl < 0) info = 5;
  188. if (ku < 0) info = 4;
  189. if (n < 0) info = 3;
  190. if (m < 0) info = 2;
  191. if (trans < 0) info = 1;
  192. }
  193. if (info >= 0) {
  194. BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
  195. return;
  196. }
  197. #endif
  198. if ((m==0) || (n==0)) return;
  199. lenx = n;
  200. leny = m;
  201. if (trans & 1) lenx = m;
  202. if (trans & 1) leny = n;
  203. if (beta_r != ONE || beta_i != ZERO) SCAL_K(leny, 0, 0, beta_r, beta_i, y, blasabs(incy), NULL, 0, NULL, 0);
  204. if (alpha_r == ZERO && alpha_i == ZERO) return;
  205. IDEBUG_START;
  206. FUNCTION_PROFILE_START();
  207. if (incx < 0) x -= (lenx - 1) * incx * 2;
  208. if (incy < 0) y -= (leny - 1) * incy * 2;
  209. buffer = (FLOAT *)blas_memory_alloc(1);
  210. #ifdef SMP
  211. if (m * n < 125000 || ku + kl < 15)
  212. nthreads = 1;
  213. else
  214. nthreads = num_cpu_avail(2);
  215. if (nthreads == 1) {
  216. #endif
  217. (gbmv[(int)trans])(m, n, kl, ku, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
  218. #ifdef SMP
  219. } else {
  220. (gbmv_thread[(int)trans])(m, n, kl, ku, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
  221. }
  222. #endif
  223. blas_memory_free(buffer);
  224. FUNCTION_PROFILE_END(4, m * n / 2 + n, m * n);
  225. IDEBUG_END;
  226. return;
  227. }