You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

common_thread.h 7.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_THREAD
  39. #define COMMON_THREAD
  40. #ifdef USE_OPENMP
  41. #include <omp.h>
  42. extern void goto_set_num_threads(int nthreads);
  43. #endif
  44. /* Basic Thread Debugging */
  45. #undef SMP_DEBUG
  46. /* Thread Timing Debugging */
  47. #undef TIMING_DEBUG
  48. /* Global Parameter */
  49. extern int blas_cpu_number;
  50. extern int blas_num_threads;
  51. extern int blas_omp_linked;
  /*
   * Bit-field constants.  The values below occupy disjoint bit ranges of a
   * 16-bit word (presumably the `mode` argument/member used elsewhere in
   * this header -- confirm against the callers).
   *
   *   bits  0..3   precision / conversion code   (BLAS_PREC)
   *   bits  4..5   transpose selector for A       (BLAS_TRANSA)
   *   bits  8..9   transpose selector for B       (BLAS_TRANSB)
   *   bit   10     BLAS_RSIDE
   *   bit   11     BLAS_UPLO
   *   bit   12     BLAS_COMPLEX
   *   bits 13..15  BLAS_NODE, BLAS_PTHREAD, BLAS_LEGACY
   */

  /* Single-bit flags in the top three bits */
  #define BLAS_LEGACY          0x8000U
  #define BLAS_PTHREAD         0x4000U
  #define BLAS_NODE            0x2000U

  /* Precision code: BLAS_PREC is the mask, the rest are values under it */
  #define BLAS_PREC            0x000FU
  #define BLAS_INT8            0x0000U
  #define BLAS_BFLOAT16        0x0001U
  #define BLAS_SINGLE          0x0002U
  #define BLAS_DOUBLE          0x0003U
  #define BLAS_XDOUBLE         0x0004U
  #define BLAS_STOBF16         0x0008U
  #define BLAS_DTOBF16         0x0009U
  #define BLAS_BF16TOS         0x000AU
  #define BLAS_BF16TOD         0x000BU

  /* Real vs. complex */
  #define BLAS_REAL            0x0000U
  #define BLAS_COMPLEX         0x1000U

  /* Two-bit field for operand A: mask, four values, and the field's shift */
  #define BLAS_TRANSA          0x0030U
  #define BLAS_TRANSA_N        0x0000U
  #define BLAS_TRANSA_T        0x0010U
  #define BLAS_TRANSA_R        0x0020U
  #define BLAS_TRANSA_C        0x0030U
  #define BLAS_TRANSA_SHIFT    4

  /* Two-bit field for operand B: mask, four values, and the field's shift */
  #define BLAS_TRANSB          0x0300U
  #define BLAS_TRANSB_N        0x0000U
  #define BLAS_TRANSB_T        0x0100U
  #define BLAS_TRANSB_R        0x0200U
  #define BLAS_TRANSB_C        0x0300U
  #define BLAS_TRANSB_SHIFT    8

  /* Single-bit flags, each paired with its bit position */
  #define BLAS_RSIDE           0x0400U
  #define BLAS_RSIDE_SHIFT     10
  #define BLAS_UPLO            0x0800U
  #define BLAS_UPLO_SHIFT      11

  /* Status codes (presumably stored in blas_queue_t.status -- confirm) */
  #define BLAS_STATUS_NOTYET   0
  #define BLAS_STATUS_QUEUED   1
  #define BLAS_STATUS_RUNNING  2
  #define BLAS_STATUS_FINISHED 4
  87. typedef struct blas_queue {
  88. void *routine;
  89. BLASLONG position;
  90. BLASLONG assigned;
  91. blas_arg_t *args;
  92. void *range_m;
  93. void *range_n;
  94. void *sa, *sb;
  95. struct blas_queue *next;
  96. #if defined( __WIN32__) || defined(__CYGWIN32__) || defined(_WIN32) || defined(__CYGWIN__)
  97. CRITICAL_SECTION lock;
  98. HANDLE finish;
  99. volatile int finished;
  100. #else
  101. pthread_mutex_t lock;
  102. pthread_cond_t finished;
  103. #endif
  104. int mode, status;
  105. #ifdef CONSISTENT_FPCSR
  106. unsigned int sse_mode, x87_mode;
  107. #endif
  108. #ifdef SMP_DEBUG
  109. int num;
  110. #endif
  111. #ifdef TIMING_DEBUG
  112. unsigned int clocks;
  113. #endif
  114. } blas_queue_t;
  115. #ifdef SMP_SERVER
  116. extern int blas_server_avail;
  117. extern int blas_omp_number_max;
  118. extern int blas_omp_threads_local;
  119. static __inline int num_cpu_avail(int level) {
  120. #ifdef USE_OPENMP
  121. int openmp_nthreads;
  122. openmp_nthreads=omp_get_max_threads();
  123. if (omp_in_parallel()) openmp_nthreads = blas_omp_threads_local;
  124. #endif
  125. #ifndef USE_OPENMP
  126. if (blas_cpu_number == 1
  127. #else
  128. if (openmp_nthreads == 1
  129. #endif
  130. ) return 1;
  131. #ifdef USE_OPENMP
  132. if (openmp_nthreads > blas_omp_number_max){
  133. #ifdef DEBUG
  134. fprintf(stderr,"WARNING - more OpenMP threads requested (%d) than available (%d)\n",openmp_nthreads,blas_omp_number_max);
  135. #endif
  136. openmp_nthreads = blas_omp_number_max;
  137. }
  138. if (blas_cpu_number != openmp_nthreads) {
  139. goto_set_num_threads(openmp_nthreads);
  140. }
  141. #endif
  142. return blas_cpu_number;
  143. }
  144. static __inline void blas_queue_init(blas_queue_t *queue){
  145. queue -> sa = NULL;
  146. queue -> sb = NULL;
  147. queue-> next = NULL;
  148. }
  149. int blas_thread_init(void);
  150. int BLASFUNC(blas_thread_shutdown)(void);
  151. int exec_blas(BLASLONG, blas_queue_t *);
  152. int exec_blas_async(BLASLONG, blas_queue_t *);
  153. int exec_blas_async_wait(BLASLONG, blas_queue_t *);
  154. #else
  155. int exec_blas_async(BLASLONG num_cpu, blas_param_t *param, pthread_t *);
  156. int exec_blas_async_wait(BLASLONG num_cpu, pthread_t *blas_threads);
  157. int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
  158. #endif
  159. #ifndef ASSEMBLER
  160. int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
  161. void *a, BLASLONG lda,
  162. void *b, BLASLONG ldb,
  163. void *c, BLASLONG ldc, int (*function)(void), int threads);
  164. int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG);
  165. int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG);
  166. int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG);
  167. int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG);
  168. int trsm_thread(int mode, BLASLONG m, BLASLONG n,
  169. double alpha_r, double alpha_i,
  170. void *a, BLASLONG lda,
  171. void *c, BLASLONG ldc, int (*function)(void), void *buffer);
  172. int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, BLASLONG);
  173. int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
  174. void *offsetA, BLASLONG lda,
  175. void *offsetB, BLASLONG jb,
  176. void *ipiv, BLASLONG offset, int (*function)(void), void *buffer);
  177. #endif /* ENDIF ASSEMBLER */
  178. #endif