You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

sbgemv_thread.c 5.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. /*********************************************************************/
  2. /* Copyright 2025 The OpenBLAS Project. */
  3. /* Copyright 2009, 2010 The University of Texas at Austin. */
  4. /* All rights reserved. */
  5. /* */
  6. /* Redistribution and use in source and binary forms, with or */
  7. /* without modification, are permitted provided that the following */
  8. /* conditions are met: */
  9. /* */
  10. /* 1. Redistributions of source code must retain the above */
  11. /* copyright notice, this list of conditions and the following */
  12. /* disclaimer. */
  13. /* */
  14. /* 2. Redistributions in binary form must reproduce the above */
  15. /* copyright notice, this list of conditions and the following */
  16. /* disclaimer in the documentation and/or other materials */
  17. /* provided with the distribution. */
  18. /* */
  19. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  20. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  21. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  22. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  23. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  24. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  25. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  26. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  27. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  28. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  29. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  30. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  31. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  32. /* POSSIBILITY OF SUCH DAMAGE. */
  33. /* */
  34. /* The views and conclusions contained in the software and */
  35. /* documentation are those of the authors and should not be */
  36. /* interpreted as representing official policies, either expressed */
  37. /* or implied, of The University of Texas at Austin. */
  38. /*********************************************************************/
  39. #include <stdio.h>
  40. #include <stdlib.h>
  41. #include "common.h"
  /* Select the GEMV variant this translation unit wraps: GEMV_T when the
   * build defines TRANSA, GEMV_N otherwise. */
  #ifdef TRANSA
  #define GEMV GEMV_T
  #else
  #define GEMV GEMV_N
  #endif
  47. static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){
  48. IFLOAT *a, *x;
  49. FLOAT *y;
  50. BLASLONG lda, incx, incy;
  51. BLASLONG m_from, m_to, n_from, n_to;
  52. a = (IFLOAT *)args->a;
  53. x = (IFLOAT *)args->b;
  54. y = (FLOAT *)args->c;
  55. lda = args->lda;
  56. incx = args->ldb;
  57. incy = args->ldc;
  58. #ifndef TRANSA // N
  59. m_from = *(range_m + 0);
  60. m_to = *(range_m + 1);
  61. n_from = 0;
  62. n_to = args -> n;
  63. a += m_from;
  64. y += m_from * incy;
  65. #else // T
  66. m_from = 0;
  67. m_to = args->m;
  68. n_from = *(range_n + 0);
  69. n_to = *(range_n + 1);
  70. a += n_from * lda;
  71. y += n_from * incy;
  72. #endif
  73. GEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);
  74. return 0;
  75. }
  76. int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG incx, FLOAT beta, FLOAT *y, BLASLONG incy, int threads)
  77. {
  78. blas_arg_t args;
  79. blas_queue_t queue[MAX_CPU_NUMBER];
  80. BLASLONG range[MAX_CPU_NUMBER + 1];
  81. #ifndef TRANSA
  82. BLASLONG width_for_split = m;
  83. #else
  84. BLASLONG width_for_split = n;
  85. #endif
  86. BLASLONG BLOCK_WIDTH = width_for_split/threads;
  87. int mode = BLAS_BFLOAT16 | BLAS_REAL;
  88. args.m = m;
  89. args.n = n;
  90. args.a = (void *)a;
  91. args.b = (void *)x;
  92. args.c = (void *)y;
  93. args.lda = lda;
  94. args.ldb = incx;
  95. args.ldc = incy;
  96. args.alpha = (void *)&alpha;
  97. args.beta = (void *)&beta;
  98. range[0] = 0;
  99. int thread_idx;
  100. for (thread_idx=0; thread_idx<threads; thread_idx++) {
  101. if (thread_idx != threads-1) {
  102. range[thread_idx + 1] = range[thread_idx] + BLOCK_WIDTH;
  103. } else {
  104. range[thread_idx + 1] = range[thread_idx] + width_for_split;
  105. }
  106. queue[thread_idx].mode = mode;
  107. queue[thread_idx].routine = sbgemv_kernel;
  108. queue[thread_idx].args = &args;
  109. #ifndef TRANSA
  110. queue[thread_idx].range_m = &range[thread_idx];
  111. queue[thread_idx].range_n = NULL;
  112. #else
  113. queue[thread_idx].range_m = NULL;
  114. queue[thread_idx].range_n = &range[thread_idx];
  115. #endif
  116. queue[thread_idx].sa = NULL;
  117. queue[thread_idx].sb = NULL;
  118. queue[thread_idx].next = &queue[thread_idx + 1];
  119. width_for_split -= BLOCK_WIDTH;
  120. }
  121. if (thread_idx) {
  122. queue[0].sa = NULL;
  123. queue[0].sb = NULL;
  124. queue[thread_idx - 1].next = NULL;
  125. exec_blas(thread_idx, queue);
  126. }
  127. return 0;
  128. }