You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

dot.c 3.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /***************************************************************************
  2. Copyright (c) 2017, The OpenBLAS Project
  3. Copyright (c) 2022, Arm Ltd
  4. All rights reserved.
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions are
  7. met:
  8. 1. Redistributions of source code must retain the above copyright
  9. notice, this list of conditions and the following disclaimer.
  10. 2. Redistributions in binary form must reproduce the above copyright
  11. notice, this list of conditions and the following disclaimer in
  12. the documentation and/or other materials provided with the
  13. distribution.
  14. 3. Neither the name of the OpenBLAS project nor the names of
  15. its contributors may be used to endorse or promote products
  16. derived from this software without specific prior written permission.
  17. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
  21. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  23. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  25. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  26. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. *****************************************************************************/
  28. #include "common.h"
  29. // Some compilers will report feature support for SVE without the appropriate
  30. // header available
  31. #ifdef HAVE_SVE
  32. #if defined __has_include
  33. #if __has_include(<arm_sve.h>) && __ARM_FEATURE_SVE
  34. #define USE_SVE
  35. #endif
  36. #endif
  37. #endif
  38. #ifdef USE_SVE
  39. #include "dot_kernel_sve.c"
  40. #endif
  41. #include "dot_kernel_asimd.c"
  42. #if defined(SMP)
  43. extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
  44. BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
  45. void *c, BLASLONG ldc, int (*function)(), int nthreads);
  46. #endif
  47. static RETURN_TYPE dot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
  48. {
  49. RETURN_TYPE dot = 0.0 ;
  50. if ( n <= 0 ) return dot;
  51. #ifdef USE_SVE
  52. if (inc_x == 1 && inc_y == 1) {
  53. return dot_kernel_sve(n, x, y);
  54. }
  55. #endif
  56. return dot_kernel_asimd(n, x, inc_x, y, inc_y);
  57. }
  #if defined(SMP)
  /*
   * Worker routine handed to blas_level1_thread_with_return_value().
   *
   * Runs dot_compute() on the (n, x, inc_x, y, inc_y) slice it is given and
   * stores the value at the location `result` points to.  `result` is declared
   * as FLOAT * but the value is written as RETURN_TYPE.  The dummy0..dummy3
   * parameters are not used.  Always returns 0.
   */
  static int dot_thread_function(BLASLONG n, BLASLONG dummy0,
      BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *y,
      BLASLONG inc_y, FLOAT *result, BLASLONG dummy3)
  {
      RETURN_TYPE *slot = (RETURN_TYPE *)result;

      slot[0] = dot_compute(n, x, inc_x, y, inc_y);
      return 0;
  }
  #endif
  67. RETURN_TYPE CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
  68. {
  69. #if defined(SMP)
  70. int nthreads;
  71. FLOAT dummy_alpha;
  72. #endif
  73. RETURN_TYPE dot = 0.0;
  74. #if defined(SMP)
  75. if (inc_x == 0 || inc_y == 0 || n <= 10000)
  76. nthreads = 1;
  77. else
  78. nthreads = num_cpu_avail(1);
  79. if (nthreads == 1) {
  80. dot = dot_compute(n, x, inc_x, y, inc_y);
  81. } else {
  82. int mode, i;
  83. char result[MAX_CPU_NUMBER * sizeof(double) * 2];
  84. RETURN_TYPE *ptr;
  85. #if !defined(DOUBLE)
  86. mode = BLAS_SINGLE | BLAS_REAL;
  87. #else
  88. mode = BLAS_DOUBLE | BLAS_REAL;
  89. #endif
  90. blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha,
  91. x, inc_x, y, inc_y, result, 0,
  92. ( void *)dot_thread_function, nthreads);
  93. ptr = (RETURN_TYPE *)result;
  94. for (i = 0; i < nthreads; i++) {
  95. dot = dot + (*ptr);
  96. ptr = (RETURN_TYPE *)(((char *)ptr) + sizeof(double) * 2);
  97. }
  98. }
  99. #else
  100. dot = dot_compute(n, x, inc_x, y, inc_y);
  101. #endif
  102. return dot;
  103. }