You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

sasum.c 3.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #include "common.h"
  2. #if defined(DOUBLE)
  3. #error supports float only
  4. #else
  5. #ifndef ABS_K
  6. #define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
  7. #endif
  8. #endif
  9. #if defined(SKYLAKEX)
  10. #include "sasum_microk_skylakex-2.c"
  11. #elif defined(HASWELL)
  12. #include "sasum_microk_haswell-2.c"
  13. #endif
  14. #ifndef HAVE_SASUM_KERNEL
  15. static FLOAT sasum_kernel(BLASLONG n, FLOAT *x1)
  16. {
  17. BLASLONG i=0;
  18. BLASLONG n_8 = n & -8;
  19. FLOAT *x = x1;
  20. FLOAT temp0, temp1, temp2, temp3;
  21. FLOAT temp4, temp5, temp6, temp7;
  22. FLOAT sum0 = 0.0;
  23. FLOAT sum1 = 0.0;
  24. FLOAT sum2 = 0.0;
  25. FLOAT sum3 = 0.0;
  26. FLOAT sum4 = 0.0;
  27. while (i < n_8) {
  28. temp0 = ABS_K(x[0]);
  29. temp1 = ABS_K(x[1]);
  30. temp2 = ABS_K(x[2]);
  31. temp3 = ABS_K(x[3]);
  32. temp4 = ABS_K(x[4]);
  33. temp5 = ABS_K(x[5]);
  34. temp6 = ABS_K(x[6]);
  35. temp7 = ABS_K(x[7]);
  36. sum0 += temp0;
  37. sum1 += temp1;
  38. sum2 += temp2;
  39. sum3 += temp3;
  40. sum0 += temp4;
  41. sum1 += temp5;
  42. sum2 += temp6;
  43. sum3 += temp7;
  44. x+=8;
  45. i+=8;
  46. }
  47. while (i < n) {
  48. sum4 += ABS_K(x1[i]);
  49. i++;
  50. }
  51. return sum0+sum1+sum2+sum3+sum4;
  52. }
  53. #endif
  54. static FLOAT asum_compute(BLASLONG n, FLOAT * x, BLASLONG inc_x)
  55. {
  56. BLASLONG i = 0;
  57. FLOAT sumf = 0.0;
  58. if (n <= 0 || inc_x <= 0) return (sumf);
  59. if (inc_x == 1) {
  60. sumf = sasum_kernel(n, x);
  61. }
  62. else {
  63. n *= inc_x;
  64. while(i < n) {
  65. sumf += ABS_K(x[i]);
  66. i += inc_x;
  67. }
  68. }
  69. return (sumf);
  70. }
  71. #if defined(SMP)
  72. static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5)
  73. {
  74. *(FLOAT *)result = asum_compute(n, x, inc_x);
  75. return 0;
  76. }
  77. extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void * alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int(*function)(), int nthreads);
  78. #endif
  79. FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
  80. {
  81. #if defined(SMP)
  82. int nthreads;
  83. FLOAT dummy_alpha;
  84. #endif
  85. FLOAT sumf = 0.0;
  86. #if defined(SMP)
  87. int num_cpu = num_cpu_avail(1);
  88. if (n <= 100000 || inc_x <= 0)
  89. nthreads = 1;
  90. else
  91. nthreads = num_cpu < n/100000 ? num_cpu : n/100000;
  92. if (nthreads == 1) {
  93. sumf = asum_compute(n, x, inc_x);
  94. }
  95. else {
  96. int mode, i;
  97. char result[MAX_CPU_NUMBER * sizeof(double) *2];
  98. FLOAT * ptr;
  99. #if !defined(DOUBLE)
  100. mode = BLAS_SINGLE | BLAS_REAL;
  101. #else
  102. mode = BLAS_DOUBLE | BLAS_REAL;
  103. #endif
  104. blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads);
  105. ptr = (FLOAT *)result;
  106. for (i = 0; i < nthreads; i++) {
  107. sumf += (*ptr);
  108. ptr = (FLOAT *)(((char *)ptr) + sizeof(double) * 2);
  109. }
  110. }
  111. #else
  112. sumf = asum_compute(n, x, inc_x);
  113. #endif
  114. return(sumf);
  115. }