You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dasum.c 3.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. #include "common.h"
  2. #ifndef ABS_K
  3. #define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
  4. #endif
  5. #if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS)
  6. #include "dasum_microk_skylakex-2.c"
  7. #elif defined(HASWELL) || defined(ZEN)
  8. #include "dasum_microk_haswell-2.c"
  9. #endif
  10. #ifndef HAVE_DASUM_KERNEL
  11. static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1)
  12. {
  13. BLASLONG i=0;
  14. BLASLONG n_8 = n & -8;
  15. FLOAT *x = x1;
  16. FLOAT temp0, temp1, temp2, temp3;
  17. FLOAT temp4, temp5, temp6, temp7;
  18. FLOAT sum0 = 0.0;
  19. FLOAT sum1 = 0.0;
  20. FLOAT sum2 = 0.0;
  21. FLOAT sum3 = 0.0;
  22. FLOAT sum4 = 0.0;
  23. while (i < n_8) {
  24. temp0 = ABS_K(x[0]);
  25. temp1 = ABS_K(x[1]);
  26. temp2 = ABS_K(x[2]);
  27. temp3 = ABS_K(x[3]);
  28. temp4 = ABS_K(x[4]);
  29. temp5 = ABS_K(x[5]);
  30. temp6 = ABS_K(x[6]);
  31. temp7 = ABS_K(x[7]);
  32. sum0 += temp0;
  33. sum1 += temp1;
  34. sum2 += temp2;
  35. sum3 += temp3;
  36. sum0 += temp4;
  37. sum1 += temp5;
  38. sum2 += temp6;
  39. sum3 += temp7;
  40. x+=8;
  41. i+=8;
  42. }
  43. while (i < n) {
  44. sum4 += ABS_K(x1[i]);
  45. i++;
  46. }
  47. return sum0+sum1+sum2+sum3+sum4;
  48. }
  49. #endif
  50. static FLOAT asum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
  51. {
  52. BLASLONG i = 0;
  53. FLOAT sumf = 0.0;
  54. if (n <= 0 || inc_x <= 0) return (sumf);
  55. if (inc_x == 1) {
  56. sumf = dasum_kernel(n, x);
  57. }
  58. else {
  59. n *= inc_x;
  60. while (i < n) {
  61. sumf += ABS_K(x[i]);
  62. i += inc_x;
  63. }
  64. }
  65. return(sumf);
  66. }
  67. #if defined(SMP)
  68. static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5)
  69. {
  70. *(FLOAT *)result = asum_compute(n, x, inc_x);
  71. return 0;
  72. }
  73. extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads);
  74. #endif
  75. FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
  76. {
  77. #if defined(SMP)
  78. int nthreads;
  79. FLOAT dummy_alpha;
  80. #endif
  81. FLOAT sumf = 0.0;
  82. #if defined(SMP)
  83. int num_cpu = num_cpu_avail(1);
  84. if (n <= 100000 || inc_x <= 0)
  85. nthreads = 1;
  86. else
  87. nthreads = num_cpu < n/100000 ? num_cpu : n/100000;
  88. if (nthreads == 1) {
  89. sumf = asum_compute(n, x, inc_x);
  90. } else {
  91. int mode, i;
  92. char result[MAX_CPU_NUMBER * sizeof(double) *2];
  93. FLOAT *ptr;
  94. #if !defined(DOUBLE)
  95. mode = BLAS_SINGLE | BLAS_REAL;
  96. #else
  97. mode = BLAS_DOUBLE | BLAS_REAL;
  98. #endif
  99. blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (int (*)(void))asum_thread_function, nthreads);
  100. ptr = (FLOAT *)result;
  101. for (i = 0; i < nthreads; i++) {
  102. sumf += (*ptr);
  103. ptr = (FLOAT *)(((char *)ptr) + sizeof(double) *2);
  104. }
  105. }
  106. #else
  107. sumf = asum_compute(n, x, inc_x);
  108. #endif
  109. return(sumf);
  110. }