You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

sasum.c 1.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. #include "common.h"
  2. #if defined(DOUBLE)
  3. #error supports float only
  4. #else
  5. #ifndef ABS_K
  6. #define ABS_K(a) ((a) > 0 ? (a) : (-(a)))
  7. #endif
  8. #endif
  9. #if defined(SKYLAKEX)
  10. #include "sasum_microk_skylakex-2.c"
  11. #elif defined(HASWELL)
  12. #include "sasum_microk_haswell-2.c"
  13. #endif
  14. #ifndef HAVE_SASUM_KERNEL
  15. static FLOAT sasum_kernel(BLASLONG n, FLOAT *x1)
  16. {
  17. BLASLONG i=0;
  18. BLASLONG n_8 = n & -8;
  19. FLOAT *x = x1;
  20. FLOAT temp0, temp1, temp2, temp3;
  21. FLOAT temp4, temp5, temp6, temp7;
  22. FLOAT sum0 = 0.0;
  23. FLOAT sum1 = 0.0;
  24. FLOAT sum2 = 0.0;
  25. FLOAT sum3 = 0.0;
  26. FLOAT sum4 = 0.0;
  27. while (i < n_8) {
  28. temp0 = ABS_K(x[0]);
  29. temp1 = ABS_K(x[1]);
  30. temp2 = ABS_K(x[2]);
  31. temp3 = ABS_K(x[3]);
  32. temp4 = ABS_K(x[4]);
  33. temp5 = ABS_K(x[5]);
  34. temp6 = ABS_K(x[6]);
  35. temp7 = ABS_K(x[7]);
  36. sum0 += temp0;
  37. sum1 += temp1;
  38. sum2 += temp2;
  39. sum3 += temp3;
  40. sum0 += temp4;
  41. sum1 += temp5;
  42. sum2 += temp6;
  43. sum3 += temp7;
  44. x+=8;
  45. i+=8;
  46. }
  47. while (i < n) {
  48. sum4 += ABS_K(x1[i]);
  49. i++;
  50. }
  51. return sum0+sum1+sum2+sum3+sum4;
  52. }
  53. #endif
  54. FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
  55. {
  56. BLASLONG i=0;
  57. FLOAT sumf = 0.0;
  58. if (n <= 0 || inc_x <= 0) return(sumf);
  59. if ( inc_x == 1 ) {
  60. sumf = sasum_kernel(n, x);
  61. }
  62. else {
  63. n *= inc_x;
  64. while(i < n) {
  65. sumf += ABS_K(x[i]);
  66. i += inc_x;
  67. }
  68. }
  69. return(sumf);
  70. }