
Merge branch 'risc-v' into img-riscv64-zvl128b

tags/v0.3.27
Octavian Maghiar 1 year ago
commit deecfb1a39
67 changed files with 1119 additions and 912 deletions
1. common_riscv64.h (+14, -3)
2. cpuid_riscv64.c (+4, -2)
3. kernel/riscv64/amax_rvv.c (+4, -4)
4. kernel/riscv64/amax_vector.c (+11, -7)
5. kernel/riscv64/amin_rvv.c (+4, -4)
6. kernel/riscv64/amin_vector.c (+11, -7)
7. kernel/riscv64/asum_rvv.c (+4, -4)
8. kernel/riscv64/asum_vector.c (+12, -8)
9. kernel/riscv64/axpby_vector.c (+8, -8)
10. kernel/riscv64/axpy_vector.c (+6, -6)
11. kernel/riscv64/copy_vector.c (+7, -7)
12. kernel/riscv64/dot_rvv.c (+8, -8)
13. kernel/riscv64/dot_vector.c (+26, -18)
14. kernel/riscv64/gemv_n_vector.c (+12, -12)
15. kernel/riscv64/gemv_t_rvv.c (+4, -4)
16. kernel/riscv64/gemv_t_vector.c (+25, -17)
17. kernel/riscv64/generate_kernel.py (+13, -10)
18. kernel/riscv64/iamax_rvv.c (+12, -12)
19. kernel/riscv64/iamax_vector.c (+46, -34)
20. kernel/riscv64/iamin_rvv.c (+12, -12)
21. kernel/riscv64/iamin_vector.c (+48, -34)
22. kernel/riscv64/imax_rvv.c (+12, -12)
23. kernel/riscv64/imax_vector.c (+48, -34)
24. kernel/riscv64/imin_rvv.c (+12, -12)
25. kernel/riscv64/imin_vector.c (+47, -33)
26. kernel/riscv64/izamax_rvv.c (+13, -13)
27. kernel/riscv64/izamax_vector.c (+54, -40)
28. kernel/riscv64/izamin_rvv.c (+12, -12)
29. kernel/riscv64/izamin_vector.c (+53, -39)
30. kernel/riscv64/max_rvv.c (+4, -4)
31. kernel/riscv64/max_vector.c (+12, -8)
32. kernel/riscv64/min_rvv.c (+4, -4)
33. kernel/riscv64/min_vector.c (+12, -8)
34. kernel/riscv64/nrm2_rvv.c (+4, -4)
35. kernel/riscv64/nrm2_vector.c (+34, -30)
36. kernel/riscv64/rot_vector.c (+18, -18)
37. kernel/riscv64/scal_vector.c (+7, -7)
38. kernel/riscv64/sum_rvv.c (+4, -4)
39. kernel/riscv64/sum_vector.c (+16, -16)
40. kernel/riscv64/swap_vector.c (+5, -5)
41. kernel/riscv64/symv_L_rvv.c (+6, -6)
42. kernel/riscv64/symv_L_vector.c (+32, -24)
43. kernel/riscv64/symv_U_rvv.c (+6, -6)
44. kernel/riscv64/symv_U_vector.c (+34, -26)
45. kernel/riscv64/zamax_rvv.c (+4, -4)
46. kernel/riscv64/zamax_vector.c (+15, -10)
47. kernel/riscv64/zamin_rvv.c (+4, -4)
48. kernel/riscv64/zamin_vector.c (+15, -10)
49. kernel/riscv64/zasum_rvv.c (+6, -6)
50. kernel/riscv64/zasum_vector.c (+13, -9)
51. kernel/riscv64/zaxpby_vector.c (+16, -16)
52. kernel/riscv64/zaxpy_vector.c (+10, -10)
53. kernel/riscv64/zcopy_vector.c (+6, -6)
54. kernel/riscv64/zdot_rvv.c (+28, -28)
55. kernel/riscv64/zdot_vector.c (+34, -26)
56. kernel/riscv64/zgemv_n_vector.c (+14, -14)
57. kernel/riscv64/zgemv_t_rvv.c (+20, -20)
58. kernel/riscv64/zgemv_t_vector.c (+32, -24)
59. kernel/riscv64/zhemv_LM_vector.c (+34, -26)
60. kernel/riscv64/zhemv_UV_vector.c (+34, -26)
61. kernel/riscv64/znrm2_rvv.c (+12, -12)
62. kernel/riscv64/znrm2_vector.c (+29, -22)
63. kernel/riscv64/zrot_vector.c (+18, -18)
64. kernel/riscv64/zscal_vector.c (+16, -16)
65. kernel/riscv64/zsum_rvv.c (+6, -6)
66. kernel/riscv64/zsum_vector.c (+8, -8)
67. kernel/riscv64/zswap_vector.c (+5, -5)

common_riscv64.h (+14, -3)

@@ -91,12 +91,23 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define BUFFER_SIZE ( 32 << 20)
 #define SEEK_ADDRESS
 
-#if defined(C910V) || defined(RISCV64_ZVL256B) || defined(__riscv_v)
+#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC)))
 # include <riscv_vector.h>
+#endif
+
+#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 )
+// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this
+#define RISCV_0p10_INTRINSICS
+#define RISCV_RVV(x) x
+#else
+#define RISCV_RVV(x) __riscv_ ## x
+#endif
+
+#if defined(C910V) || defined(RISCV64_ZVL256B)
 # if !defined(DOUBLE)
-#  define EXTRACT_FLOAT(v) __riscv_vfmv_f_s_f32m1_f32(v)
+#  define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f32m1_f32)(v)
 # else
-#  define EXTRACT_FLOAT(v) __riscv_vfmv_f_s_f64m1_f64(v)
+#  define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f64m1_f64)(v)
 # endif
 #else
 # define EXTRACT_FLOAT(v) (v[0])
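The RISCV_RVV wrapper introduced in this hunk is the core of the whole patch: each kernel spells an intrinsic name once, and the macro resolves it either to the obsolete v0.10 namespace used by the T-Head toolchain or to the ratified v1.0 `__riscv_`-prefixed namespace. A minimal sketch of the expansion (mirrors the hunk above; the helper function is illustrative):

    /* Sketch of how RISCV_RVV resolves per toolchain. */
    #include <stddef.h>
    #include <riscv_vector.h>

    #if defined(__riscv_xtheadc) && defined(__riscv_v) && (__riscv_v <= 7000)
    #define RISCV_RVV(x) x              /* v0.10: RISCV_RVV(vsetvl_e32m4) -> vsetvl_e32m4 */
    #else
    #define RISCV_RVV(x) __riscv_ ## x  /* v1.0:  RISCV_RVV(vsetvl_e32m4) -> __riscv_vsetvl_e32m4 */
    #endif

    size_t active_vl(size_t n) {
        /* the same source line builds on both toolchains */
        return RISCV_RVV(vsetvl_e32m4)(n);
    }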


cpuid_riscv64.c (+4, -2)

@@ -72,12 +72,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define CPU_GENERIC 0
 #define CPU_C910V 1
-#define CPU_RISCV64_ZVL256B 2
-#define CPU_RISCV64_ZVL128B 3
+#define CPU_x280 2
+#define CPU_RISCV64_ZVL256B 3
+#define CPU_RISCV64_ZVL128B 4
 
 static char *cpuname[] = {
   "RISCV64_GENERIC",
   "C910V",
+  "x280",
   "CPU_RISCV64_ZVL256B",
   "CPU_RISCV64_ZVL128B"
 };


kernel/riscv64/amax_rvv.c (+4, -4)

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         vx = VLEV_FLOAT(x, vl);
         vx = VFABSV_FLOAT(vx, vl);
-        vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+        vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
     }
 
 } else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         vx = VLSEV_FLOAT(x, stride_x, vl);
         vx = VFABSV_FLOAT(vx, vl);
-        vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+        vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
     }
 
 }
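The switch to the _tu (tail-undisturbed) form is what makes the final full-width reduction safe: when the last loop iteration runs with vl < VLMAX, the _tu variant leaves the accumulator's tail lanes at their previous partial maxima instead of leaving them unspecified. A self-contained sketch of the pattern, assuming unit stride and v1.0 intrinsics (the function name is illustrative, not from the patch):

    #include <stddef.h>
    #include <riscv_vector.h>

    float amax_tu_sketch(size_t n, const float *x) {
        size_t vlmax = __riscv_vsetvlmax_e32m8();
        vfloat32m8_t vmax = __riscv_vfmv_v_f_f32m8(0.0f, vlmax);  /* |x| >= 0 */
        for (size_t vl; n > 0; n -= vl, x += vl) {
            vl = __riscv_vsetvl_e32m8(n);
            vfloat32m8_t vx = __riscv_vle32_v_f32m8(x, vl);
            vx = __riscv_vfabs_v_f32m8(vx, vl);
            /* _tu keeps lanes >= vl from the old vmax, so the reduction over
               vlmax lanes below never reads garbage from a short final pass */
            vmax = __riscv_vfmax_vv_f32m8_tu(vmax, vmax, vx, vl);
        }
        vfloat32m1_t v0 = __riscv_vfmv_v_f_f32m1(0.0f, 1);
        vfloat32m1_t vres = __riscv_vfredmax_vs_f32m8_f32m1(vmax, v0, vlmax);
        return __riscv_vfmv_f_s_f32m1_f32(vres);
    }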


kernel/riscv64/amax_vector.c (+11, -7)

@@ -49,15 +49,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


kernel/riscv64/amin_rvv.c (+4, -4)

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         vx = VLEV_FLOAT(x, vl);
        vx = VFABSV_FLOAT(vx, vl);
-        vmin = VFMINVV_FLOAT(vmin, vx, vl);
+        vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
     }
 
 } else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         vx = VLSEV_FLOAT(x, stride_x, vl);
         vx = VFABSV_FLOAT(vx, vl);
-        vmin = VFMINVV_FLOAT(vmin, vx, vl);
+        vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
     }
 
 }


kernel/riscv64/amin_vector.c (+11, -7)

@@ -48,15 +48,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


kernel/riscv64/asum_rvv.c (+4, -4)

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
@@ -76,7 +76,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         vx = VLEV_FLOAT(x, vl);
         vx = VFABSV_FLOAT(vx, vl);
-        vsum = VFADDVV_FLOAT(vsum, vx, vl);
+        vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
     }
 
 } else {
@@ -88,7 +88,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         vx = VLSEV_FLOAT(x, stride_x, vl);
         vx = VFABSV_FLOAT(vx, vl);
-        vsum = VFADDVV_FLOAT(vsum, vx, vl);
+        vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
     }
 
 }


kernel/riscv64/asum_vector.c (+12, -8)

@@ -49,16 +49,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


kernel/riscv64/axpby_vector.c (+8, -8)

@@ -48,15 +48,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
-#define VFMACCVF_FLOAT JOIN(__riscv_vfmacc, _vf_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMULVF_FLOAT JOIN(__riscv_vfmul, _vf_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
+#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMULVF_FLOAT JOIN(RISCV_RVV(vfmul), _vf_f, ELEN, LMUL, _)
 
 int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
 {


kernel/riscv64/axpy_vector.c (+6, -6)

@@ -49,13 +49,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
-#define VFMACCVF_FLOAT JOIN(__riscv_vfmacc, _vf_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
+#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
 
 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
 {


kernel/riscv64/copy_vector.c (+7, -7)

@@ -47,12 +47,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
 
 int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 {
@@ -71,7 +71,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         stride_x = inc_x * sizeof(FLOAT);
         if(gvl <= n/4){
             BLASLONG inc_xv = inc_x * gvl;
-            BLASLONG gvl3 = gvl * 3;
+            unsigned int gvl3 = gvl * 3;
             BLASLONG inc_xv3 = inc_xv * 3;
             for(i=0,j=0; i<n/(4*gvl); i++){
                 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
@@ -99,7 +99,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         if(gvl <= n/4){
             BLASLONG inc_yv = inc_y * gvl;
             BLASLONG inc_yv3 = inc_yv * 3;
-            BLASLONG gvl3 = gvl * 3;
+            unsigned int gvl3 = gvl * 3;
             for(i=0,j=0; i<n/(4*gvl); i++){
                 v0 = VLEV_FLOAT(&x[j], gvl);
                 VSSEV_FLOAT(&y[iy], stride_y, v0, gvl);


kernel/riscv64/dot_rvv.c (+8, -8)

@@ -49,12 +49,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
         vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
 
-        vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
         vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);
         vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl);
 
-        vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
     }
 
@@ -69,12 +69,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
         vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl);
 
-        vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
         vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);
         vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl);
 
-        vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
     }
 } else if (1 == inc_y) {
@@ -88,12 +88,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl);
         vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
 
-        vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
         vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl);
         vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl);
 
-        vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
     }
 } else {
@@ -108,12 +108,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl);
         vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl);
 
-        vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
         vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl);
        vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl);
 
-        vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+        vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
     }
 }
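For DSDOT the kernel multiplies single-precision inputs and accumulates the products in double precision via the widening multiply-accumulate; the _tu forms again keep the tail lanes of the f64 accumulator valid across the short final iteration. A hedged sketch of the unit-stride path (function name and loop shape are illustrative, not from the patch):

    #include <stddef.h>
    #include <riscv_vector.h>

    double dsdot_sketch(size_t n, const float *x, const float *y) {
        size_t vlmax = __riscv_vsetvlmax_e64m8();
        vfloat64m8_t vr = __riscv_vfmv_v_f_f64m8(0.0, vlmax);
        for (size_t vl; n > 0; n -= vl, x += vl, y += vl) {
            vl = __riscv_vsetvl_e32m4(n);
            vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
            vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
            /* f32 * f32 accumulated into f64 lanes; _tu preserves lanes >= vl */
            vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
        }
        vfloat64m1_t vz = __riscv_vfmv_v_f_f64m1(0.0, 1);
        vfloat64m1_t vsum = __riscv_vfredusum_vs_f64m8_f64m1(vr, vz, vlmax);
        return __riscv_vfmv_f_s_f64m1_f64(vsum);
    }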


kernel/riscv64/dot_vector.c (+26, -18)

@@ -27,29 +27,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4)
 #endif
 
 #if defined(DSDOT)
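The #ifdef RISCV_0p10_INTRINSICS split exists because the obsolete v0.10 reductions took the destination register as an extra first operand, so the 0.10 branch of the macro pins a v_res variable from the enclosing scope, while the v1.0 branch is a plain alias. A sketch under the v1.0 branch (the 0.10 expansion is described in the comment; the function name is illustrative):

    #include <stddef.h>
    #include <riscv_vector.h>

    /* v1.0 alias, as in the #else branch above. Under the T-Head 0.10
     * toolchain the macro would instead expand to
     * vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl), threading the
     * destination that 0.10 reductions required as a first operand. */
    #define RISCV_RVV(x) __riscv_ ## x
    #define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)

    float partial_sum_sketch(const float *x, size_t n) {
        size_t gvl = RISCV_RVV(vsetvl_e32m4)(n);
        vfloat32m1_t v_res = RISCV_RVV(vfmv_v_f_f32m1)(0.0f, 1);
        vfloat32m4_t va = RISCV_RVV(vle32_v_f32m4)(x, gvl);
        /* same call-site spelling on both toolchains */
        v_res = VFREDSUM_FLOAT(va, v_res, gvl);
        return RISCV_RVV(vfmv_f_s_f32m1_f32)(v_res);
    }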


kernel/riscv64/gemv_n_vector.c (+12, -12)

@@ -27,21 +27,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
 #endif
 
 int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)


kernel/riscv64/gemv_t_rvv.c (+4, -4)

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
 
             va = VLEV_FLOAT(a_ptr, vl);
             vx = VLEV_FLOAT(x_ptr, vl);
-            vr = VFMACCVV_FLOAT(vr, va, vx, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
         }
 
         v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
 
             va = VLEV_FLOAT(a_ptr, vl);
             vx = VLSEV_FLOAT(x_ptr, stride_x, vl);
-            vr = VFMACCVV_FLOAT(vr, va, vx, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
         }
 
         v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);


kernel/riscv64/gemv_t_vector.c (+25, -17)

@@ -27,28 +27,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m2(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m2)(n)
 #define FLOAT_V_T vfloat32m2_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m2
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m2_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m2
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m2)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m2_f32m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m2_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m2)
 #define xint_t int
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m2(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m2)(n)
 #define FLOAT_V_T vfloat64m2_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m2
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m2_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m2
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m2)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m2_f64m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m2_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m2)
 #define xint_t long long
 #endif
 
@@ -60,7 +68,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
     FLOAT temp;
 
     FLOAT_V_T va, vr, vx;
-    BLASLONG gvl = 0;
+    unsigned int gvl = 0;
     FLOAT_V_T_M1 v_res;






kernel/riscv64/generate_kernel.py (+13, -10)

@@ -197,13 +197,13 @@ def generate_gemm_kernel_inner_complex( settings, dest, M, N, vlen, a_regs ):
     dest.write("ai += {M}*2;")
     dest.write()
 
-    accumulation_regs = a_regs * N * settings['LMUL_ACC'].value
+    # for each vector register loaded from matrix A, we require N registers to hold vector-scalar multiply-accumulate results
+    accumulation_regs = a_regs * N
     dest.write("// {a_regs} vector regs to hold A array contents, {accumulation_regs} regs to hold values accumulated over k",
         a_regs=a_regs*2, accumulation_regs=accumulation_regs*2
         )
     pass_regs = (accumulation_regs + a_regs)*2
-    tmp_regs = 32-pass_regs
+    tmp_regs = (32 // settings['LMUL_ACC'].value) - pass_regs
    if tmp_regs < 2:
         raise RuntimeError("Complex kernel would use too many registers!")

@@ -337,10 +337,12 @@ def generate_gemm_kernel( settings, OUTPUT ):
 
     M = settings['M'].value
     N = settings['N'].value
-    vlenmax = int( settings['reg_width_bits'].value / settings['ELEN_PARAM'].value )
+    vlenmax = int(settings['reg_width_bits'].value * settings['LMUL_ACC'].value /
+                  settings['ELEN_PARAM'].value)
     a_regs = max(int(M/vlenmax), 1)
 
-    accumulation_regs = a_regs * N * settings['LMUL_ACC'].value
+    # for each vector register loaded from matrix A, we require N registers to hold vector-scalar multiply-accumulate results
+    accumulation_regs = a_regs * N
     required_regs = accumulation_regs + a_regs
     if is_complex:
         required_regs = required_regs * 2 + 2

@@ -380,9 +382,9 @@ def generate_gemm_kernel( settings, OUTPUT ):
     '''.format(tail_policy=settings['tail_policy'].value))
 
 
-    if required_regs > 32:
-        raise Exception("{} vector registers needed during accumulation for unrolling {} x {}{} but only 32 are available".format(
-            required_regs, N, M, (" with wide accumulator" if settings['LMUL_ACC'].value > 1 else '')
+    if required_regs > (32 // settings['LMUL_ACC'].value):
+        raise Exception("{} vector registers needed during accumulation for unrolling {} x {}{} but only {} are available".format(
+            required_regs, N, M, (" with wide accumulator" if settings['LMUL_ACC'].value > 1 else ''), 32 // settings['LMUL_ACC'].value
         ))
 
     TRMM = (settings['op'].value == 'trmm')

@@ -448,7 +450,8 @@ def generate_gemm_kernel( settings, OUTPUT ):
 def generate_M_tails( dest, settings, M, N ):
     M_tail = int(M/2)
     M_tail_min = settings['M_tail_scalar_from'].value
-    vlenmax = int( settings['reg_width_bits'].value / settings['ELEN_PARAM'].value )
+    vlenmax = int(settings['reg_width_bits'].value * settings['LMUL_ACC'].value
+                  / settings['ELEN_PARAM'].value )
     TRMM = (settings['op'].value == 'trmm')
     is_complex = settings['complex'].value
     generate_gemm_kernel_inner = generate_gemm_kernel_inner_complex if is_complex else generate_gemm_kernel_inner_real

@@ -667,4 +670,4 @@ def main():
         ERROR("unsupported kernel type {}".format(settings['op']))
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
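A worked example of the new register accounting (values assumed purely for illustration): with reg_width_bits = 256, ELEN_PARAM = 32 and LMUL_ACC = 2, vlenmax = 256 * 2 / 32 = 16 elements, so an M = 16, N = 4 real kernel needs a_regs = 1 group for A and accumulation_regs = 1 * 4 = 4 groups for accumulators. The resulting required_regs = 5 is now checked against 32 // 2 = 16 available register groups rather than 32, because with a wide accumulator each LMUL-2 group occupies two architectural vector registers.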

kernel/riscv64/iamax_rvv.c (+12, -12)

@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -68,12 +68,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -106,11 +106,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         //index where element greater than v_max
         mask = VMFLTVV_FLOAT(v_max, vx, vl);
-        v_max_index = VIDV_MASK_UINT(mask, vl);
-        v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+        v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+        v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
         //update v_max
-        v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+        v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
     }
 
 } else {
@@ -125,11 +125,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         //index where element greater than v_max
        mask = VMFLTVV_FLOAT(v_max, vx, vl);
-        v_max_index = VIDV_MASK_UINT(mask, vl);
-        v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+        v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+        v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
         //update v_max
-        v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+        v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
     }
 }
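The index-tracking change is the subtler half of this diff: the masked vid/vadd now use _tumu (tail undisturbed, mask undisturbed), so lanes whose running maximum did not improve keep the index recorded in an earlier iteration instead of being clobbered. A simplified sketch, assuming unit stride and v1.0 intrinsics (the final winner-extraction step of the real kernel is omitted; names are illustrative):

    #include <stddef.h>
    #include <riscv_vector.h>

    size_t iamax_index_sketch(size_t n, const float *x) {
        size_t vlmax = __riscv_vsetvlmax_e32m8();
        vfloat32m8_t v_max = __riscv_vfmv_v_f_f32m8(-1.0f, vlmax);  /* |x| >= 0 */
        vuint32m8_t v_max_index = __riscv_vmv_v_x_u32m8(0, vlmax);
        for (size_t j = 0, vl; n > 0; n -= vl, x += vl, j += vl) {
            vl = __riscv_vsetvl_e32m8(n);
            vfloat32m8_t vx = __riscv_vfabs_v_f32m8(__riscv_vle32_v_f32m8(x, vl), vl);
            vbool4_t mask = __riscv_vmflt_vv_f32m8_b4(v_max, vx, vl);
            /* _tumu: inactive and tail lanes keep their previously stored indices */
            v_max_index = __riscv_vid_v_u32m8_tumu(mask, v_max_index, vl);
            v_max_index = __riscv_vadd_vx_u32m8_tumu(mask, v_max_index, v_max_index, j, vl);
            v_max = __riscv_vfmax_vv_f32m8_tu(v_max, v_max, vx, vl);
        }
        /* reduction and first-match search to pick the winning lane omitted */
        return 0;
    }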


kernel/riscv64/iamax_vector.c (+46, -34)

@@ -31,50 +31,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f64m4_f64m1(v_res, va, vb, gvl)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m4_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m4)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f64m4_f64m1)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m4_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m4)
+#endif
 #define MASK_T vbool16_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f64m4_b16
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m4_b16
-#define VMFIRSTM __riscv_vfirst_m_b16
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f64m4_b16)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f64m4)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f64m4_b16)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b16)
 #define UINT_V_T vuint64m4_t
-#define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m4
-#define VMVVX_UINT __riscv_vmv_v_x_u64m4
-#define VFABS_FLOAT __riscv_vfabs_v_f64m4
-#define VCOMPRESS __riscv_vcompress_vm_u64m4
-#define VMV_X __riscv_vmv_x_s_u64m4_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m4)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m4)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m4)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m4)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m4_u64)
 #else
 
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f32m4_f32m1(v_res, va, vb, gvl)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m4_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m4)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f32m4_f32m1)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m4_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m4)
+#endif
 #define MASK_T vbool8_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f32m4_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m4_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f32m4_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f32m4)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f32m4_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint32m4_t
-#define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m4
-#define VMVVX_UINT __riscv_vmv_v_x_u32m4
-#define VFABS_FLOAT __riscv_vfabs_v_f32m4
-#define VCOMPRESS __riscv_vcompress_vm_u32m4
-#define VMV_X __riscv_vmv_x_s_u32m4_u32
+#define VIDV_UINT RISCV_RVV(vid_v_u32m4)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m4)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m4)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m4)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m4_u32)
 #endif






kernel/riscv64/iamin_rvv.c (+12, -12)

@@ -43,12 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -69,12 +69,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -107,11 +107,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         // index where element less than v_min
         mask = VMFLTVV_FLOAT(vx, v_min, vl);
-        v_min_index = VIDV_MASK_UINT(mask, vl);
-        v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+        v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+        v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
         //update v_min and start_index j
-        v_min = VFMINVV_FLOAT(v_min, vx, vl);
+        v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
     }
 
 } else {
@@ -126,11 +126,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
         // index where element less than v_min
        mask = VMFLTVV_FLOAT(vx, v_min, vl);
-        v_min_index = VIDV_MASK_UINT(mask, vl);
-        v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+        v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+        v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
         //update v_min and start_index j
-        v_min = VFMINVV_FLOAT(v_min, vx, vl);
+        v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
     }
 }


kernel/riscv64/iamin_vector.c (+48, -34)

@@ -31,52 +31,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f64m8_f64m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT vid_v_u64m8_m
+#define VADDVX_MASK_UINT vadd_vx_u64m8_m
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
-#define MASK_T vbool8_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
-#define UINT_V_T vuint64m8_t
 #define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
 #define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VFABS_FLOAT __riscv_vfabs_v_f64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
+#define MASK_T vbool8_t
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f64m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
+#define UINT_V_T vuint64m8_t
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else
 
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT vid_v_u32m8_m
+#define VADDVX_MASK_UINT vadd_vx_u32m8_m
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
-#define MASK_T vbool4_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
-#define UINT_V_T vuint32m8_t
 #define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
 #define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VFABS_FLOAT __riscv_vfabs_v_f32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
+#define MASK_T vbool4_t
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f32m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
+#define UINT_V_T vuint32m8_t
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif






+ 12
- 12
kernel/riscv64/imax_rvv.c View File

@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);

 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
 }

 } else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);

 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
 }
 }
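
Note: the recurring change in imax_rvv.c (and the other *_rvv.c kernels below) is the switch from plain vfmax/vfmin/vfmacc/vfadd intrinsics to their _tu (tail-undisturbed) forms, which take the previous destination as an extra leading operand. On the last trip through the loop vl can be shorter than VLMAX, and the plain intrinsics leave the accumulator's tail lanes unspecified, so the final full-width reduction could pick up garbage; the _tu forms copy the old accumulator values through those lanes instead. A minimal sketch of the pattern, not the OpenBLAS kernel itself (plain max over a unit-stride float vector; assumes a v1.0 RVV toolchain, e.g. clang with -march=rv64gcv):

    #include <riscv_vector.h>
    #include <float.h>

    float vec_max_sketch(const float *x, long n)
    {
        size_t vlmax = __riscv_vsetvlmax_e32m8();
        vfloat32m8_t v_max = __riscv_vfmv_v_f_f32m8(-FLT_MAX, vlmax);

        for (long i = 0; i < n; ) {
            size_t vl = __riscv_vsetvl_e32m8((size_t)(n - i));
            vfloat32m8_t vx = __riscv_vle32_v_f32m8(&x[i], vl);
            /* tail-undisturbed: lanes >= vl keep their previous maxima */
            v_max = __riscv_vfmax_vv_f32m8_tu(v_max, v_max, vx, vl);
            i += (long)vl;
        }
        /* reduce over the full register; every lane holds a valid candidate */
        vfloat32m1_t v_res = __riscv_vfmv_v_f_f32m1(-FLT_MAX, 1);
        v_res = __riscv_vfredmax_vs_f32m8_f32m1(v_max, v_res, vlmax);
        return __riscv_vfmv_f_s_f32m1_f32(v_res);
    }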


+ 48
- 34
kernel/riscv64/imax_vector.c

@@ -31,50 +31,64 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)

-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f64m8_f64m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f64m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else

-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f32m8_f32m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
 #define MASK_T vbool4_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f32m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
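
Note: every *_vector.c kernel in this merge now spells its intrinsics through the RISCV_RVV() wrapper, so one source tree can target both the ratified v1.0 intrinsics (prefixed with __riscv_) and the older v0.10 ones (unprefixed), selected by RISCV_0p10_INTRINSICS. The call sites imply a definition along these lines; the real one lives in common_riscv64.h, so treat this as a sketch:

    /* hedged sketch of the dispatch macro implied by the call sites */
    #ifdef RISCV_0p10_INTRINSICS
    #define RISCV_RVV(x) x            /* v0.10 intrinsic names carry no prefix */
    #else
    #define RISCV_RVV(x) __riscv_##x  /* v1.0 names are __riscv_-prefixed */
    #endif

    /* e.g. RISCV_RVV(vsetvl_e64m8)(n) expands to __riscv_vsetvl_e64m8(n) on v1.0 */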






+ 12
- 12
kernel/riscv64/imin_rvv.c

@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);

 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
 }

 } else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);

 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
 }
 }


+ 47
- 33
kernel/riscv64/imin_vector.c

@@ -31,50 +31,64 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)

-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f64m8_f64m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT(mask, gvl) RISCV_RVV(vid_v_u64m8_m)(mask, v_min_index, gvl)
+#define VADDVX_MASK_UINT(mask, a, b, gvl) RISCV_RVV(vadd_vx_u64m8_m)(mask, a, a, b, gvl)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMINVS_FLOAT RISCV_RVV(vfredmin_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f64m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else

-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT(mask, gvl) RISCV_RVV(vid_v_u32m8_m)(mask, v_min_index, gvl)
+#define VADDVX_MASK_UINT(mask, a, b, gvl) RISCV_RVV(vadd_vx_u32m8_m)(mask, a, a, b, gvl)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
-#define MASK_T vbool4_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
-#define UINT_V_T vuint32m8_t
 #define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
-#define VIDV_UINT __riscv_vid_v_u32m8
 #define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
+#define MASK_T vbool4_t
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f32m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
+#define UINT_V_T vuint32m8_t
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
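
Note: the 0p10 branch wraps VIDV_MASK_UINT and VADDVX_MASK_UINT as function-style macros because the v0.10 masked intrinsics take an explicit maskedoff operand (here v_min_index), while v1.0 expresses the same behaviour through _m/_mu policy suffixes. Written against the v1.0 names, the index bookkeeping these i*min/i*max kernels perform per strip looks like this (hypothetical helper, f32/m8 case, not the kernel itself):

    #include <riscv_vector.h>
    #include <stdint.h>

    /* Fold one strip of vl floats starting at global index j into the
       running minima (v_min) and their global indices (v_min_index). */
    static inline void update_min_indices(vfloat32m8_t vx, uint32_t j, size_t vl,
                                          vfloat32m8_t *v_min,
                                          vuint32m8_t *v_min_index)
    {
        /* lanes where the new value beats the current minimum */
        vbool4_t mask = __riscv_vmflt_vv_f32m8_b4(vx, *v_min, vl);
        /* rewrite only those lanes with lane_id + j; _mu keeps the rest */
        *v_min_index = __riscv_vid_v_u32m8_mu(mask, *v_min_index, vl);
        *v_min_index = __riscv_vadd_vx_u32m8_mu(mask, *v_min_index,
                                                *v_min_index, j, vl);
        *v_min = __riscv_vfmin_vv_f32m8(*v_min, vx, vl);
    }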






+ 13
- 13
kernel/riscv64/izamax_rvv.c

@@ -44,13 +44,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -73,13 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -116,11 +116,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);

 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl);
 }
 }
 else {
@@ -138,11 +138,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl);
 }

 }
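
Note: for the complex kernels, "largest element" means largest |Re| + |Im| (the BLAS cabs1 measure), which is what the VFABSV_FLOAT/VFADDVV_FLOAT pair computes lane-wise before the same masked index update as in the real case. Scalar reference for one element:

    #include <math.h>

    /* izamax/izamin rank complex entries by this quantity, not by |z| */
    static inline double cabs1_ref(double re, double im)
    {
        return fabs(re) + fabs(im);
    }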


+ 54
- 40
kernel/riscv64/izamax_vector.c

@@ -31,58 +31,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)

-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmax_vs_f64m8_f64m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f64m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
-#define VSEVU_UINT __riscv_vse64_v_u64m8
+#define VSEVU_UINT RISCV_RVV(vse64_v_u64m8)
 #define UINT_T long unsigned int
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VFABS_FLOAT __riscv_vfabs_v_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else

-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmax_vs_f32m8_f32m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f32m8_f32m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
 #define MASK_T vbool4_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f32m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
 #define UINT_V_T vuint32m8_t
 #define UINT_T unsigned int
-#define VSEVU_UINT __riscv_vse32_v_u32m8
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VFABS_FLOAT __riscv_vfabs_v_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VSEVU_UINT RISCV_RVV(vse32_v_u32m8)
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
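
Note: VCOMPRESS gets a function-style wrapper in the 0p10 branch because the argument order changed between intrinsic generations: v0.10 vcompress takes (mask, maskedoff, src, vl), while v1.0 takes (src, mask, vl). These kernels use it to pack the winning element indices down to lane 0 before VMV_X reads the result out. Minimal v1.0 usage (hypothetical helper):

    #include <riscv_vector.h>
    #include <stdint.h>

    /* Pack the index lanes selected by mask m to the front and return lane 0. */
    static inline uint64_t first_selected_index(vuint64m8_t idx, vbool8_t m,
                                                size_t vl)
    {
        vuint64m8_t packed = __riscv_vcompress_vm_u64m8(idx, m, vl);
        return __riscv_vmv_x_s_u64m8_u64(packed);
    }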






+ 12
- 12
kernel/riscv64/izamin_rvv.c

@@ -43,13 +43,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -70,13 +70,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -113,11 +113,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);

 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx0, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl);
 }

 } else {
@@ -136,11 +136,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);

 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx0, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl);
 }

 }


+ 53
- 39
kernel/riscv64/izamin_vector.c

@@ -31,58 +31,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)

-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmin_vs_f64m8_f64m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMINVS_FLOAT RISCV_RVV(vfredmin_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f64m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
 #define VSEVU_UINT vse64_v_u64m8
 #define UINT_T long unsigned int
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VFABS_FLOAT __riscv_vfabs_v_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else

-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmin_vs_f32m8_f32m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMINVS_FLOAT RISCV_RVV(vfredmin_vs_f32m8_f32m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
 #define MASK_T vbool4_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f32m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
 #define UINT_V_T vuint32m8_t
 #define UINT_T unsigned int
-#define VSEVU_UINT __riscv_vse32_v_u32m8
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VFABS_FLOAT __riscv_vfabs_v_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VSEVU_UINT RISCV_RVV(vse32_v_u32m8)
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif






+ 4
- 4
kernel/riscv64/max_rvv.c

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m8(n)
@@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif

@@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);

 vx = VLEV_FLOAT(x, vl);
-vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
 }

 } else {
@@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);

 vx = VLSEV_FLOAT(x, stride_x, vl);
-vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
 }

 }


+ 12
- 8
kernel/riscv64/max_vector.c

@@ -54,17 +54,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFMAXVV_FLOAT JOIN(__riscv_vfmax, _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
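
Note: max_vector.c and min_vector.c build every intrinsic name by token pasting, so one kernel body compiles for f32 or f64 at whatever ELEN/LMUL the build selects; wrapping the name stem in RISCV_RVV() keeps that scheme working for both intrinsic generations. A self-contained illustration of the expansion (hypothetical values, v1.0 toolchain assumed):

    #include <riscv_vector.h>

    #define ELEN 32
    #define LMUL m8
    #define JOIN2_X(x, y) x ## y
    #define JOIN2(x, y)   JOIN2_X(x, y)
    #define JOIN(v, w, x, y, z) JOIN2(JOIN2(JOIN2(JOIN2(v, w), x), y), z)
    /* pastes to __riscv_vle32_v_f32m8 with the values above */
    #define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)

    vfloat32m8_t load_strip(const float *p, size_t vl)
    {
        return VLEV_FLOAT(p, vl);
    }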


+ 4
- 4
kernel/riscv64/min_rvv.c

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m8(n)
@@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif

@@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);

 vx = VLEV_FLOAT(x, vl);
-vmin = VFMINVV_FLOAT(vmin, vx, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
 }

 } else {
@@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);

 vx = VLSEV_FLOAT(x, stride_x, vl);
-vmin = VFMINVV_FLOAT(vmin, vx, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
 }

 }


+ 12
- 8
kernel/riscv64/min_vector.c

@@ -54,17 +54,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFMINVV_FLOAT JOIN(__riscv_vfmin, _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


+ 4
- 4
kernel/riscv64/nrm2_rvv.c

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -79,7 +79,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 v0 = VLEV_FLOAT(x, vl);

-vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);
 }

 } else {
@@ -91,7 +91,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 v0 = VLSEV_FLOAT(x, stride_x, vl);

-vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);
 }
 }
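
Note: here the _tu form guards the in-register sum of squares: lanes beyond a short final vl keep their partial sums instead of becoming unspecified before the vfredusum. A sketch of that shape, assuming a v1.0 toolchain (unscaled sum of squares only; the production kernel's overflow handling may differ):

    #include <riscv_vector.h>
    #include <math.h>

    float nrm2_sketch(const float *x, long n)
    {
        size_t vlmax = __riscv_vsetvlmax_e32m8();
        vfloat32m8_t vr = __riscv_vfmv_v_f_f32m8(0.0f, vlmax);

        for (long i = 0; i < n; ) {
            size_t vl = __riscv_vsetvl_e32m8((size_t)(n - i));
            vfloat32m8_t v0 = __riscv_vle32_v_f32m8(&x[i], vl);
            vr = __riscv_vfmacc_vv_f32m8_tu(vr, v0, v0, vl);  /* vr += v0*v0 */
            i += (long)vl;
        }
        vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1);
        vfloat32m1_t acc = __riscv_vfredusum_vs_f32m8_f32m1(vr, zero, vlmax);
        return sqrtf(__riscv_vfmv_f_s_f32m1_f32(acc));
    }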




+ 34
- 30
kernel/riscv64/nrm2_vector.c

@@ -52,38 +52,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVSF_FLOAT JOIN(__riscv_vfmv, _s_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVSF_FLOAT JOIN(RISCV_RVV(vfmv), _s_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VFABS JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VMFNE JOIN(__riscv_vmfne_vf_f,ELEN, LMUL, _b, MLEN)
-#define VMFGT JOIN(__riscv_vmfgt_vv_f,ELEN, LMUL, _b, MLEN)
-#define VMFEQ JOIN(__riscv_vmfeq_vf_f,ELEN, LMUL, _b, MLEN)
-#define VCPOP JOIN(__riscv_vcpop, _m_b, MLEN, _, _)
-#define VFREDMAX JOIN(__riscv_vfredmax_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
-#define VFREDMIN JOIN(__riscv_vfredmin_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
-#define VFIRST JOIN(__riscv_vfirst, _m_b, MLEN, _, _)
-#define VRGATHER JOIN(__riscv_vrgather, _vx_f, ELEN, LMUL, _)
-#define VFDIV JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _)
-#define VFDIV_M JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _mu)
-#define VFMUL JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _)
-#define VFMUL_M JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _mu)
-#define VFMACC JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _)
-#define VFMACC_M JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _mu)
-#define VMSBF JOIN(__riscv_vmsbf, _m_b, MLEN, _, _)
-#define VMSOF JOIN(__riscv_vmsof, _m_b, MLEN, _, _)
-#define VMAND JOIN(__riscv_vmand, _mm_b, MLEN, _, _)
-#define VMANDN JOIN(__riscv_vmandn, _mm_b, MLEN, _, _)
-#define VFREDSUM JOIN(__riscv_vfredusum_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
-#define VMERGE JOIN(__riscv_vmerge, _vvm_f, ELEN, LMUL, _)

-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
+#define VFABS JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VMFNE JOIN(RISCV_RVV(vmfne_vf_f),ELEN, LMUL, _b, MLEN)
+#define VMFGT JOIN(RISCV_RVV(vmfgt_vv_f),ELEN, LMUL, _b, MLEN)
+#define VMFEQ JOIN(RISCV_RVV(vmfeq_vf_f),ELEN, LMUL, _b, MLEN)
+#define VCPOP JOIN(RISCV_RVV(vcpop), _m_b, MLEN, _, _)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _m)
+#define VFMUL_M JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _m)
+#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _m)
+#define VMERGE(a, b, mask, gvl) JOIN(RISCV_RVV(vmerge), _vvm_f, ELEN, LMUL, _)(mask, a, b, gvl)
+#else
+#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _mu)
+#define VFMUL_M JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _mu)
+#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _mu)
+#define VMERGE JOIN(RISCV_RVV(vmerge), _vvm_f, ELEN, LMUL, _)
+#endif
+#define VFIRST JOIN(RISCV_RVV(vfirst), _m_b, MLEN, _, _)
+#define VRGATHER JOIN(RISCV_RVV(vrgather), _vx_f, ELEN, LMUL, _)
+#define VFDIV JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _)
+#define VFMUL JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _)
+#define VFMACC JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _)
+#define VMSBF JOIN(RISCV_RVV(vmsbf), _m_b, MLEN, _, _)
+#define VMSOF JOIN(RISCV_RVV(vmsof), _m_b, MLEN, _, _)
+#define VMAND JOIN(RISCV_RVV(vmand), _mm_b, MLEN, _, _)
+#define VMANDN JOIN(RISCV_RVV(vmandn), _mm_b, MLEN, _, _)

+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)

 #if defined(DOUBLE)
 #define ABS fabs
@@ -91,7 +95,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ABS fabsf
 #endif

-#define EXTRACT_FLOAT0_V(v) JOIN(__riscv_vfmv_f_s_f, ELEN, LMUL, _f, ELEN)(v)
+#define EXTRACT_FLOAT0_V(v) JOIN(RISCV_RVV(vfmv_f_s_f), ELEN, LMUL, _f, ELEN)(v)

 //#define DUMP( label, v0, gvl )
 #define DUMP( label, v0, gvl ) do{ FLOAT x[16]; VSEV_FLOAT( x, v0, gvl ); printf ("%s(%d): %s [ ", __FILE__, __LINE__, label); for( int xxx = 0; xxx < gvl; ++xxx ) { printf("%f, ", x[xxx]); } printf(" ]\n"); } while(0)
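
Note: the _m suffixes in the 0p10 branch correspond to the _mu (mask-undisturbed) forms used in the #else: inactive lanes pass the destination operand through unchanged, which the scaled-norm path relies on when it divides or multiplies only the lanes selected by a comparison mask. Hypothetical v1.0 fragment (f32/m8 case):

    #include <riscv_vector.h>

    /* active lanes become v / scale; inactive lanes keep v unchanged */
    static inline vfloat32m8_t scale_active(vfloat32m8_t v, vfloat32m8_t scale,
                                            vbool4_t active, size_t vl)
    {
        return __riscv_vfdiv_vv_f32m8_mu(active, v, v, scale, vl);
    }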


+ 18
- 18
kernel/riscv64/rot_vector.c

@@ -28,27 +28,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"

 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
-#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
+#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
-#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
+#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f64m4)
 #endif

 int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
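
Note: rot applies a Givens plane rotation, x'[i] = c*x[i] + s*y[i] and y'[i] = c*y[i] - s*x[i], which the kernel assembles from VFMULVF plus the fused VFMACCVF/VFMSACVF forms. Scalar reference for one element pair:

    /* reference semantics of one BLAS rot step */
    static inline void rot_ref(float *x, float *y, float c, float s)
    {
        float xi = *x, yi = *y;
        *x = c * xi + s * yi;
        *y = c * yi - s * xi;
    }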


+ 7
- 7
kernel/riscv64/scal_vector.c

@@ -52,14 +52,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMULVF_FLOAT JOIN(__riscv_vfmul, _vf_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMULVF_FLOAT JOIN(RISCV_RVV(vfmul), _vf_f, ELEN, LMUL, _)

 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
 {
{ {


+ 4
- 4
kernel/riscv64/sum_rvv.c

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -73,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);

 vx = VLEV_FLOAT(x, vl);
-vsum = VFADDVV_FLOAT(vsum, vx, vl);
+vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
 }

 } else {
@@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);

 vx = VLSEV_FLOAT(x, stride_x, vl);
-vsum = VFADDVV_FLOAT(vsum, vx, vl);
+vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
 }

 }


+ 16
- 16
kernel/riscv64/sum_vector.c

@@ -29,27 +29,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <math.h>

 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#define VFREDSUMVS_FLOAT RISCV_RVV(vfredusum_vs_f32m8_f32m1)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#define VFREDSUMVS_FLOAT RISCV_RVV(vfredusum_vs_f64m8_f64m1)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8)
 #endif
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


+ 5
- 5
kernel/riscv64/swap_vector.c

@@ -53,12 +53,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)

 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
 {
{ {


+ 6
- 6
kernel/riscv64/symv_L_rvv.c

@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VSEV_FLOAT __riscv_vse32_v_f32m8 #define VSEV_FLOAT __riscv_vse32_v_f32m8
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
@@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VSEV_FLOAT __riscv_vse64_v_f64m8 #define VSEV_FLOAT __riscv_vse64_v_f64m8
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
@@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
VSEV_FLOAT(&y[i], vy, vl); VSEV_FLOAT(&y[i], vy, vl);


vx = VLEV_FLOAT(&x[i], vl); vx = VLEV_FLOAT(&x[i], vl);
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);


} }
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
VSSEV_FLOAT(&y[iy], stride_y, vy, vl); VSSEV_FLOAT(&y[iy], stride_y, vy, vl);


vx = VLEV_FLOAT(&x[i], vl); vx = VLEV_FLOAT(&x[i], vl);
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);


iy += inc_yv; iy += inc_yv;
} }
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
VSEV_FLOAT(&y[i], vy, vl); VSEV_FLOAT(&y[i], vy, vl);


vx = VLSEV_FLOAT(&x[ix], stride_x, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);


ix += inc_xv; ix += inc_xv;
} }
@@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
VSSEV_FLOAT(&y[iy], stride_y, vy, vl); VSSEV_FLOAT(&y[iy], stride_y, vy, vl);


vx = VLSEV_FLOAT(&x[ix], stride_x, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);


ix += inc_xv; ix += inc_xv;
iy += inc_yv; iy += inc_yv;
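Note: the _TU renames above are not cosmetic. The v1.0 intrinsics expose tail policies explicitly, and the accumulator updates switch to the tail-undisturbed (_tu) form so that elements past the active vector length vl keep their previous partial sums instead of becoming unspecified, which matters because the final reduction runs over the full register. A scalar model of the distinction (illustrative only, not code from the file):

#include <stddef.h>

/* Scalar model of a tail-undisturbed fused multiply-accumulate:
 * vd[i] += vs1[i] * vs2[i] for i < vl, while vd[i] for i in
 * [vl, vlmax) is left untouched.  Under a tail-agnostic policy those
 * trailing elements may be clobbered, corrupting a later full-length
 * reduction over vd. */
static void vfmacc_tu_model(double *vd, const double *vs1,
                            const double *vs2, size_t vl, size_t vlmax)
{
    for (size_t i = 0; i < vl; ++i)
        vd[i] += vs1[i] * vs2[i];
    /* i in [vl, vlmax): deliberately untouched -- the "undisturbed" tail. */
    (void)vlmax;
}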


+32 -24  kernel/riscv64/symv_L_vector.c

@@ -27,35 +27,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
 #endif

 int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
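Note: the new RISCV_0p10_INTRINSICS branch papers over an API difference in the reduction intrinsics: the v0.10 vfredusum takes an extra destination/merge vector as its first operand, while the v1.0 form does not. Wrapping the call in a function-like macro keeps the call sites identical; the v0.10 variant silently captures a v_res variable from the caller's scope, so every caller must have one. Reduced-to-essentials model of the shim (f32m4 names as in this file):

#ifdef RISCV_0p10_INTRINSICS
/* v0.10: bare name, leading merge/destination operand pulled from
 * the enclosing scope. */
#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl)
#else
/* v1.0: prefixed name, no destination operand. */
#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
#endif

/* Either way the kernel writes:
 *   v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
 */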


+6 -6  kernel/riscv64/symv_U_rvv.c

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
@@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
@@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSEV_FLOAT(&y[i], vy, vl);

 vx = VLEV_FLOAT(&x[i], vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 }
 v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max);
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSSEV_FLOAT(&y[iy], stride_y, vy, vl);

 vx = VLEV_FLOAT(&x[i], vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);

 iy += inc_yv;
 }
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSEV_FLOAT(&y[i], vy, vl);

 vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);

 ix += inc_xv;
 }
@@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSSEV_FLOAT(&y[iy], stride_y, vy, vl);

 vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 ix += inc_xv;
 iy += inc_yv;
 }


+34 -26  kernel/riscv64/symv_U_vector.c

@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
 #endif

 int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)


+4 -4  kernel/riscv64/zamax_rvv.c

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);

 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmax = VFMAXVV_FLOAT(vmax, v0, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl);
 }

@@ -101,7 +101,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);

 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmax = VFMAXVV_FLOAT(vmax, v0, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl);
 }

 }


+15 -10  kernel/riscv64/zamax_vector.c

@@ -53,19 +53,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
+#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
+#else
+#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
+#endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFRSUBVF_MASK_FLOAT JOIN(__riscv_vfrsub,_vf_f, ELEN, LMUL, _m)
-#define VFMAXVV_FLOAT JOIN(__riscv_vfmax, _vv_f, ELEN, LMUL, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


+4 -4  kernel/riscv64/zamin_rvv.c

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m4_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m4_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);

 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmin = VFMINVV_FLOAT(vmin, v0, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl);
 }

 } else {
@@ -100,7 +100,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);

 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmin = VFMINVV_FLOAT(vmin, v0, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl);
 }

 }


+15 -10  kernel/riscv64/zamin_vector.c

@@ -55,19 +55,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
+#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
+#else
+#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
+#endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFRSUBVF_MASK_FLOAT JOIN(__riscv_vfrsub,_vf_f, ELEN, LMUL, _m)
-#define VFMINVV_FLOAT JOIN(__riscv_vfmin, _vv_f, ELEN, LMUL, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


+6 -6  kernel/riscv64/zasum_rvv.c

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m8(n)
@@ -51,7 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #endif

@@ -75,8 +75,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v0 = VFABSV_FLOAT(v0, vl);
 v1 = VFABSV_FLOAT(v1, vl);

-v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
 }

 }
@@ -93,8 +93,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v0 = VFABSV_FLOAT(v0, vl);
 v1 = VFABSV_FLOAT(v1, vl);

-v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
 }

 }


+13 -9  kernel/riscv64/zasum_vector.c

@@ -53,17 +53,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt, _vf_f, ELEN, LMUL, MLEN)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt), _vf_f, ELEN, LMUL, MLEN)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {


+16 -16  kernel/riscv64/zaxpby_vector.c

@@ -28,25 +28,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"

 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
-#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
+#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
-#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
+#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
 #endif

 int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i, FLOAT *y, BLASLONG inc_y)
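Note: zaxpby computes y := alpha*x + beta*y over interleaved complex data; the vf macros above (vfmacc/vfmsac/vfnmsac/vfmul against a scalar) are the building blocks for the four real multiplications each complex multiply needs. A scalar reference of the update, as a restatement of the arithmetic rather than code from the file:

#include <stddef.h>

/* Scalar model of one zaxpby pass over interleaved (re,im) pairs:
 * y[i] = alpha * x[i] + beta * y[i] in complex arithmetic, with
 * alpha = ar + i*ai and beta = br + i*bi. */
static void zaxpby_model(size_t n, double ar, double ai, const double *x,
                         double br, double bi, double *y)
{
    for (size_t i = 0; i < n; ++i) {
        double xr = x[2 * i], xi = x[2 * i + 1];
        double yr = y[2 * i], yi = y[2 * i + 1];
        y[2 * i]     = ar * xr - ai * xi + br * yr - bi * yi; /* real part */
        y[2 * i + 1] = ar * xi + ai * xr + br * yi + bi * yr; /* imag part */
    }
}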


+10 -10  kernel/riscv64/zaxpy_vector.c

@@ -28,19 +28,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"

 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
 #endif

 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)


+6 -6  kernel/riscv64/zcopy_vector.c

@@ -27,15 +27,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
 #endif






+28 -28  kernel/riscv64/zdot_rvv.c

@@ -36,12 +36,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m4(n)
@@ -52,12 +52,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif

@@ -86,14 +86,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSEG_FLOAT(&vx0, &vx1, x, vl);
 VLSEG_FLOAT(&vy0, &vy1, y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }

@@ -107,14 +107,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSEG_FLOAT(&vx0, &vx1, x, vl);
 VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }
 } else if (inc_y == 1){
@@ -127,14 +127,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl);
 VLSEG_FLOAT(&vy0, &vy1, y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }
 }else {
@@ -148,14 +148,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl);
 VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }
 }
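Note: the four accumulator updates in each branch implement the real and imaginary parts of the complex dot product, with the CONJ branch flipping the sign of the x-imaginary contributions (zdotc vs. zdotu). A scalar reference for what the vector loops accumulate, written as my own restatement rather than code from the file:

#include <complex.h>
#include <stddef.h>

/* Scalar model of the accumulation above:
 * without CONJ:  r += x[i]       * y[i]  (zdotu)
 * with    CONJ:  r += conj(x[i]) * y[i]  (zdotc)
 * Split into real/imaginary parts exactly as vr0/vr1 are updated. */
static double complex zdot_model(size_t n, const double *x, const double *y,
                                 int conjugate)
{
    double re = 0.0, im = 0.0;
    for (size_t i = 0; i < n; ++i) {
        double xr = x[2 * i], xi = x[2 * i + 1];
        double yr = y[2 * i], yi = y[2 * i + 1];
        re += xr * yr;          /* vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0) */
        im += xr * yi;          /* vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1) */
        if (!conjugate) {
            re -= xi * yi;      /* VFNMSACVV_FLOAT_TU(vr0, vx1, vy1) */
            im += xi * yr;      /* VFMACCVV_FLOAT_TU(vr1, vx1, vy0) */
        } else {
            re += xi * yi;      /* VFMACCVV_FLOAT_TU(vr0, vx1, vy1) */
            im -= xi * yr;      /* VFNMSACVV_FLOAT_TU(vr1, vx1, vy0) */
        }
    }
    return re + im * I;
}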


+34 -26  kernel/riscv64/zdot_vector.c

@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFMSACVV_FLOAT RISCV_RVV(vfmsac_vv_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFMSACVV_FLOAT RISCV_RVV(vfmsac_vv_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
 #endif

 OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)


+14 -14  kernel/riscv64/zgemv_n_vector.c

@@ -27,23 +27,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
 #endif

 int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)


+20 -20  kernel/riscv64/zgemv_t_rvv.c

@@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
@@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
@@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl);

 #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
 #else
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
 #endif
 j += vl * 2;
 ix += vl * inc_x * 2;
@@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl);
 #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
 #else
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
 #endif
 j += vl * 2;
 ix += vl * inc_x * 2;


+32 -24  kernel/riscv64/zgemv_t_vector.c

@@ -27,31 +27,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m2(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m2)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m2_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m2_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m2
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m2
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(vr, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(va, vb, gvl)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m2)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m2)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m2(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m2)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m2_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m2_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m2
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m2
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m2_f64m1)(vr, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m2_f64m1)(va, vb, gvl)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m2)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m2)
 #endif

 int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
@@ -93,8 +101,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl);
 vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
 #endif
-v_res_r = VFREDSUM_FLOAT(vr, v_res_r, gvl);
-v_res_i = VFREDSUM_FLOAT(vi, v_res_i, gvl);
+v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl);
+v_res_i = VFREDSUM_FLOAT(v_res_i, vi, v_res_i, gvl);

 j += inc_av;
 ix += inc_xv;
@@ -117,8 +125,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);

 #endif
-v_res_r = VFREDSUM_FLOAT(vr, v_res_r, gvl);
-v_res_i = VFREDSUM_FLOAT(vi, v_res_i, gvl);
+v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl);
+v_res_i = VFREDSUM_FLOAT(v_res_i, vi, v_res_i, gvl);
 }

 temp_r = VFMVFS_FLOAT(v_res_r);
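Note: this file uses a slightly different shim than the others: VFREDSUM_FLOAT keeps a four-argument shape under both intrinsic generations, and the v1.0 expansion simply discards the leading destination operand. That is why the call sites gain an explicit first argument (v_res_r / v_res_i) in the hunks above. In effect (f32m2 case, restating the macros introduced in this file):

#ifdef RISCV_0p10_INTRINSICS
/* v0.10: first argument consumed as the merge/destination operand. */
#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(vr, va, vb, gvl)
#else
/* v1.0: first argument dropped, so one call site serves both. */
#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(va, vb, gvl)
#endif

/* v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl); */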


+34 -26  kernel/riscv64/zhemv_LM_vector.c

@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
 #endif

 int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){


+34 -26  kernel/riscv64/zhemv_UV_vector.c

@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
 #endif

 int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){


+ 12
- 12
kernel/riscv64/znrm2_rvv.c View File

@@ -36,10 +36,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
#define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f32m4_f32m1_tu
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
#define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
#else #else
@@ -51,10 +51,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
#define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f64m4_f64m1_tu
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
#define VFABSV_FLOAT __riscv_vfabs_v_f64m4
#endif
@@ -85,11 +85,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
v0 = VFABSV_FLOAT(v0, vl);
v1 = VFABSV_FLOAT(v1, vl);


v_max = VFREDMAXVS_FLOAT(v0, v_max, vl);
vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl);
vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);


v_max = VFREDMAXVS_FLOAT(v1, v_max, vl);
vr = VFMACCVV_FLOAT(vr, v1, v1, vl);
v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl);
vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl);
}


} else {
@@ -103,11 +103,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
v0 = VFABSV_FLOAT(v0, vl);
v1 = VFABSV_FLOAT(v1, vl);


v_max = VFREDMAXVS_FLOAT(v0, v_max, vl);
vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl);
vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);


v_max = VFREDMAXVS_FLOAT(v1, v_max, vl);
vr = VFMACCVV_FLOAT(vr, v1, v1, vl);
v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl);
vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl);
}


}
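The substantive change in znrm2_rvv.c is the switch from tail-agnostic to tail-undisturbed (_tu) forms for the running maximum and the sum-of-squares accumulator. When the final stripmined iteration runs with vl below VLMAX, the _tu forms leave lanes vl..VLMAX-1 of the destination untouched, so earlier partial results survive into the final full-width reduction; the plain forms leave those lanes undefined. A self-contained sketch of the pattern (illustrative names and shapes, not the kernel itself):

#include <riscv_vector.h>

/* Sum of squares with a tail-undisturbed accumulator. */
static float sumsq_f32(const float *x, size_t n)
{
    size_t vlmax = __riscv_vsetvlmax_e32m4();
    vfloat32m4_t acc = __riscv_vfmv_v_f_f32m4(0.0f, vlmax);
    for (size_t i = 0; i < n;) {
        size_t vl = __riscv_vsetvl_e32m4(n - i);
        vfloat32m4_t v = __riscv_vle32_v_f32m4(x + i, vl);
        /* _tu: lanes vl..vlmax-1 of acc keep their partial sums on the
           short last iteration; the non-_tu form may clobber them. */
        acc = __riscv_vfmacc_vv_f32m4_tu(acc, v, v, vl);
        i += vl;
    }
    /* Reduce the full accumulator width, including preserved tail lanes. */
    vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1);
    vfloat32m1_t red = __riscv_vfredusum_vs_f32m4_f32m1(acc, zero, vlmax);
    return __riscv_vfmv_f_s_f32m1_f32(red);
}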


+29   -22   kernel/riscv64/znrm2_vector.c

@@ -52,37 +52,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)


#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
#define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
#define VFABS JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
#define VMFNE JOIN(__riscv_vmfne_vf_f,ELEN, LMUL, _b, MLEN)
#define VMFGT JOIN(__riscv_vmfgt_vv_f,ELEN, LMUL, _b, MLEN)
#define VMFEQ JOIN(__riscv_vmfeq_vv_f,ELEN, LMUL, _b, MLEN)
#define VCPOP JOIN(__riscv_vcpop, _m_b, MLEN, _, _)
#define VFREDMAX JOIN(__riscv_vfredmax_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
#define VFIRST JOIN(__riscv_vfirst, _m_b, MLEN, _, _)
#define VRGATHER JOIN(__riscv_vrgather, _vx_f, ELEN, LMUL, _)
#define VFDIV JOIN(__riscv_vfdiv, _vf_f, ELEN, LMUL, _)
#define VFDIV_M JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _mu)
#define VFMUL JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _)
#define VFMACC JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _)
#define VFMACC_M JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _mu)
#define VMSOF JOIN(__riscv_vmsof, _m_b, MLEN, _, _)
#define VMANDN JOIN(__riscv_vmandn, _mm_b, MLEN, _, _)
#define VFREDUSUM JOIN(__riscv_vfredusum_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
#define VFABS JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
#define VMFNE JOIN(RISCV_RVV(vmfne_vf_f),ELEN, LMUL, _b, MLEN)
#define VMFGT JOIN(RISCV_RVV(vmfgt_vv_f),ELEN, LMUL, _b, MLEN)
#define VMFEQ JOIN(RISCV_RVV(vmfeq_vv_f),ELEN, LMUL, _b, MLEN)
#define VCPOP JOIN(RISCV_RVV(vcpop), _m_b, MLEN, _, _)
#ifdef RISCV_0p10_INTRINSICS
#define VFREDMAX(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)(v_res, va, vb, gvl)
#define VFREDUSUM(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)(v_res, va, vb, gvl)
#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _m)
#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _m)
#else
#define VFREDMAX JOIN(RISCV_RVV(vfredmax_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)
#define VFREDUSUM JOIN(RISCV_RVV(vfredusum_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)
#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _mu)
#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _mu)
#endif
#define VFIRST JOIN(RISCV_RVV(vfirst), _m_b, MLEN, _, _)
#define VRGATHER JOIN(RISCV_RVV(vrgather), _vx_f, ELEN, LMUL, _)
#define VFDIV JOIN(RISCV_RVV(vfdiv), _vf_f, ELEN, LMUL, _)
#define VFMUL JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _)
#define VFMACC JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _)
#define VMSOF JOIN(RISCV_RVV(vmsof), _m_b, MLEN, _, _)
#define VMANDN JOIN(RISCV_RVV(vmandn), _mm_b, MLEN, _, _)
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif


#define EXTRACT_FLOAT0_V(v) JOIN(__riscv_vfmv_f_s_f, ELEN, LMUL, _f, ELEN)(v)
#define EXTRACT_FLOAT0_V(v) JOIN(RISCV_RVV(vfmv_f_s_f), ELEN, LMUL, _f, ELEN)(v)




FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
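For the width-generic *_vector.c kernels, the same rename is threaded through the JOIN token-pasting machinery, which assembles intrinsic names from ELEN/LMUL fragments at preprocessing time. A worked expansion under assumed single-precision settings (ELEN defined as 32, LMUL as m4, and RISCV_RVV pasting the __riscv_ prefix as sketched earlier):

/* VLEV_FLOAT = JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
 *           -> __riscv_vle ## 32 ## _v_f ## 32 ## m4
 *           -> __riscv_vle32_v_f32m4
 */

Note also that the RISCV_0p10_INTRINSICS branch swaps the masked _mu suffix for _m: the v0.10 API spells masked merge operations with a plain _m suffix, while v1.0 uses the explicit mask-undisturbed policy name.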


+18   -18   kernel/riscv64/zrot_vector.c

@@ -27,27 +27,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"
#if !defined(DOUBLE) #if !defined(DOUBLE)
#define VSETVL(n) __riscv_vsetvl_e32m4(n)
#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m4_t
#define VLEV_FLOAT __riscv_vle32_v_f32m4
#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
#define VSEV_FLOAT __riscv_vse32_v_f32m4
#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
#else
#define VSETVL(n) __riscv_vsetvl_e64m4(n)
#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m4_t
#define VLEV_FLOAT __riscv_vle64_v_f64m4
#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
#define VSEV_FLOAT __riscv_vse64_v_f64m4
#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
#endif


int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)


+16   -16   kernel/riscv64/zscal_vector.c

@@ -27,25 +27,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"
#if !defined(DOUBLE) #if !defined(DOUBLE)
#define VSETVL(n) __riscv_vsetvl_e32m4(n)
#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m4_t
#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
#else
#define VSETVL(n) __riscv_vsetvl_e64m4(n)
#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m4_t
#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
#endif


int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)


+6   -6   kernel/riscv64/zsum_rvv.c

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m4_tu
#else
#define VSETVL(n) __riscv_vsetvl_e64m4(n)
#define VSETVL_MAX __riscv_vsetvlmax_e64m4()
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m4_tu
#endif


FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
@@ -69,8 +69,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)


VLSEG_FLOAT(&v0, &v1, x, vl);


v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
}


} else {
@@ -82,8 +82,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)


VLSSEG_FLOAT(&v0, &v1, x, stride_x, vl);


v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
}


}
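The zsum change mirrors the _tu fix above. Because the tail-undisturbed add takes the destination as an extra leading operand, the accumulate becomes VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl): add v0 into the first vl lanes of v_sum and leave the remaining lanes alone. The v1.0 prototypes side by side, to show the operand-order difference (for illustration only):

vfloat32m4_t __riscv_vfadd_vv_f32m4   (vfloat32m4_t vs2, vfloat32m4_t vs1, size_t vl);
vfloat32m4_t __riscv_vfadd_vv_f32m4_tu(vfloat32m4_t vd, vfloat32m4_t vs2, vfloat32m4_t vs1, size_t vl);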


+8   -8   kernel/riscv64/zsum_vector.c

@@ -53,16 +53,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)


#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
#define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
#define VMFLTVF_FLOAT JOIN(__riscv_vmflt, _vf_f, ELEN, LMUL, MLEN)
#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt), _vf_f, ELEN, LMUL, MLEN)


FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{


+5   -5   kernel/riscv64/zswap_vector.c

@@ -53,12 +53,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)


#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)


int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{

