During the last iteration of some RVV operations, accumulators can get overwritten when VL < VLMAX and the tail policy is agnostic. This commit changes the tail policy of the affected intrinsics to undisturbed.
tags/v0.3.27
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VLEV_FLOAT __riscv_vle32_v_f32m8 | #define VLEV_FLOAT __riscv_vle32_v_f32m8 | ||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | ||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 | #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu | |||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 | ||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 | #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 | ||||
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 | #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 | ||||
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VLEV_FLOAT __riscv_vle64_v_f64m8 | #define VLEV_FLOAT __riscv_vle64_v_f64m8 | ||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | ||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 | #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu | |||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 | ||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 | #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 | ||||
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 | #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 | ||||
@@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
va = VLEV_FLOAT(a_ptr, vl); | va = VLEV_FLOAT(a_ptr, vl); | ||||
vx = VLEV_FLOAT(x_ptr, vl); | vx = VLEV_FLOAT(x_ptr, vl); | ||||
vr = VFMACCVV_FLOAT(vr, va, vx, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl); | |||||
} | } | ||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); | v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); | ||||
@@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
va = VLEV_FLOAT(a_ptr, vl); | va = VLEV_FLOAT(a_ptr, vl); | ||||
vx = VLSEV_FLOAT(x_ptr, stride_x, vl); | vx = VLSEV_FLOAT(x_ptr, stride_x, vl); | ||||
vr = VFMACCVV_FLOAT(vr, va, vx, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl); | |||||
} | } | ||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); | v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); | ||||
@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VSEV_FLOAT __riscv_vse32_v_f32m8 | #define VSEV_FLOAT __riscv_vse32_v_f32m8 | ||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | ||||
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8 | #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu | |||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 | #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 | ||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 | #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 | ||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 | #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 | ||||
@@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VSEV_FLOAT __riscv_vse64_v_f64m8 | #define VSEV_FLOAT __riscv_vse64_v_f64m8 | ||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | ||||
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8 | #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu | |||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 | #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 | ||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 | #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 | ||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 | #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 | ||||
@@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSEV_FLOAT(&y[i], vy, vl); | VSEV_FLOAT(&y[i], vy, vl); | ||||
vx = VLEV_FLOAT(&x[i], vl); | vx = VLEV_FLOAT(&x[i], vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
} | } | ||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); | v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); | ||||
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | ||||
vx = VLEV_FLOAT(&x[i], vl); | vx = VLEV_FLOAT(&x[i], vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
iy += inc_yv; | iy += inc_yv; | ||||
} | } | ||||
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSEV_FLOAT(&y[i], vy, vl); | VSEV_FLOAT(&y[i], vy, vl); | ||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
ix += inc_xv; | ix += inc_xv; | ||||
} | } | ||||
@@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | ||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
ix += inc_xv; | ix += inc_xv; | ||||
iy += inc_yv; | iy += inc_yv; | ||||
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VSEV_FLOAT __riscv_vse32_v_f32m8 | #define VSEV_FLOAT __riscv_vse32_v_f32m8 | ||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | ||||
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8 | #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu | |||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 | #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 | ||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 | #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 | ||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 | #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 | ||||
@@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VSEV_FLOAT __riscv_vse64_v_f64m8 | #define VSEV_FLOAT __riscv_vse64_v_f64m8 | ||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | ||||
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8 | #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu | |||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 | #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 | ||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 | #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 | ||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 | #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 | ||||
@@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSEV_FLOAT(&y[i], vy, vl); | VSEV_FLOAT(&y[i], vy, vl); | ||||
vx = VLEV_FLOAT(&x[i], vl); | vx = VLEV_FLOAT(&x[i], vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
} | } | ||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max); | v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max); | ||||
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | ||||
vx = VLEV_FLOAT(&x[i], vl); | vx = VLEV_FLOAT(&x[i], vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
iy += inc_yv; | iy += inc_yv; | ||||
} | } | ||||
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSEV_FLOAT(&y[i], vy, vl); | VSEV_FLOAT(&y[i], vy, vl); | ||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
ix += inc_xv; | ix += inc_xv; | ||||
} | } | ||||
@@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA | |||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | VSSEV_FLOAT(&y[iy], stride_y, vy, vl); | ||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | vx = VLSEV_FLOAT(&x[ix], stride_x, vl); | ||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); | |||||
ix += inc_xv; | ix += inc_xv; | ||||
iy += inc_yv; | iy += inc_yv; | ||||
} | } | ||||
@@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 | #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 | ||||
#define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 | #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 | ||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 | #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 | |||||
#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu | |||||
#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu | |||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 | ||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 | #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 | ||||
#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 | #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 | ||||
@@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4 | #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4 | ||||
#define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4 | #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4 | ||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 | #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 | ||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 | |||||
#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4 | |||||
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu | |||||
#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu | |||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 | #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 | ||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 | #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 | ||||
#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 | #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 | ||||
@@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl); | VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl); | ||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | ||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); | |||||
vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); | |||||
vi = VFMACCVV_FLOAT(vi, va1, vx0, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); | |||||
vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); | |||||
vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl); | |||||
#else | #else | ||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); | |||||
vr = VFMACCVV_FLOAT(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); | |||||
vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); | |||||
vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl); | |||||
#endif | #endif | ||||
j += vl * 2; | j += vl * 2; | ||||
ix += vl * inc_x * 2; | ix += vl * inc_x * 2; | ||||
@@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl); | VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl); | ||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | ||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); | |||||
vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); | |||||
vi = VFMACCVV_FLOAT(vi, va1, vx0, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); | |||||
vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); | |||||
vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl); | |||||
#else | #else | ||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); | |||||
vr = VFMACCVV_FLOAT(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); | |||||
vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); | |||||
vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl); | |||||
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); | |||||
vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl); | |||||
#endif | #endif | ||||
j += vl * 2; | j += vl * 2; | ||||
ix += vl * inc_x * 2; | ix += vl * inc_x * 2; | ||||