Fix BLAS and LAPACK tests for C910V and RISCV64_ZVL256B targets (tag: v0.3.27)

@@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
 TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
 endif

+ifeq ($(TARGET), RISCV64_ZVL256B)
+TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
+endif
+
 ifeq ($(TARGET), RISCV64_ZVL128B)
 TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
 endif
@@ -6,6 +6,10 @@ ifeq ($(CORE), x280)
 CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
 FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
 endif
+ifeq ($(CORE), RISCV64_ZVL256B)
+CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
+FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
+endif
 ifeq ($(CORE), RISCV64_ZVL128B)
 CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
 FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
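
The C and Fortran flag sets differ deliberately: spelling out _zvl256b in CCOMMON_OPT lets the C compiler assume a guaranteed minimum vector length of 256 bits for this target, while FCOMMON_OPT stays on the plain rv64imafdcv baseline, presumably for Fortran toolchain compatibility. A quick self-check of what the C-side -march string promises, via the __riscv_v_min_vlen macro from the RISC-V C API (expecting 256 here is an assumption about this exact flag combination):

    /* Build with the CCOMMON_OPT flags above and run on the target. */
    #include <stdio.h>

    int main(void)
    {
    #if defined(__riscv_v_min_vlen)
        printf("guaranteed minimum VLEN: %d bits\n", __riscv_v_min_vlen);
    #else
        printf("vector extension not enabled by -march\n");
    #endif
        return 0;
    }
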
@@ -122,6 +122,7 @@ RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
 RISCV64_ZVL128B
 C910V
 x280
+RISCV64_ZVL256B

 11.LOONGARCH64:
 LOONGSONGENERIC
@@ -91,7 +91,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define BUFFER_SIZE ( 32 << 20)
 #define SEEK_ADDRESS

-#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B)
+#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B) || defined(x280)
 # include <riscv_vector.h>
 #endif
@@ -1691,6 +1691,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CORENAME "x280"
 #else
 #endif
+
+#ifdef FORCE_RISCV64_ZVL256B
+#define FORCE
+#define ARCHITECTURE "RISCV64"
+#define SUBARCHITECTURE "RISCV64_ZVL256B"
+#define SUBDIRNAME "riscv64"
+#define ARCHCONFIG "-DRISCV64_ZVL256B " \
+    "-DL1_DATA_SIZE=64536 -DL1_DATA_LINESIZE=32 " \
+    "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
+    "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+#define LIBNAME "riscv64_zvl256b"
+#define CORENAME "RISCV64_ZVL256B"
+#endif
+
 #ifdef FORCE_RISCV64_ZVL128B
 #define FORCE
 #define ARCHITECTURE "RISCV64"
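
When TARGET=RISCV64_ZVL256B is selected, getarch is compiled with FORCE_RISCV64_ZVL256B and turns the ARCHCONFIG string above into the per-target defines the rest of the build consumes. Roughly what the generated configuration ends up containing, with every value copied verbatim from the block above (the exact file layout is an assumption about the getarch flow):

    #define RISCV64_ZVL256B
    #define L1_DATA_SIZE 64536
    #define L1_DATA_LINESIZE 32
    #define L2_SIZE 262144
    #define L2_LINESIZE 32
    #define DTB_DEFAULT_ENTRIES 128
    #define DTB_SIZE 4096
    #define L2_ASSOCIATIVE 4
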
@@ -59,6 +59,7 @@ SDOTKERNEL = dot_vector.c
 DDOTKERNEL = dot_vector.c
 CDOTKERNEL = zdot_vector.c
 ZDOTKERNEL = zdot_vector.c
+DSDOTKERNEL = dsdot_vector.c

 SNRM2KERNEL = nrm2_vector.c
 DNRM2KERNEL = nrm2_vector.c
@@ -31,15 +31,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # define LMUL m2
 # if defined(DOUBLE)
 # define ELEN 64
+# define ABS fabs
 # else
 # define ELEN 32
+# define ABS fabsf
 # endif
 #else
 # define LMUL m8
 # if defined(DOUBLE)
 # define ELEN 64
+# define ABS fabs
 # else
 # define ELEN 32
+# define ABS fabsf
 # endif
 #endif
@@ -69,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
     FLOAT minf=0.0;
     if (n <= 0 || inc_x <= 0) return(minf);

-    minf = *x;
+    minf = ABS(*x);
     x += inc_x;
     --n;
     if (n == 0) return(minf);
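
The old seed took the raw first element, but ?amin reduces over absolute values, so any negative x[0] corrupted the result: for x = {-5.0, 2.0} the kernel returned -5.0 instead of 2.0. A minimal scalar reference for the intended semantics (a sketch, double variant assumed):

    #include <math.h>

    static double amin_ref(long n, const double *x, long inc_x)
    {
        if (n <= 0 || inc_x <= 0) return 0.0;
        double minf = fabs(x[0]);            /* ABS(*x), as in the fix */
        for (long j = 1; j < n; j++) {
            double v = fabs(x[j * inc_x]);
            if (v < minf) minf = v;
        }
        return minf;
    }
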
@@ -67,7 +67,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
     BLASLONG i=0, j=0;
-    BLASLONG ix=0;
     FLOAT asumf=0.0;
     if (n <= 0 || inc_x <= 0) return(asumf);
     unsigned int gvl = 0;
@@ -103,17 +102,15 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

     unsigned int stride_x = inc_x * sizeof(FLOAT);
     if(gvl <= n/2){
         v_sum = VFMVVF_FLOAT(0, gvl);
-        BLASLONG inc_xv = inc_x * gvl;
         for(i=0,j=0; i<n/(gvl*2); i++){
-            v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
+            v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
             v0 = VFABS_FLOAT(v0, gvl);
             v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
-            v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
+            v1 = VLSEV_FLOAT(&x[(j+gvl)*inc_x], stride_x, gvl);
             v1 = VFABS_FLOAT(v1, gvl);
             v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
             j += gvl * 2;
-            inc_xv += inc_xv * 2;
         }
         v_res = VFREDSUMVS_FLOAT(v_sum, v_res, gvl);
     }
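
Two bugs compounded in this strided path: ix was never advanced, so every pass reloaded from x[0], and inc_xv += inc_xv * 2 tripled the offset each iteration instead of stepping it by a fixed amount. The fix derives both load addresses directly from the element counter j: the first block starts at element j, the second at element j + gvl, and element j of a stride-inc_x vector lives at x[j * inc_x]. The addressing in scalar form (a sketch, double variant assumed):

    #include <math.h>

    static double asum_ref(long n, const double *x, long inc_x)
    {
        double sum = 0.0;
        if (n <= 0 || inc_x <= 0) return sum;
        for (long j = 0; j < n; j++)
            sum += fabs(x[j * inc_x]);   /* element j of the strided vector */
        return sum;
    }
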
@@ -60,7 +60,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
 {
-    if (n < 0) return(0);
+    if (n <= 0) return(0);

     BLASLONG i=0, j=0;
     unsigned int gvl = 0;
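
This is the first of several quick-return fixes in this change (rot, nrm2, and the z-kernels below get the same treatment): BLAS semantics make n == 0 a valid call that must be a quiet no-op, and the LAPACK test drivers exercise exactly that. With the old n < 0 test, n == 0 fell through into the vector body, where loop bounds of the form n/gvl divide by the result of a zero-length vsetvl. A sketch of the hazard (names hypothetical):

    /* vsetvl with avl == 0 yields gvl == 0, so an n/gvl loop bound
       divides by zero -- hence the n <= 0 guard before any vector setup. */
    long loop_bound(long n, unsigned long gvl)
    {
        return (long)(n / gvl);   /* undefined behavior when gvl == 0 */
    }
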
@@ -196,7 +196,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
     asm volatile(
         "vsetvli zero, zero, e64,m1 \n\t"
-        "fmv.w.x ft11, zero \n\t"
+        "fmv.d.x ft11, zero \n\t"
         "mv t0, %[BK] \n\t"
         "vfmv.v.f v16, ft11 \n\t"
@@ -0,0 +1,152 @@
+/***************************************************************************
+Copyright (c) 2023, The OpenBLAS Project
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+{
+    BLASLONG i=0, j=0;
+    double dot = 0.0 ;
+
+    if ( n < 1 ) return(dot);
+
+    vfloat64m4_t vr;
+    vfloat32m2_t vx, vy;
+    unsigned int gvl = 0;
+    vfloat64m1_t v_res, v_z0;
+
+    gvl = vsetvlmax_e64m1();
+    v_res = vfmv_v_f_f64m1(0, gvl);
+    v_z0 = vfmv_v_f_f64m1(0, gvl);
+
+    if(inc_x == 1 && inc_y == 1){
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+    }else if(inc_y == 1){
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        int stride_x = inc_x * sizeof(FLOAT);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+    }else if(inc_x == 1){
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        int stride_y = inc_y * sizeof(FLOAT);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+    }else{
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        int stride_x = inc_x * sizeof(FLOAT);
+        int stride_y = inc_y * sizeof(FLOAT);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+    }
+
+    return(dot);
+}
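
The new kernel supplies DSDOT: single-precision inputs accumulated in double precision via the widening multiply-add vfwmacc_vv_f64m4 (f32m2 operands into an f64m4 accumulator), with separate branches for unit and non-unit strides on either operand. A hedged usage sketch against the Fortran-convention symbol (the exact binding and integer width depend on how the library was built):

    #include <stdio.h>

    extern double dsdot_(int *n, float *x, int *incx, float *y, int *incy);

    int main(void)
    {
        /* 1e8f is exactly representable in float, but 1e8f + 1.0f is not:
           single-precision accumulation would lose both 1.0f terms. */
        float x[] = { 1e8f, 1.0f, -1e8f, 1.0f };
        float y[] = { 1.0f, 1.0f,  1.0f, 1.0f };
        int n = 4, inc = 1;
        printf("%f\n", dsdot_(&n, x, &inc, y, &inc));   /* 2.0 in double */
        return 0;
    }
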
@@ -139,7 +139,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

         v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
         FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-        if(cur_minf > minf){
+        if(cur_minf < minf){
             //tail index
             v_min_index = VIDV_UINT(gvl);
             v_min_index = VADDVX_UINT(v_min_index, j, gvl);
@@ -185,7 +185,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

         v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
         FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-        if(cur_minf > minf){
+        if(cur_minf < minf){
             //tail index
             v_min_index = VIDV_UINT(gvl);
             v_min_index = VADDVX_UINT(v_min_index, j, gvl);
@@ -156,7 +156,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

         v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
         FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-        if(cur_minf > minf){
+        if(cur_minf < minf){
             //tail index
             v_min_index = VIDV_UINT(gvl);
             v_min_index = VADDVX_UINT(v_min_index, j, gvl);
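
Three index-of-minimum kernels carried the same inverted comparison. After each vector block is reduced, the running minimum may only be replaced when the new block's minimum is smaller, so the merge test must be cur_minf < minf; with > the kernels updated on larger values instead. The blocked reduction in scalar form, with the fixed comparison marked (a sketch):

    static long imin_blocked_ref(long n, const double *x, long block)
    {
        if (n <= 0 || block <= 0) return 0;
        long best = 0;
        double minf = x[0];
        for (long j = 0; j < n; j += block) {
            long end = (j + block < n) ? (j + block) : n;
            double cur_minf = x[j];          /* reduce one block */
            long cur_idx = j;
            for (long k = j + 1; k < end; k++)
                if (x[k] < cur_minf) { cur_minf = x[k]; cur_idx = k; }
            if (cur_minf < minf) {           /* the fix: '<', not '>' */
                minf = cur_minf;
                best = cur_idx;
            }
        }
        return best + 1;                     /* BLAS index results are 1-based */
    }
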
@@ -104,7 +104,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
     BLASLONG i=0;

-    if(n <= 0) return(0.0);
+    if (n <= 0 || inc_x <= 0) return(0.0);
     if(n == 1) return (ABS(x[0]));

     unsigned int gvl = 0;
@@ -61,7 +61,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
     BLASLONG i=0, j=0;
     double len = 0.0 ;

-    if ( n < 0 ) return(0.0);
+    if ( n <= 0 ) return(0.0);
     if(n == 1) return (ABS(x[0]));

     FLOAT_V_T vr, v0, v1;
@@ -67,7 +67,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
     BLASLONG stride_x, stride_y;
     FLOAT_V_T vx0, vx1, vy0, vy1;

-    if (n < 0) return(0);
+    if (n <= 0) return(0);

     unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
     if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
@@ -60,17 +60,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
 #ifdef RISCV_0p10_INTRINSICS
 #define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
-#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
 #else
 #define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
 #endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
 #define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
 #define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
 #define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)
 #define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
@@ -91,10 +89,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         for(; i<n/gvl; i++){
             v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
             v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-            mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-            v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-            mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-            v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+            v0 = VFABSV_FLOAT(v0, gvl);
+            v1 = VFABSV_FLOAT(v1, gvl);
+
             v0 = VFADDVV_FLOAT(v0, v1, gvl);
             v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
@@ -108,10 +105,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         gvl = VSETVL(n-j);
         v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
         v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-        mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-        v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-        mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-        v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+        v0 = VFABSV_FLOAT(v0, gvl);
+        v1 = VFABSV_FLOAT(v1, gvl);
         v1 = VFADDVV_FLOAT(v0, v1, gvl);
         v_res = VFREDMAXVS_FLOAT(v1, v_res, gvl);
     }
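
The removed pairs computed an absolute value by hand: vmflt produced a mask of the negative lanes and the masked vfrsub replaced those lanes with 0 - v. A single vfabs per vector expresses the same operation and drops the mask bookkeeping; the zamin kernel below receives the identical simplification. The per-lane equivalence, checked in scalar form:

    #include <assert.h>
    #include <math.h>

    /* vmflt + masked vfrsub, per lane: (v < 0) ? (0 - v) : v */
    static double abs_via_mask(double v)
    {
        return (v < 0.0) ? (0.0 - v) : v;
    }

    int main(void)
    {
        double probe[] = { -2.5, -0.0, 0.0, 3.75 };
        for (int i = 0; i < 4; i++)   /* vfabs does this in one instruction;
                                         -0.0 compares equal to +0.0 here */
            assert(abs_via_mask(probe[i]) == fabs(probe[i]));
        return 0;
    }
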
@@ -62,17 +62,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
 #ifdef RISCV_0p10_INTRINSICS
 #define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
-#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
 #else
 #define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
 #endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
 #define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
 #define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
 #define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _)
 #define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
@@ -93,10 +91,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         for(; i<n/gvl; i++){
             v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
             v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-            mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-            v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-            mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-            v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+            v0 = VFABSV_FLOAT(v0, gvl);
+            v1 = VFABSV_FLOAT(v1, gvl);
+
             v0 = VFADDVV_FLOAT(v0, v1, gvl);
             v_min = VFMINVV_FLOAT(v_min, v0, gvl);
@@ -110,10 +107,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         gvl = VSETVL(n-j);
         v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
         v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-        mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-        v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-        mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-        v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+        v0 = VFABSV_FLOAT(v0, gvl);
+        v1 = VFABSV_FLOAT(v1, gvl);
         v1 = VFADDVV_FLOAT(v0, v1, gvl);
         v_res = VFREDMINVS_FLOAT(v1, v_res, gvl);
     }
@@ -96,7 +96,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
     BLASLONG i=0;

-    if(n < 0) return(0.0);
+    if (n <= 0 || inc_x <= 0) return(0.0);

     FLOAT_V_T v_ssq, v_scale, v0, v1, v_zero;
     unsigned int gvl = 0;
@@ -69,7 +69,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
     unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
     if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }

-    if (n < 0) return(0);
+    if (n <= 0) return(0);

     if(inc_x == 1 && inc_y == 1){
         BLASLONG n2 = n * 2;
         if(gvl <= n2/2){