| @@ -101,6 +101,16 @@ CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPE | |||
| CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
| CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
| float cblas_samax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||
| double cblas_damax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||
| float cblas_scamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
| double cblas_dzamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
| float cblas_samin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||
| double cblas_damin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||
| float cblas_scamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
| double cblas_dzamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
| CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||
| CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||
| CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||
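The hunk above adds C prototypes for the BLAS extension routines ?amax/?amin (value of the largest/smallest absolute element; for complex vectors the reduction is over |Re(x_i)| + |Im(x_i)|, as the unit tests further down also assume) and i?max (index of the largest element). A minimal usage sketch, assuming a build of this branch installed with its cblas.h and linked as usual (e.g. -lopenblas):

#include <stdio.h>
#include <cblas.h>

int main(void) {
    float x[]  = { -1.1f, 2.2f, -3.3f, 4.4f };   /* four real elements   */
    float cx[] = { -1.1f, 2.2f, -3.3f, 4.4f };   /* two complex elements */

    float amax  = cblas_samax(4, x, 1);    /* largest  |x_i|      -> 4.4 */
    float amin  = cblas_samin(4, x, 1);    /* smallest |x_i|      -> 1.1 */
    float camax = cblas_scamax(2, cx, 1);  /* largest  |Re|+|Im|  -> 7.7 */
    float camin = cblas_scamin(2, cx, 1);  /* smallest |Re|+|Im|  -> 3.3 */

    printf("%g %g %g %g\n", amax, amin, camax, camin);
    return 0;
}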
| @@ -116,6 +126,9 @@ void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS | |||
| void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_caxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_zaxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); | |||
| void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||
| @@ -130,6 +130,8 @@ endif () | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||
| GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| @@ -270,7 +270,8 @@ CSBLAS1OBJS = \ | |||
| cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \ | |||
| cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \ | |||
| cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \ | |||
| cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) | |||
| cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) cblas_samax.$(SUFFIX) \ | |||
| cblas_samin.$(SUFFIX) | |||
| CSBLAS2OBJS = \ | |||
| cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \ | |||
| @@ -295,7 +296,8 @@ CDBLAS1OBJS = \ | |||
| cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ | |||
| cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \ | |||
| cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \ | |||
| cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) | |||
| cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) cblas_damax.$(SUFFIX) \ | |||
| cblas_damin.$(SUFFIX) | |||
| CDBLAS2OBJS = \ | |||
| cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \ | |||
| @@ -315,7 +317,7 @@ CCBLAS1OBJS = \ | |||
| cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \ | |||
| cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ | |||
| cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ | |||
| cblas_caxpby.$(SUFFIX) \ | |||
| cblas_caxpby.$(SUFFIX) cblas_scamax.$(SUFFIX) cblas_caxpyc.$(SUFFIX) cblas_scamin.$(SUFFIX) \ | |||
| cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX) | |||
| CCBLAS2OBJS = \ | |||
| @@ -340,12 +342,12 @@ CXERBLAOBJ = \ | |||
| CZBLAS1OBJS = \ | |||
| cblas_izamax.$(SUFFIX) cblas_izamin.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \ | |||
| cblas_zcopy.$(SUFFIX) \ | |||
| cblas_zcopy.$(SUFFIX) cblas_dzamax.$(SUFFIX) cblas_dzamin.$(SUFFIX) \ | |||
| cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \ | |||
| cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \ | |||
| cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ | |||
| cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ | |||
| cblas_zaxpby.$(SUFFIX) \ | |||
| cblas_zaxpby.$(SUFFIX) cblas_zaxpyc.$(SUFFIX) \ | |||
| cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX) | |||
| @@ -1533,6 +1535,30 @@ cblas_icmin.$(SUFFIX) cblas_icmin.$(PSUFFIX) : imax.c | |||
| cblas_izmin.$(SUFFIX) cblas_izmin.$(PSUFFIX) : imax.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F) | |||
| cblas_samax.$(SUFFIX) cblas_samax.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) | |||
| cblas_damax.$(SUFFIX) cblas_damax.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) | |||
| cblas_scamax.$(SUFFIX) cblas_scamax.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) | |||
| cblas_dzamax.$(SUFFIX) cblas_dzamax.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F) | |||
| cblas_samin.$(SUFFIX) cblas_samin.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) | |||
| cblas_damin.$(SUFFIX) cblas_damin.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) | |||
| cblas_scamin.$(SUFFIX) cblas_scamin.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) | |||
| cblas_dzamin.$(SUFFIX) cblas_dzamin.$(PSUFFIX) : max.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F) | |||
| cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
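All of the new objects above are built from the single shared max.c source in this directory; the -DUSE_ABS / -DUSE_MIN combination selects which of the four reductions the translation unit implements (max, min, amax, amin), and -DCBLAS adds the CBLAS entry point on top of the Fortran-style one. Schematically, the pattern looks like the following sketch (illustrative only, not the literal contents of max.c):

#include <math.h>

/* illustrative only: one source fanning out into max/min/amax/amin via macros */
#ifdef USE_ABS
#define PREPROCESS(v) fabs(v)     /* amax / amin compare absolute values */
#else
#define PREPROCESS(v) (v)         /* max / min compare signed values     */
#endif

#ifdef USE_MIN
#define BETTER(a, b) ((a) < (b))  /* keep the smaller candidate          */
#else
#define BETTER(a, b) ((a) > (b))  /* keep the larger candidate           */
#endif

double reduce(const double *x, int n, int incx) {
    double best = PREPROCESS(x[0]);
    for (int i = 1; i < n; i++) {
        double v = PREPROCESS(x[i * incx]);
        if (BETTER(v, best)) best = v;
    }
    return best;
}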
| @@ -1627,6 +1653,19 @@ cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c | |||
| cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| cblas_caxpyc.$(SUFFIX) cblas_caxpyc.$(PSUFFIX) : zaxpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) | |||
| cblas_zaxpyc.$(SUFFIX) cblas_zaxpyc.$(PSUFFIX) : zaxpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) | |||
| cblas_xaxpyc.$(SUFFIX) cblas_xaxpyc.$(PSUFFIX) : zaxpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F) | |||
| sscal.$(SUFFIX) sscal.$(PSUFFIX) : scal.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| dscal.$(SUFFIX) dscal.$(PSUFFIX) : scal.c | |||
| cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| @@ -145,8 +145,13 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){ | |||
| #else | |||
| #ifdef COMPLEX | |||
| FLOAT CNAME(blasint n, void *vx, blasint incx){ | |||
| FLOAT *x = (FLOAT*) vx; | |||
| #else | |||
| FLOAT CNAME(blasint n, FLOAT *x, blasint incx){ | |||
| #endif | |||
| FLOAT ret; | |||
| PRINT_DEBUG_CNAME; | |||
| @@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lsx.S | |||
| SAMAXKERNEL = amax_lsx.S | |||
| DAMAXKERNEL = amax_lsx.S | |||
| CAMAXKERNEL = camax_lsx.S | |||
| ZAMAXKERNEL = camax_lsx.S | |||
| SAMINKERNEL = amin_lsx.S | |||
| DAMINKERNEL = amin_lsx.S | |||
| CAMINKERNEL = camin_lsx.S | |||
| ZAMINKERNEL = camin_lsx.S | |||
| SMAXKERNEL = max_lsx.S | |||
| DMAXKERNEL = max_lsx.S | |||
| @@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lasx.S | |||
| SAMAXKERNEL = amax_lasx.S | |||
| DAMAXKERNEL = amax_lasx.S | |||
| CAMAXKERNEL = camax_lasx.S | |||
| ZAMAXKERNEL = camax_lasx.S | |||
| SAMINKERNEL = amin_lasx.S | |||
| DAMINKERNEL = amin_lasx.S | |||
| CAMINKERNEL = camin_lasx.S | |||
| ZAMINKERNEL = camin_lasx.S | |||
| SMAXKERNEL = max_lsx.S | |||
| DMAXKERNEL = max_lsx.S | |||
| @@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| xvldrepl.w VM0, X, 0 | |||
| #endif | |||
| XVFSUB VM0, VM0, VM0 | |||
| bne INCX, TEMP, .L20 | |||
| srai.d I, N, 4 | |||
| @@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| vldrepl.w VM0, X, 0 | |||
| #endif | |||
| VFSUB VM0, VM0, VM0 | |||
| bne INCX, TEMP, .L20 | |||
| srai.d I, N, 3 | |||
| @@ -63,42 +63,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| bge $r0, N, .L999 | |||
| bge $r0, INCX, .L999 | |||
| li.d TEMP, 1 | |||
| li.w I, -1 | |||
| slli.d TEMP, TEMP, ZBASE_SHIFT | |||
| slli.d INCX, INCX, ZBASE_SHIFT | |||
| xvreplgr2vr.w neg1, I | |||
| xvffint.s.w neg1, neg1 | |||
| srai.d I, N, 3 | |||
| bne INCX, TEMP, .L20 | |||
| bge $r0, I, .L23 | |||
| .align 3 | |||
| .L10: | |||
| xvld VX0, X, 0 * SIZE | |||
| xvld VX1, X, 8 * SIZE | |||
| addi.d I, I, -1 | |||
| xvld VX0, X, 0 | |||
| xvld VX1, X, 32 | |||
| #ifdef DOUBLE | |||
| xvpickev.d x1, VX1, VX0 | |||
| xvpickod.d x2, VX1, VX0 | |||
| #else | |||
| xvpickev.w x1, VX1, VX0 | |||
| xvpickod.w x2, VX1, VX0 | |||
| xvfmul.s x3, neg1, x1 | |||
| xvfmul.s x4, neg1, x2 | |||
| xvfcmp.clt.s VT0, x1, res0 | |||
| xvfcmp.clt.s VT1, x2, res0 | |||
| xvbitsel.v x1, x1, x3, VT0 | |||
| xvbitsel.v x2, x2, x4, VT1 | |||
| #endif | |||
| XVFSUB x3, res0, x1 | |||
| XVFSUB x4, res0, x2 | |||
| XVFMAX x1, x1, x3 | |||
| XVFMAX x2, x2, x4 | |||
| XVFADD VM1, x1, x2 | |||
| XVFMAX VM0, VM0, VM1 | |||
| #ifdef DOUBLE | |||
| xvld VX0, X, 64 | |||
| xvld VX1, X, 96 | |||
| xvpickev.d x1, VX1, VX0 | |||
| xvpickod.d x2, VX1, VX0 | |||
| XVFSUB x3, res0, x1 | |||
| XVFSUB x4, res0, x2 | |||
| XVFMAX x1, x1, x3 | |||
| XVFMAX x2, x2, x4 | |||
| XVFADD VM1, x1, x2 | |||
| XVFMAX VM0, VM0, VM1 | |||
| #endif | |||
| addi.d I, I, -1 | |||
| addi.d X, X, 16 * SIZE | |||
| xvfadd.s VM1, x1, x2 | |||
| xvfmax.s VM0, VM0, VM1 | |||
| blt $r0, I, .L10 | |||
| .align 3 | |||
| .L11: | |||
| #ifdef DOUBLE | |||
| xvpickve.d x1, VM0, 0 | |||
| xvpickve.d x2, VM0, 1 | |||
| XVFMAX VM0, x1, x2 | |||
| #else | |||
| xvpickve.w x1, VM0, 0 | |||
| xvpickve.w x2, VM0, 1 | |||
| xvpickve.w x3, VM0, 2 | |||
| xvpickve.w x4, VM0, 3 | |||
| xvfmax.s VM1, x1, x2 | |||
| xvfmax.s VM0, x3, x4 | |||
| xvfmax.s VM0, VM0, VM1 | |||
| XVFMAX VM0, x1, x2 | |||
| XVFMAX VM1, x3, x4 | |||
| XVFMAX VM0, VM0, VM1 | |||
| #endif | |||
| b .L23 | |||
| .align 3 | |||
| @@ -107,66 +125,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L21: | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| addi.d I, I, -1 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s3, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s3, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s4, t1, t3 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s4, t1, t3 | |||
| blt $r0, I, .L21 | |||
| .align 3 | |||
| .L22: | |||
| fmax.s s1, s1, s2 | |||
| fmax.s s3, s3, s4 | |||
| fmax.s s1, s1, s3 | |||
| FMAX s1, s1, s2 | |||
| FMAX s3, s3, s4 | |||
| FMAX s1, s1, s3 | |||
| .align 3 | |||
| .L23: //N<8 | |||
| @@ -182,12 +200,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| FABS a1, a1 | |||
| ADD a0, a0, a1 | |||
| add.d X, X, INCX | |||
| fmax.s s1, a0, s1 | |||
| FMAX s1, a0, s1 | |||
| blt $r0, I, .L24 | |||
| .align 3 | |||
| .L999: | |||
| fmov.s $f0, $f22 | |||
| MOV $f0, $f22 | |||
| jirl $r0, $r1, 0x0 | |||
| .align 3 | |||
| @@ -63,54 +63,87 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| bge $r0, N, .L999 | |||
| bge $r0, INCX, .L999 | |||
| li.d TEMP, 1 | |||
| li.w I, -1 | |||
| slli.d TEMP, TEMP, ZBASE_SHIFT | |||
| slli.d INCX, INCX, ZBASE_SHIFT | |||
| vreplgr2vr.w neg1, I | |||
| vffint.s.w neg1, neg1 | |||
| srai.d I, N, 3 | |||
| bne INCX, TEMP, .L20 | |||
| bge $r0, I, .L23 | |||
| .align 3 | |||
| .L10: | |||
| vld VX0, X, 0 * SIZE | |||
| vld VX1, X, 4 * SIZE | |||
| addi.d I, I, -1 | |||
| vld VX0, X, 0 | |||
| vld VX1, X, 16 | |||
| #ifdef DOUBLE | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| #else | |||
| vpickev.w x1, VX1, VX0 | |||
| vpickod.w x2, VX1, VX0 | |||
| vfmul.s x3, neg1, x1 | |||
| vfmul.s x4, neg1, x2 | |||
| vfcmp.clt.s VT0, x1, res0 | |||
| vfcmp.clt.s VT1, x2, res0 | |||
| vld VX0, X, 8 * SIZE | |||
| vbitsel.v x1, x1, x3, VT0 | |||
| vbitsel.v x2, x2, x4, VT1 | |||
| vld VX1, X, 12 * SIZE | |||
| vfadd.s VM1, x1, x2 | |||
| #endif | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD VM1, x1, x2 | |||
| vld VX0, X, 32 | |||
| vld VX1, X, 48 | |||
| #ifdef DOUBLE | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| #else | |||
| vpickev.w x1, VX1, VX0 | |||
| vpickod.w x2, VX1, VX0 | |||
| vfmul.s x3, neg1, x1 | |||
| vfmul.s x4, neg1, x2 | |||
| vfcmp.clt.s VT0, x1, res0 | |||
| vfcmp.clt.s VT1, x2, res0 | |||
| #endif | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD x1, x1, x2 | |||
| VFMAX VM1, x1, VM1 | |||
| VFMAX VM0, VM0, VM1 | |||
| #ifdef DOUBLE | |||
| vld VX0, X, 64 | |||
| vld VX1, X, 80 | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD VM1, x1, x2 | |||
| vld VX0, X, 96 | |||
| vld VX1, X, 112 | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD x1, x1, x2 | |||
| VFMAX VM1, x1, VM1 | |||
| VFMAX VM0, VM0, VM1 | |||
| #endif | |||
| addi.d X, X, 16 * SIZE | |||
| vbitsel.v x1, x1, x3, VT0 | |||
| vbitsel.v x2, x2, x4, VT1 | |||
| vfadd.s x1, x1, x2 | |||
| vfmax.s VM1, x1, VM1 | |||
| vfmax.s VM0, VM0, VM1 | |||
| addi.d I, I, -1 | |||
| blt $r0, I, .L10 | |||
| .align 3 | |||
| .L11: | |||
| #ifdef DOUBLE | |||
| vreplvei.d x1, VM0, 0 | |||
| vreplvei.d x2, VM0, 1 | |||
| VFMAX VM0, x1, x2 | |||
| #else | |||
| vreplvei.w x1, VM0, 0 | |||
| vreplvei.w x2, VM0, 1 | |||
| vreplvei.w x3, VM0, 2 | |||
| vreplvei.w x4, VM0, 3 | |||
| vfmax.s VM1, x1, x2 | |||
| vfmax.s VM0, x3, x4 | |||
| vfmax.s VM0, VM0, VM1 | |||
| VFMAX VM1, x1, x2 | |||
| VFMAX VM0, x3, x4 | |||
| VFMAX VM0, VM0, VM1 | |||
| #endif | |||
| b .L23 | |||
| .align 3 | |||
| @@ -119,66 +152,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L21: | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| addi.d I, I, -1 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s3, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s3, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmax.s s4, t1, t3 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMAX s4, t1, t3 | |||
| blt $r0, I, .L21 | |||
| .align 3 | |||
| .L22: | |||
| fmax.s s1, s1, s2 | |||
| fmax.s s3, s3, s4 | |||
| fmax.s s1, s1, s3 | |||
| FMAX s1, s1, s2 | |||
| FMAX s3, s3, s4 | |||
| FMAX s1, s1, s3 | |||
| .align 3 | |||
| .L23: //N<8 | |||
| @@ -187,19 +220,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L24: | |||
| fld.s a0, X, 0 * SIZE | |||
| fld.s a1, X, 1 * SIZE | |||
| LD a0, X, 0 * SIZE | |||
| LD a1, X, 1 * SIZE | |||
| addi.d I, I, -1 | |||
| fabs.s a0, a0 | |||
| fabs.s a1, a1 | |||
| fadd.s a0, a0, a1 | |||
| FABS a0, a0 | |||
| FABS a1, a1 | |||
| ADD a0, a0, a1 | |||
| add.d X, X, INCX | |||
| fmax.s s1, a0, s1 | |||
| FMAX s1, a0, s1 | |||
| blt $r0, I, .L24 | |||
| .align 3 | |||
| .L999: | |||
| fmov.s $f0, $f22 | |||
| MOV $f0, $f22 | |||
| jirl $r0, $r1, 0x0 | |||
| .align 3 | |||
| @@ -61,49 +61,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| xvxor.v res0, res0, res0 | |||
| bge $r0, N, .L999 | |||
| bge $r0, INCX, .L999 | |||
| fld.s a0, X, 0 * SIZE | |||
| fld.s a1, X, 1 * SIZE | |||
| fabs.s a0, a0 | |||
| fabs.s a1, a1 | |||
| fadd.s s1, a1, a0 | |||
| LD a0, X, 0 * SIZE | |||
| LD a1, X, 1 * SIZE | |||
| FABS a0, a0 | |||
| FABS a1, a1 | |||
| ADD s1, a1, a0 | |||
| #ifdef DOUBLE | |||
| xvreplve0.d VM0, VM0 | |||
| #else | |||
| xvreplve0.w VM0, VM0 | |||
| #endif | |||
| li.d TEMP, 1 | |||
| li.w I, -1 | |||
| slli.d TEMP, TEMP, ZBASE_SHIFT | |||
| slli.d INCX, INCX, ZBASE_SHIFT | |||
| xvreplgr2vr.w neg1, I | |||
| xvffint.s.w neg1, neg1 | |||
| srai.d I, N, 3 | |||
| bne INCX, TEMP, .L20 | |||
| bge $r0, I, .L23 | |||
| .align 3 | |||
| .L10: | |||
| xvld VX0, X, 0 * SIZE | |||
| xvld VX1, X, 8 * SIZE | |||
| addi.d I, I, -1 | |||
| xvld VX0, X, 0 | |||
| xvld VX1, X, 32 | |||
| #ifdef DOUBLE | |||
| xvpickev.d x1, VX1, VX0 | |||
| xvpickod.d x2, VX1, VX0 | |||
| #else | |||
| xvpickev.w x1, VX1, VX0 | |||
| xvpickod.w x2, VX1, VX0 | |||
| xvfmul.s x3, neg1, x1 | |||
| xvfmul.s x4, neg1, x2 | |||
| xvfcmp.clt.s VT0, x1, res0 | |||
| xvfcmp.clt.s VT1, x2, res0 | |||
| xvbitsel.v x1, x1, x3, VT0 | |||
| xvbitsel.v x2, x2, x4, VT1 | |||
| #endif | |||
| XVFSUB x3, res0, x1 | |||
| XVFSUB x4, res0, x2 | |||
| XVFMAX x1, x1, x3 | |||
| XVFMAX x2, x2, x4 | |||
| XVFADD VM1, x1, x2 | |||
| XVFMIN VM0, VM0, VM1 | |||
| #ifdef DOUBLE | |||
| xvld VX0, X, 64 | |||
| xvld VX1, X, 96 | |||
| xvpickev.d x1, VX1, VX0 | |||
| xvpickod.d x2, VX1, VX0 | |||
| XVFSUB x3, res0, x1 | |||
| XVFSUB x4, res0, x2 | |||
| XVFMAX x1, x1, x3 | |||
| XVFMAX x2, x2, x4 | |||
| XVFADD VM1, x1, x2 | |||
| XVFMIN VM0, VM0, VM1 | |||
| #endif | |||
| addi.d I, I, -1 | |||
| addi.d X, X, 16 * SIZE | |||
| xvfadd.s VM1, x1, x2 | |||
| xvfmin.s VM0, VM0, VM1 | |||
| blt $r0, I, .L10 | |||
| .align 3 | |||
| .L11: | |||
| #ifdef DOUBLE | |||
| xvpickve.d x1, VM0, 0 | |||
| xvpickve.d x2, VM0, 1 | |||
| XVFMIN VM0, x1, x2 | |||
| #else | |||
| xvpickve.w x1, VM0, 0 | |||
| xvpickve.w x2, VM0, 1 | |||
| xvpickve.w x3, VM0, 2 | |||
| xvpickve.w x4, VM0, 3 | |||
| xvfmin.s VM1, x1, x2 | |||
| xvfmin.s VM0, x3, x4 | |||
| xvfmin.s VM0, VM0, VM1 | |||
| XVFMIN VM0, x1, x2 | |||
| XVFMIN VM1, x3, x4 | |||
| XVFMIN VM0, VM0, VM1 | |||
| #endif | |||
| b .L23 | |||
| .align 3 | |||
| @@ -112,66 +134,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L21: | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| addi.d I, I, -1 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s3, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s3, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s4, t1, t3 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s4, t1, t3 | |||
| blt $r0, I, .L21 | |||
| .align 3 | |||
| .L22: | |||
| fmin.s s1, s1, s2 | |||
| fmin.s s3, s3, s4 | |||
| fmin.s s1, s1, s3 | |||
| FMIN s1, s1, s2 | |||
| FMIN s3, s3, s4 | |||
| FMIN s1, s1, s3 | |||
| .align 3 | |||
| .L23: //N<8 | |||
| @@ -187,12 +209,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| FABS a1, a1 | |||
| ADD a0, a0, a1 | |||
| add.d X, X, INCX | |||
| fmin.s s1, a0, s1 | |||
| FMIN s1, a0, s1 | |||
| blt $r0, I, .L24 | |||
| .align 3 | |||
| .L999: | |||
| fmov.s $f0, $f22 | |||
| MOV $f0, $f22 | |||
| jirl $r0, $r1, 0x0 | |||
| .align 3 | |||
| @@ -61,61 +61,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| vxor.v res0, res0, res0 | |||
| bge $r0, N, .L999 | |||
| bge $r0, INCX, .L999 | |||
| fld.s a0, X, 0 * SIZE | |||
| fld.s a1, X, 1 * SIZE | |||
| fabs.s a0, a0 | |||
| fabs.s a1, a1 | |||
| fadd.s s1, a1, a0 | |||
| LD a0, X, 0 * SIZE | |||
| LD a1, X, 1 * SIZE | |||
| FABS a0, a0 | |||
| FABS a1, a1 | |||
| ADD s1, a1, a0 | |||
| #ifdef DOUBLE | |||
| vreplvei.d VM0, VM0, 0 | |||
| #else | |||
| vreplvei.w VM0, VM0, 0 | |||
| #endif | |||
| li.d TEMP, 1 | |||
| li.w I, -1 | |||
| slli.d TEMP, TEMP, ZBASE_SHIFT | |||
| slli.d INCX, INCX, ZBASE_SHIFT | |||
| vreplgr2vr.w neg1, I | |||
| vffint.s.w neg1, neg1 | |||
| srai.d I, N, 3 | |||
| bne INCX, TEMP, .L20 | |||
| bge $r0, I, .L23 | |||
| .align 3 | |||
| .L10: | |||
| vld VX0, X, 0 * SIZE | |||
| vld VX1, X, 4 * SIZE | |||
| addi.d I, I, -1 | |||
| vld VX0, X, 0 | |||
| vld VX1, X, 16 | |||
| #ifdef DOUBLE | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| #else | |||
| vpickev.w x1, VX1, VX0 | |||
| vpickod.w x2, VX1, VX0 | |||
| vfmul.s x3, neg1, x1 | |||
| vfmul.s x4, neg1, x2 | |||
| vfcmp.clt.s VT0, x1, res0 | |||
| vfcmp.clt.s VT1, x2, res0 | |||
| vld VX0, X, 8 * SIZE | |||
| vbitsel.v x1, x1, x3, VT0 | |||
| vbitsel.v x2, x2, x4, VT1 | |||
| vld VX1, X, 12 * SIZE | |||
| vfadd.s VM1, x1, x2 | |||
| #endif | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD VM1, x1, x2 | |||
| vld VX0, X, 32 | |||
| vld VX1, X, 48 | |||
| #ifdef DOUBLE | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| #else | |||
| vpickev.w x1, VX1, VX0 | |||
| vpickod.w x2, VX1, VX0 | |||
| vfmul.s x3, neg1, x1 | |||
| vfmul.s x4, neg1, x2 | |||
| vfcmp.clt.s VT0, x1, res0 | |||
| vfcmp.clt.s VT1, x2, res0 | |||
| #endif | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD x1, x1, x2 | |||
| VFMIN VM1, x1, VM1 | |||
| VFMIN VM0, VM0, VM1 | |||
| #ifdef DOUBLE | |||
| vld VX0, X, 64 | |||
| vld VX1, X, 80 | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD VM1, x1, x2 | |||
| vld VX0, X, 96 | |||
| vld VX1, X, 112 | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| VFSUB x3, res0, x1 | |||
| VFSUB x4, res0, x2 | |||
| VFMAX x1, x1, x3 | |||
| VFMAX x2, x2, x4 | |||
| VFADD x1, x1, x2 | |||
| VFMIN VM1, x1, VM1 | |||
| VFMIN VM0, VM0, VM1 | |||
| #endif | |||
| addi.d I, I, -1 | |||
| addi.d X, X, 16 * SIZE | |||
| vbitsel.v x1, x1, x3, VT0 | |||
| vbitsel.v x2, x2, x4, VT1 | |||
| vfadd.s x1, x1, x2 | |||
| vfmin.s VM1, x1, VM1 | |||
| vfmin.s VM0, VM0, VM1 | |||
| blt $r0, I, .L10 | |||
| .align 3 | |||
| .L11: | |||
| #ifdef DOUBLE | |||
| vreplvei.d x1, VM0, 0 | |||
| vreplvei.d x2, VM0, 1 | |||
| VFMIN VM0, x1, x2 | |||
| #else | |||
| vreplvei.w x1, VM0, 0 | |||
| vreplvei.w x2, VM0, 1 | |||
| vreplvei.w x3, VM0, 2 | |||
| vreplvei.w x4, VM0, 3 | |||
| vfmin.s VM1, x1, x2 | |||
| vfmin.s VM0, x3, x4 | |||
| vfmin.s VM0, VM0, VM1 | |||
| VFMIN VM1, x1, x2 | |||
| VFMIN VM0, x3, x4 | |||
| VFMIN VM0, VM0, VM1 | |||
| #endif | |||
| b .L23 | |||
| .align 3 | |||
| @@ -124,66 +161,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L21: | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s1, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s1, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| addi.d I, I, -1 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s3, t1, t3 | |||
| fld.s t1, X, 0 * SIZE | |||
| fld.s t2, X, 1 * SIZE | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s3, t1, t3 | |||
| LD t1, X, 0 * SIZE | |||
| LD t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fld.s t3, X, 0 * SIZE | |||
| fld.s t4, X, 1 * SIZE | |||
| LD t3, X, 0 * SIZE | |||
| LD t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| fabs.s t1, t1 | |||
| fabs.s t2, t2 | |||
| fabs.s t3, t3 | |||
| fabs.s t4, t4 | |||
| fadd.s t1, t1, t2 | |||
| fadd.s t3, t3, t4 | |||
| fmin.s s4, t1, t3 | |||
| FABS t1, t1 | |||
| FABS t2, t2 | |||
| FABS t3, t3 | |||
| FABS t4, t4 | |||
| ADD t1, t1, t2 | |||
| ADD t3, t3, t4 | |||
| FMIN s4, t1, t3 | |||
| blt $r0, I, .L21 | |||
| .align 3 | |||
| .L22: | |||
| fmin.s s1, s1, s2 | |||
| fmin.s s3, s3, s4 | |||
| fmin.s s1, s1, s3 | |||
| FMIN s1, s1, s2 | |||
| FMIN s3, s3, s4 | |||
| FMIN s1, s1, s3 | |||
| .align 3 | |||
| .L23: //N<8 | |||
| @@ -192,19 +229,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .align 3 | |||
| .L24: | |||
| fld.s a0, X, 0 * SIZE | |||
| fld.s a1, X, 1 * SIZE | |||
| LD a0, X, 0 * SIZE | |||
| LD a1, X, 1 * SIZE | |||
| addi.d I, I, -1 | |||
| fabs.s a0, a0 | |||
| fabs.s a1, a1 | |||
| fadd.s a0, a0, a1 | |||
| FABS a0, a0 | |||
| FABS a1, a1 | |||
| ADD a0, a0, a1 | |||
| add.d X, X, INCX | |||
| fmin.s s1, a0, s1 | |||
| FMIN s1, a0, s1 | |||
| blt $r0, I, .L24 | |||
| .align 3 | |||
| .L999: | |||
| fmov.s $f0, $f22 | |||
| MOV $f0, $f22 | |||
| jirl $r0, $r1, 0x0 | |||
| .align 3 | |||
| @@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L113 //alpha_r != 0.0 && alpha_i == 0.0 | |||
| .L14: | |||
| bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| b .L111 //alpha_r == 0.0 && alpha_i == 0.0 | |||
| .align 3 | |||
| @@ -117,38 +117,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L997 | |||
| .align 3 | |||
| .L112: //alpha_r == 0.0 && alpha_i != 0.0 | |||
| xvld VX0, X, 0 * SIZE | |||
| #ifdef DOUBLE | |||
| xvld VX1, X, 4 * SIZE | |||
| xvpickev.d x1, VX1, VX0 | |||
| xvpickod.d x2, VX1, VX0 | |||
| xvfmul.d x3, VXAI, x2 | |||
| xvfsub.d x3, VXZ, x3 | |||
| xvfmul.d x4, VXAI, x1 | |||
| xvilvl.d VX2, x4 ,x3 | |||
| xvilvh.d VX3, x4, x3 | |||
| xvst VX2, X, 0 * SIZE | |||
| xvst VX3, X, 4 * SIZE | |||
| addi.d X, X, 8 * SIZE | |||
| #else | |||
| xvld VX1, X, 8 * SIZE | |||
| xvpickev.w x1, VX1, VX0 | |||
| xvpickod.w x2, VX1, VX0 | |||
| xvfmul.s x3, VXAI, x2 | |||
| xvfsub.s x3, VXZ, x3 | |||
| xvfmul.s x4, VXAI, x1 | |||
| xvilvl.w VX2, x4 ,x3 | |||
| xvilvh.w VX3, x4, x3 | |||
| xvst VX2, X, 0 * SIZE | |||
| xvst VX3, X, 8 * SIZE | |||
| addi.d X, X, 16 * SIZE | |||
| #endif | |||
| addi.d I, I, -1 | |||
| blt $r0, I, .L112 | |||
| b .L997 | |||
| .align 3 | |||
| .L113: //alpha_r != 0.0 && alpha_i == 0.0 | |||
| xvld VX0, X, 0 * SIZE | |||
| #ifdef DOUBLE | |||
| @@ -227,7 +195,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L223 //alpha_r != 0.0 && alpha_i == 0.0 | |||
| .L24: | |||
| bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| b .L221 //alpha_r == 0.0 && alpha_i == 0.0 | |||
| .align 3 | |||
| @@ -275,119 +243,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L997 | |||
| .align 3 | |||
| .L222: //alpha_r == 0.0 && alpha_i != 0.0 | |||
| #ifdef DOUBLE | |||
| ld.d t1, X, 0 * SIZE | |||
| ld.d t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.d t3, X, 0 * SIZE | |||
| ld.d t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| xvinsgr2vr.d x1, t1, 0 | |||
| xvinsgr2vr.d x2, t2, 0 | |||
| xvinsgr2vr.d x1, t3, 1 | |||
| xvinsgr2vr.d x2, t4, 1 | |||
| ld.d t1, X, 0 * SIZE | |||
| ld.d t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.d t3, X, 0 * SIZE | |||
| ld.d t4, X, 1 * SIZE | |||
| xvinsgr2vr.d x1, t1, 2 | |||
| xvinsgr2vr.d x2, t2, 2 | |||
| xvinsgr2vr.d x1, t3, 3 | |||
| xvinsgr2vr.d x2, t4, 3 | |||
| add.d X, X, INCX | |||
| xvfmul.d x3, VXAI, x2 | |||
| xvfsub.d x3, VXZ, x3 | |||
| xvfmul.d x4, VXAI, x1 | |||
| addi.d I, I, -1 | |||
| xvstelm.d x3, XX, 0 * SIZE, 0 | |||
| xvstelm.d x4, XX, 1 * SIZE, 0 | |||
| add.d XX, XX, INCX | |||
| xvstelm.d x3, XX, 0 * SIZE, 1 | |||
| xvstelm.d x4, XX, 1 * SIZE, 1 | |||
| add.d XX, XX, INCX | |||
| xvstelm.d x3, XX, 0 * SIZE, 2 | |||
| xvstelm.d x4, XX, 1 * SIZE, 2 | |||
| add.d XX, XX, INCX | |||
| xvstelm.d x3, XX, 0 * SIZE, 3 | |||
| xvstelm.d x4, XX, 1 * SIZE, 3 | |||
| #else | |||
| ld.w t1, X, 0 * SIZE | |||
| ld.w t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.w t3, X, 0 * SIZE | |||
| ld.w t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| xvinsgr2vr.w x1, t1, 0 | |||
| xvinsgr2vr.w x2, t2, 0 | |||
| xvinsgr2vr.w x1, t3, 1 | |||
| xvinsgr2vr.w x2, t4, 1 | |||
| ld.w t1, X, 0 * SIZE | |||
| ld.w t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.w t3, X, 0 * SIZE | |||
| ld.w t4, X, 1 * SIZE | |||
| xvinsgr2vr.w x1, t1, 2 | |||
| xvinsgr2vr.w x2, t2, 2 | |||
| xvinsgr2vr.w x1, t3, 3 | |||
| xvinsgr2vr.w x2, t4, 3 | |||
| add.d X, X, INCX | |||
| ld.w t1, X, 0 * SIZE | |||
| ld.w t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.w t3, X, 0 * SIZE | |||
| ld.w t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| xvinsgr2vr.w x1, t1, 4 | |||
| xvinsgr2vr.w x2, t2, 4 | |||
| xvinsgr2vr.w x1, t3, 5 | |||
| xvinsgr2vr.w x2, t4, 5 | |||
| ld.w t1, X, 0 * SIZE | |||
| ld.w t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.w t3, X, 0 * SIZE | |||
| ld.w t4, X, 1 * SIZE | |||
| xvinsgr2vr.w x1, t1, 6 | |||
| xvinsgr2vr.w x2, t2, 6 | |||
| xvinsgr2vr.w x1, t3, 7 | |||
| xvinsgr2vr.w x2, t4, 7 | |||
| add.d X, X, INCX | |||
| xvfmul.s x3, VXAI, x2 | |||
| xvfsub.s x3, VXZ, x3 | |||
| xvfmul.s x4, VXAI, x1 | |||
| addi.d I, I, -1 | |||
| xvstelm.w x3, XX, 0 * SIZE, 0 | |||
| xvstelm.w x4, XX, 1 * SIZE, 0 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 1 | |||
| xvstelm.w x4, XX, 1 * SIZE, 1 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 2 | |||
| xvstelm.w x4, XX, 1 * SIZE, 2 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 3 | |||
| xvstelm.w x4, XX, 1 * SIZE, 3 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 4 | |||
| xvstelm.w x4, XX, 1 * SIZE, 4 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 5 | |||
| xvstelm.w x4, XX, 1 * SIZE, 5 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 6 | |||
| xvstelm.w x4, XX, 1 * SIZE, 6 | |||
| add.d XX, XX, INCX | |||
| xvstelm.w x3, XX, 0 * SIZE, 7 | |||
| xvstelm.w x4, XX, 1 * SIZE, 7 | |||
| #endif | |||
| add.d XX, XX, INCX | |||
| blt $r0, I, .L222 | |||
| b .L997 | |||
| .align 3 | |||
| .L223: //alpha_r != 0.0 && alpha_i == 0.0 | |||
| #ifdef DOUBLE | |||
| ld.d t1, X, 0 * SIZE | |||
| @@ -97,7 +97,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L113 //alpha_r != 0.0 && alpha_i == 0.0 | |||
| .L14: | |||
| bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| b .L111 //alpha_r == 0.0 && alpha_i == 0.0 | |||
| .align 3 | |||
| @@ -116,48 +116,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L997 | |||
| .align 3 | |||
| .L112: //alpha_r == 0.0 && alpha_i != 0.0 | |||
| vld VX0, X, 0 * SIZE | |||
| #ifdef DOUBLE | |||
| vld VX1, X, 2 * SIZE | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| vfmul.d x3, VXAI, x2 | |||
| vfsub.d x3, VXZ, x3 | |||
| vfmul.d x4, VXAI, x1 | |||
| vilvl.d VX2, x4 ,x3 | |||
| vilvh.d VX3, x4, x3 | |||
| vst VX2, X, 0 * SIZE | |||
| vst VX3, X, 2 * SIZE | |||
| vld VX0, X, 4 * SIZE | |||
| vld VX1, X, 6 * SIZE | |||
| vpickev.d x1, VX1, VX0 | |||
| vpickod.d x2, VX1, VX0 | |||
| vfmul.d x3, VXAI, x2 | |||
| vfsub.d x3, VXZ, x3 | |||
| vfmul.d x4, VXAI, x1 | |||
| vilvl.d VX2, x4 ,x3 | |||
| vilvh.d VX3, x4, x3 | |||
| vst VX2, X, 4 * SIZE | |||
| vst VX3, X, 6 * SIZE | |||
| #else | |||
| vld VX1, X, 4 * SIZE | |||
| vpickev.w x1, VX1, VX0 | |||
| vpickod.w x2, VX1, VX0 | |||
| vfmul.s x3, VXAI, x2 | |||
| vfsub.s x3, VXZ, x3 | |||
| vfmul.s x4, VXAI, x1 | |||
| vilvl.w VX2, x4 ,x3 | |||
| vilvh.w VX3, x4, x3 | |||
| vst VX2, X, 0 * SIZE | |||
| vst VX3, X, 4 * SIZE | |||
| #endif | |||
| addi.d X, X, 8 * SIZE | |||
| addi.d I, I, -1 | |||
| blt $r0, I, .L112 | |||
| b .L997 | |||
| .align 3 | |||
| .L113: //alpha_r != 0.0 && alpha_i == 0.0 | |||
| vld VX0, X, 0 * SIZE | |||
| #ifdef DOUBLE | |||
| @@ -256,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L223 //alpha_r != 0.0 && alpha_i == 0.0 | |||
| .L24: | |||
| bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0 | |||
| b .L221 //alpha_r == 0.0 && alpha_i == 0.0 | |||
| .align 3 | |||
| @@ -292,90 +250,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| b .L997 | |||
| .align 3 | |||
| .L222: //alpha_r == 0.0 && alpha_i != 0.0 | |||
| #ifdef DOUBLE | |||
| ld.d t1, X, 0 * SIZE | |||
| ld.d t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.d t3, X, 0 * SIZE | |||
| ld.d t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| vinsgr2vr.d x1, t1, 0 | |||
| vinsgr2vr.d x2, t2, 0 | |||
| vinsgr2vr.d x1, t3, 1 | |||
| vinsgr2vr.d x2, t4, 1 | |||
| vfmul.d x3, VXAI, x2 | |||
| vfsub.d x3, VXZ, x3 | |||
| vfmul.d x4, VXAI, x1 | |||
| vstelm.d x3, XX, 0 * SIZE, 0 | |||
| vstelm.d x4, XX, 1 * SIZE, 0 | |||
| add.d XX, XX, INCX | |||
| vstelm.d x3, XX, 0 * SIZE, 1 | |||
| vstelm.d x4, XX, 1 * SIZE, 1 | |||
| add.d XX, XX, INCX | |||
| ld.d t1, X, 0 * SIZE | |||
| ld.d t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.d t3, X, 0 * SIZE | |||
| ld.d t4, X, 1 * SIZE | |||
| vinsgr2vr.d x1, t1, 0 | |||
| vinsgr2vr.d x2, t2, 0 | |||
| vinsgr2vr.d x1, t3, 1 | |||
| vinsgr2vr.d x2, t4, 1 | |||
| add.d X, X, INCX | |||
| vfmul.d x3, VXAI, x2 | |||
| vfsub.d x3, VXZ, x3 | |||
| vfmul.d x4, VXAI, x1 | |||
| addi.d I, I, -1 | |||
| vstelm.d x3, XX, 0 * SIZE, 0 | |||
| vstelm.d x4, XX, 1 * SIZE, 0 | |||
| add.d XX, XX, INCX | |||
| vstelm.d x3, XX, 0 * SIZE, 1 | |||
| vstelm.d x4, XX, 1 * SIZE, 1 | |||
| #else | |||
| ld.w t1, X, 0 * SIZE | |||
| ld.w t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.w t3, X, 0 * SIZE | |||
| ld.w t4, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| vinsgr2vr.w x1, t1, 0 | |||
| vinsgr2vr.w x2, t2, 0 | |||
| vinsgr2vr.w x1, t3, 1 | |||
| vinsgr2vr.w x2, t4, 1 | |||
| ld.w t1, X, 0 * SIZE | |||
| ld.w t2, X, 1 * SIZE | |||
| add.d X, X, INCX | |||
| ld.w t3, X, 0 * SIZE | |||
| ld.w t4, X, 1 * SIZE | |||
| vinsgr2vr.w x1, t1, 2 | |||
| vinsgr2vr.w x2, t2, 2 | |||
| vinsgr2vr.w x1, t3, 3 | |||
| vinsgr2vr.w x2, t4, 3 | |||
| add.d X, X, INCX | |||
| vfmul.s x3, VXAI, x2 | |||
| vfsub.s x3, VXZ, x3 | |||
| vfmul.s x4, VXAI, x1 | |||
| addi.d I, I, -1 | |||
| vstelm.w x3, XX, 0 * SIZE, 0 | |||
| vstelm.w x4, XX, 1 * SIZE, 0 | |||
| add.d XX, XX, INCX | |||
| vstelm.w x3, XX, 0 * SIZE, 1 | |||
| vstelm.w x4, XX, 1 * SIZE, 1 | |||
| add.d XX, XX, INCX | |||
| vstelm.w x3, XX, 0 * SIZE, 2 | |||
| vstelm.w x4, XX, 1 * SIZE, 2 | |||
| add.d XX, XX, INCX | |||
| vstelm.w x3, XX, 0 * SIZE, 3 | |||
| vstelm.w x4, XX, 1 * SIZE, 3 | |||
| #endif | |||
| add.d XX, XX, INCX | |||
| blt $r0, I, .L222 | |||
| b .L997 | |||
| .align 3 | |||
| .L223: //alpha_r != 0.0 && alpha_i == 0.0 | |||
| #ifdef DOUBLE | |||
| ld.d t1, X, 0 * SIZE | |||
| @@ -69,16 +69,16 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha , FLOAT *x ) | |||
| for( i=0; i<n; i+=4 ) | |||
| { | |||
| t0 = da_r *x[0] - da_i *x[1]; | |||
| t1 = da_r *x[2] - da_i *x[3]; | |||
| t2 = da_r *x[4] - da_i *x[5]; | |||
| t3 = da_r *x[6] - da_i *x[7]; | |||
| t0 = da_r *x[0] - da_i *x[1]; | |||
| t1 = da_r *x[2] - da_i *x[3]; | |||
| t2 = da_r *x[4] - da_i *x[5]; | |||
| t3 = da_r *x[6] - da_i *x[7]; | |||
| x[1] = da_r * x[1] + da_i * x[0]; | |||
| x[3] = da_r * x[3] + da_i * x[2]; | |||
| x[5] = da_r * x[5] + da_i * x[4]; | |||
| x[7] = da_r * x[7] + da_i * x[6]; | |||
| x[0] = t0; | |||
| x[2] = t1; | |||
| x[4] = t2; | |||
| @@ -99,16 +99,16 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha , FLOAT *x ) | |||
| for( i=0; i<n; i+=4 ) | |||
| { | |||
| t0 = - da_i *x[1]; | |||
| t1 = - da_i *x[3]; | |||
| t2 = - da_i *x[5]; | |||
| t3 = - da_i *x[7]; | |||
| t0 = - da_i *x[1]; | |||
| t1 = - da_i *x[3]; | |||
| t2 = - da_i *x[5]; | |||
| t3 = - da_i *x[7]; | |||
| x[1] = da_i * x[0]; | |||
| x[3] = da_i * x[2]; | |||
| x[5] = da_i * x[4]; | |||
| x[7] = da_i * x[6]; | |||
| x[0] = t0; | |||
| x[2] = t1; | |||
| x[4] = t2; | |||
| @@ -129,16 +129,16 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha , FLOAT *x ) | |||
| for( i=0; i<n; i+=4 ) | |||
| { | |||
| t0 = da_r *x[0]; | |||
| t1 = da_r *x[2]; | |||
| t2 = da_r *x[4]; | |||
| t3 = da_r *x[6]; | |||
| t0 = da_r *x[0]; | |||
| t1 = da_r *x[2]; | |||
| t2 = da_r *x[4]; | |||
| t3 = da_r *x[6]; | |||
| x[1] = da_r * x[1]; | |||
| x[3] = da_r * x[3]; | |||
| x[5] = da_r * x[5]; | |||
| x[7] = da_r * x[7]; | |||
| x[0] = t0; | |||
| x[2] = t1; | |||
| x[4] = t2; | |||
| @@ -157,14 +157,14 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha , FLOAT *x ) | |||
| BLASLONG i; | |||
| for( i=0; i<n; i+=4 ) | |||
| { | |||
| x[0] = 0.0; | |||
| x[1] = 0.0; | |||
| x[2] = 0.0; | |||
| x[3] = 0.0; | |||
| x[4] = 0.0; | |||
| x[5] = 0.0; | |||
| x[6] = 0.0; | |||
| x[7] = 0.0; | |||
| x[0] = 0.0; | |||
| x[1] = 0.0; | |||
| x[2] = 0.0; | |||
| x[3] = 0.0; | |||
| x[4] = 0.0; | |||
| x[5] = 0.0; | |||
| x[6] = 0.0; | |||
| x[7] = 0.0; | |||
| x+=8; | |||
| } | |||
| @@ -186,10 +186,10 @@ static void zscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_ | |||
| for ( i=0; i<n; i+=4 ) | |||
| { | |||
| t0 = da_r * x[0] - da_i *x[1]; | |||
| t1 = da_r * x[inc_x] - da_i *x[inc_x + 1]; | |||
| t2 = da_r * x[inc_x2] - da_i *x[inc_x2 + 1]; | |||
| t3 = da_r * x[inc_x3] - da_i *x[inc_x3 + 1]; | |||
| t0 = da_r * x[0] - da_i *x[1]; | |||
| t1 = da_r * x[inc_x] - da_i *x[inc_x + 1]; | |||
| t2 = da_r * x[inc_x2] - da_i *x[inc_x2 + 1]; | |||
| t3 = da_r * x[inc_x3] - da_i *x[inc_x3 + 1]; | |||
| x[1] = da_i * x[0] + da_r * x[1]; | |||
| x[inc_x +1] = da_i * x[inc_x] + da_r * x[inc_x +1]; | |||
| @@ -228,7 +228,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| { | |||
| while(j < n1) | |||
| { | |||
| x[i]=0.0; | |||
| x[i+1]=0.0; | |||
| x[i+inc_x]=0.0; | |||
| @@ -240,7 +240,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| while(j < n) | |||
| { | |||
| x[i]=0.0; | |||
| x[i+1]=0.0; | |||
| i += inc_x ; | |||
| @@ -253,11 +253,17 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| { | |||
| while(j < n1) | |||
| { | |||
| temp0 = -da_i * x[i+1]; | |||
| if (isnan(x[i]) || isinf(x[i])) | |||
| temp0 = NAN; | |||
| else | |||
| temp0 = -da_i * x[i+1]; | |||
| x[i+1] = da_i * x[i]; | |||
| x[i] = temp0; | |||
| temp1 = -da_i * x[i+1+inc_x]; | |||
| if (isnan(x[i+inc_x]) || isinf(x[i+inc_x])) | |||
| temp1 = NAN; | |||
| else | |||
| temp1 = -da_i * x[i+1+inc_x]; | |||
| x[i+1+inc_x] = da_i * x[i+inc_x]; | |||
| x[i+inc_x] = temp1; | |||
| i += 2*inc_x ; | |||
| @@ -267,8 +273,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| while(j < n) | |||
| { | |||
| temp0 = -da_i * x[i+1]; | |||
| if (isnan(x[i]) || isinf(x[i])) | |||
| temp0 = NAN; | |||
| else | |||
| temp0 = -da_i * x[i+1]; | |||
| x[i+1] = da_i * x[i]; | |||
| x[i] = temp0; | |||
| i += inc_x ; | |||
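The guards added here restore IEEE propagation on the alpha_r == 0.0 path: a full complex multiply would still form the da_r * x[i] product, and 0 * Inf (or 0 * NaN) is NaN, so the real part of the result must become NaN instead of just -da_i * x[i+1]. A minimal sketch of the behaviour the kernel is being aligned with, in plain C with the multiply written out the way the BLAS kernels do (not OpenBLAS code):

#include <math.h>
#include <stdio.h>

int main(void) {
    double da_r = 0.0, da_i = 0.5;      /* alpha with zero real part     */
    double x_r  = INFINITY, x_i = 1.0;  /* element with an Inf real part */

    /* textbook complex multiply written out in real arithmetic          */
    double y_r = da_r * x_r - da_i * x_i;  /* 0*Inf - 0.5  -> NaN        */
    double y_i = da_i * x_r + da_r * x_i;  /* 0.5*Inf + 0  -> Inf        */

    printf("%g %g\n", y_r, y_i);           /* prints: nan inf            */
    return 0;
}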
| @@ -291,7 +300,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| while(j < n1) | |||
| { | |||
| temp0 = da_r * x[i]; | |||
| x[i+1] = da_r * x[i+1]; | |||
| x[i] = temp0; | |||
| @@ -305,7 +314,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| while(j < n) | |||
| { | |||
| temp0 = da_r * x[i]; | |||
| x[i+1] = da_r * x[i+1]; | |||
| x[i] = temp0; | |||
| @@ -368,7 +377,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| } | |||
| i = n1 << 1; | |||
| j = n1; | |||
| if ( da_r == 0.0 || da_r != da_r ) | |||
| { | |||
| if ( da_i == 0.0 ) | |||
| @@ -385,7 +394,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| } | |||
| } | |||
| else if (da_r < -FLT_MAX || da_r > FLT_MAX) { | |||
| else if (da_r < -FLT_MAX || da_r > FLT_MAX) { | |||
| while(j < n) | |||
| { | |||
| x[i]= NAN; | |||
| @@ -404,7 +413,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| if (x[i] < -FLT_MAX || x[i] > FLT_MAX) | |||
| temp0 = NAN; | |||
| x[i+1] = da_i * x[i]; | |||
| if ( x[i] == x[i]) //preserve NaN | |||
| if ( x[i] == x[i]) //preserve NaN | |||
| x[i] = temp0; | |||
| i += 2 ; | |||
| j++; | |||
| @@ -420,7 +429,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| { | |||
| while(j < n) | |||
| { | |||
| temp0 = da_r * x[i]; | |||
| x[i+1] = da_r * x[i+1]; | |||
| x[i] = temp0; | |||
| @@ -442,7 +451,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -16,6 +16,7 @@ else () | |||
| test_dnrm2.c | |||
| test_swap.c | |||
| test_zscal.c | |||
| test_amin.c | |||
| ) | |||
| endif () | |||
| @@ -11,7 +11,8 @@ UTESTBIN=openblas_utest | |||
| include $(TOPDIR)/Makefile.system | |||
| OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o test_dnrm2.o test_zscal.o | |||
| OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o test_dnrm2.o test_zscal.o \ | |||
| test_amin.o | |||
| #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o | |||
| ifneq ($(NO_LAPACK), 1) | |||
| @@ -1,5 +1,5 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011-2016, The OpenBLAS Project | |||
| Copyright (c) 2011-2024, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| @@ -13,9 +13,9 @@ met: | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| @@ -57,4 +57,31 @@ CTEST(amax, damax){ | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_COMPLEX | |||
| CTEST(amax, scamax){ | |||
| blasint N = 9, inc = 1; | |||
| float te_max = 0.0, tr_max = 0.0; | |||
| float x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8, | |||
| -9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, | |||
| -7.7, 8.8 }; | |||
| te_max = BLASFUNC(scamax)(&N, x, &inc); | |||
| tr_max = 20.0; | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_COMPLEX16 | |||
| CTEST(amax, dzamax){ | |||
| blasint N = 9, inc = 1; | |||
| double te_max = 0.0, tr_max = 0.0; | |||
| double x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8, | |||
| -9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, | |||
| -7.7, 8.8 }; | |||
| te_max = BLASFUNC(dzamax)(&N, x, &inc); | |||
| tr_max = 20.0; | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS); | |||
| } | |||
| #endif | |||
| @@ -0,0 +1,89 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011-2024, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "openblas_utest.h" | |||
| #ifdef BUILD_SINGLE | |||
| CTEST(amin, samin){ | |||
| blasint N = 3, inc = 1; | |||
| float te_min = 0.0, tr_min = 0.0; | |||
| float x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8, | |||
| -9.9 }; | |||
| te_min = BLASFUNC(samin)(&N, x, &inc); | |||
| tr_min = 1.1; | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_DOUBLE | |||
| CTEST(amin, damin){ | |||
| blasint N = 3, inc = 1; | |||
| double te_min = 0.0, tr_min = 0.0; | |||
| double x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8, | |||
| -9.9 }; | |||
| te_min = BLASFUNC(damin)(&N, x, &inc); | |||
| tr_min = 1.1; | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_COMPLEX | |||
| CTEST(amin, scamin){ | |||
| blasint N = 9, inc = 1; | |||
| float te_min = 0.0, tr_min = 0.0; | |||
| float x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8, | |||
| -9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, | |||
| -7.7, 8.8 }; | |||
| te_min = BLASFUNC(scamin)(&N, x, &inc); | |||
| tr_min = 3.3; | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_COMPLEX16 | |||
| CTEST(amin, dzamin){ | |||
| blasint N = 9, inc = 1; | |||
| double te_min = 0.0, tr_min = 0.0; | |||
| double x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8, | |||
| -9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, | |||
| -7.7, 8.8 }; | |||
| te_min = BLASFUNC(dzamin)(&N, x, &inc); | |||
| tr_min = 3.3; | |||
| ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS); | |||
| } | |||
| #endif | |||
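For the complex cases the expected constants follow the |Re| + |Im| reduction implemented by the kernels: for the vector used above, min_i(|Re x_i| + |Im x_i|) = |-1.1| + |2.2| = 3.3, and the corresponding scamax/dzamax tests expect 9.9 + 10.10 = 20.0. A reference loop one could use to cross-check such constants (a sketch, not part of the test suite):

#include <math.h>

/* reference camin: minimum of |Re|+|Im| over n interleaved complex elements */
static double ref_camin(const double *x, int n, int inc)
{
    double best = fabs(x[0]) + fabs(x[1]);
    for (int i = 1; i < n; i++) {
        double v = fabs(x[2 * i * inc]) + fabs(x[2 * i * inc + 1]);
        if (v < best) best = v;
    }
    return best;
}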
| @@ -20,6 +20,18 @@ CTEST(zscal, i_nan) | |||
| ASSERT_TRUE(isnan(nan[17])); | |||
| } | |||
| CTEST(zscal, i_nan_inc_2) | |||
| { | |||
| double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; | |||
| double nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, | |||
| NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; | |||
| cblas_zscal(9, i, &nan, 2); | |||
| ASSERT_TRUE(isnan(nan[0])); | |||
| ASSERT_TRUE(isnan(nan[1])); | |||
| ASSERT_TRUE(isnan(nan[16])); | |||
| ASSERT_TRUE(isnan(nan[17])); | |||
| } | |||
| CTEST(zscal, nan_i) | |||
| { | |||
| double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; | |||
| @@ -30,7 +42,19 @@ CTEST(zscal, nan_i) | |||
| ASSERT_TRUE(isnan(i[16])); | |||
| ASSERT_TRUE(isnan(i[17])); | |||
| } | |||
| CTEST(zscal, nan_i_inc_2) | |||
| { | |||
| double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, | |||
| 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; | |||
| double nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; | |||
| cblas_zscal(9, &nan, &i, 2); | |||
| ASSERT_TRUE(isnan(i[0])); | |||
| ASSERT_TRUE(isnan(i[1])); | |||
| ASSERT_TRUE(isnan(i[16])); | |||
| ASSERT_TRUE(isnan(i[17])); | |||
| } | |||
| CTEST(zscal, i_inf) | |||
| { | |||
| double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; | |||
| @@ -40,7 +64,19 @@ CTEST(zscal, i_inf) | |||
| ASSERT_TRUE(isinf(inf[1])); | |||
| ASSERT_TRUE(isnan(inf[16])); | |||
| ASSERT_TRUE(isinf(inf[17])); | |||
| } | |||
| } | |||
| CTEST(zscal, i_inf_inc_2) | |||
| { | |||
| double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; | |||
| double inf[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, | |||
| INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0}; | |||
| cblas_zscal(9, i, &inf, 2); | |||
| ASSERT_TRUE(isnan(inf[0])); | |||
| ASSERT_TRUE(isinf(inf[1])); | |||
| ASSERT_TRUE(isnan(inf[16])); | |||
| ASSERT_TRUE(isinf(inf[17])); | |||
| } | |||
| CTEST(zscal, inf_i) | |||
| { | |||
| @@ -53,4 +89,16 @@ CTEST(zscal, inf_i) | |||
| ASSERT_TRUE(isinf(i[17])); | |||
| } | |||
| CTEST(zscal, inf_i_inc_2) | |||
| { | |||
| double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, | |||
| 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; | |||
| double inf[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0}; | |||
| cblas_zscal(9, &inf, &i, 2); | |||
| ASSERT_TRUE(isnan(i[0])); | |||
| ASSERT_TRUE(isinf(i[1])); | |||
| ASSERT_TRUE(isnan(i[16])); | |||
| ASSERT_TRUE(isinf(i[17])); | |||
| } | |||
| #endif | |||