Change-Id: Iae7800a32f5af3903c330882cdf6f292d885f266tags/v0.3.13^2
| @@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500) | |||
| TARGET_FLAGS = -mips64r6 | |||
| endif | |||
| ifeq ($(TARGET), C910V) | |||
| TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v | |||
| endif | |||
| all: getarch_2nd | |||
| ./getarch_2nd 0 >> $(TARGET_MAKE) | |||
| ./getarch_2nd 1 >> $(TARGET_CONF) | |||
| @@ -0,0 +1,4 @@ | |||
| ifeq ($(CORE), C910V) | |||
| CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v | |||
| FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static | |||
| endif | |||
| @@ -724,7 +724,10 @@ endif | |||
| endif | |||
| endif | |||
| ifeq ($(ARCH), riscv64) | |||
| NO_BINARY_MODE = 1 | |||
| BINARY_DEFINED = 1 | |||
| endif | |||
| # | |||
| @@ -104,3 +104,6 @@ VORTEX | |||
| ZARCH_GENERIC | |||
| Z13 | |||
| Z14 | |||
| 10.RISC-V 64: | |||
| RISCV64_GENERIC | |||
| @@ -92,6 +92,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
| $architecture = arm if ($data =~ /ARCH_ARM/); | |||
| $architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
| $architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
| $architecture = riscv64 if ($data =~ /ARCH_RISCV64/); | |||
| $defined = 0; | |||
| @@ -136,6 +137,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { | |||
| $binary =32; | |||
| } | |||
| if ($architecture eq "riscv64") { | |||
| $defined = 1; | |||
| $binary = 64; | |||
| } | |||
| if ($compiler eq "PGI") { | |||
| $compiler_name .= " -tp p7" if ($binary eq "32"); | |||
| $compiler_name .= " -tp p7-64" if ($binary eq "64"); | |||
| @@ -437,6 +437,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||
| #include "common_mips.h" | |||
| #endif | |||
| #ifdef ARCH_RISCV64 | |||
| #include "common_riscv64.h" | |||
| #endif | |||
| #ifdef ARCH_MIPS64 | |||
| #include "common_mips64.h" | |||
| #endif | |||
| @@ -0,0 +1,98 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| #ifndef COMMON_RISCV64 | |||
| #define COMMON_RISCV64 | |||
| #define MB __sync_synchronize() | |||
| #define WMB __sync_synchronize() | |||
| #define RMB __sync_synchronize() | |||
| #define INLINE inline | |||
| #ifndef ASSEMBLER | |||
| static inline int blas_quickdivide(blasint x, blasint y){ | |||
| return x / y; | |||
| } | |||
| #endif | |||
| #define BUFFER_SIZE ( 32 << 20) | |||
| #define SEEK_ADDRESS | |||
| #if defined(C910V) | |||
| #include <riscv-vector.h> | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,113 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
/* Core identifiers; each value is the index of the matching cpuname[] entry. */
#define CPU_UNKNOWN     0
#define CPU_C910V       1

/* Printable core names, indexed by the CPU_* identifiers above. */
static char *cpuname[] = {
  "UNKNOWN",   /* was misspelled "UNKOWN"; get_cpuconfig() already emits UNKNOWN */
  "C910V"
};

/*
 * Identify the core this program is running on.
 *
 * No runtime probing is implemented: the function always returns
 * CPU_UNKNOWN.
 */
int detect(void){
  return CPU_UNKNOWN;
}

/*
 * Return the printable name of the detected core.
 *
 * The result points into the static cpuname[] table and must not be freed.
 * An identifier outside the table falls back to the CPU_UNKNOWN entry
 * instead of reading past the end of the array.
 */
char *get_corename(void){
  int id = detect();

  if (id < 0 || id >= (int)(sizeof(cpuname) / sizeof(cpuname[0]))) {
    id = CPU_UNKNOWN;
  }
  return cpuname[id];
}
/*
 * Print the architecture name.
 *
 * Writes the text RISCV64 to standard output, without a trailing newline.
 */
void get_architecture(void)
{
  static const char arch_name[] = "RISCV64";

  printf("%s", arch_name);
}
/*
 * Print the sub-architecture name.
 *
 * Deliberately a no-op in this file: nothing is written.
 */
void get_subarchitecture(void)
{
  /* nothing to print */
}
/*
 * Print the sub-directory name for this architecture.
 *
 * Writes the text riscv64 to standard output, without a trailing newline.
 */
void get_subdirname(void)
{
  static const char subdir_name[] = "riscv64";

  printf("%s", subdir_name);
}
/*
 * Print the CPU configuration as a list of C preprocessor definitions.
 *
 * Writes one "#define ..." line per setting to standard output: the UNKNOWN
 * core marker followed by fixed cache and TLB parameters. The values are
 * hard-coded; nothing is probed from the hardware.
 */
void get_cpuconfig(void){
  printf("#define UNKNOWN\n");
  printf("#define L1_DATA_SIZE 65536\n");
  printf("#define L1_DATA_LINESIZE 32\n");
  /* 512 KiB = 524288 bytes. The previous value, 512488, was a digit
     transposition and not a multiple of the 32-byte line size below. */
  printf("#define L2_SIZE 524288\n");
  printf("#define L2_LINESIZE 32\n");
  printf("#define DTB_DEFAULT_ENTRIES 64\n");
  printf("#define DTB_SIZE 4096\n");
  printf("#define L2_ASSOCIATIVE 4\n");
}
/*
 * Print the library name suffix.
 *
 * Writes the text riscv64 followed by a newline to standard output.
 */
void get_libname(void)
{
  static const char lib_name[] = "riscv64\n";

  printf("%s", lib_name);
}
| @@ -153,6 +153,10 @@ ARCH_ARM | |||
| ARCH_ARM64 | |||
| #endif | |||
| #if defined(__riscv) | |||
| ARCH_RISCV64 | |||
| #endif | |||
| #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) | |||
| HAVE_C11 | |||
| #endif | |||
| @@ -981,6 +981,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_RISCV64_GENERIC | |||
| #define FORCE | |||
| #define ARCHITECTURE "RISCV64" | |||
| #define SUBARCHITECTURE "RISCV64_GENERIC" | |||
| #define SUBDIRNAME "riscv64" | |||
| #define ARCHCONFIG "-DRISCV64_GENERIC " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
| #define LIBNAME "riscv64_generic" | |||
| #define CORENAME "RISCV64_GENERIC" | |||
| #else | |||
| #endif | |||
| #ifdef FORCE_CORTEXA15 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM" | |||
| @@ -1252,6 +1266,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "Z14" | |||
| #endif | |||
| #ifdef FORCE_C910V | |||
| #define FORCE | |||
| #define ARCHITECTURE "RISCV64" | |||
| #define SUBARCHITECTURE "C910V" | |||
| #define SUBDIRNAME "riscv64" | |||
| #define ARCHCONFIG "-DC910V " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
| #define LIBNAME "c910v" | |||
| #define CORENAME "C910V" | |||
| #else | |||
| #endif | |||
| #ifndef FORCE | |||
| #ifdef USER_TARGET | |||
| @@ -1306,6 +1335,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
/*
 * RISC-V: pull in the cpuid helpers and flag the architecture as supported,
 * the same way the neighbouring architecture blocks do.
 * NOTE(review): OPENBLAS_SUPPORTED is presumably checked further down in
 * this file (outside the visible hunk) -- confirm before merging.
 */
#ifdef __riscv
#include "cpuid_riscv64.c"
#define OPENBLAS_SUPPORTED
#endif
| #ifdef __arm__ | |||
| #include "cpuid_arm.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| @@ -25,6 +25,10 @@ ifeq ($(ARCH), arm64) | |||
| USE_TRMM = 1 | |||
| endif | |||
| ifeq ($(ARCH), riscv64) | |||
| USE_TRMM = 1 | |||
| endif | |||
| ifeq ($(TARGET), LOONGSON3B) | |||
| USE_TRMM = 1 | |||
| endif | |||
| @@ -0,0 +1,30 @@ | |||
| ifndef SCABS_KERNEL | |||
| SCABS_KERNEL = ../generic/cabs.c | |||
| endif | |||
| ifndef DCABS_KERNEL | |||
| DCABS_KERNEL = ../generic/cabs.c | |||
| endif | |||
| ifndef QCABS_KERNEL | |||
| QCABS_KERNEL = ../generic/cabs.c | |||
| endif | |||
| ifndef LSAME_KERNEL | |||
| LSAME_KERNEL = ../generic/lsame.c | |||
| endif | |||
| ifndef SGEMM_BETA | |||
| SGEMM_BETA = ../generic/gemm_beta.c | |||
| endif | |||
| ifndef DGEMM_BETA | |||
| DGEMM_BETA = ../generic/gemm_beta.c | |||
| endif | |||
| ifndef CGEMM_BETA | |||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| ifndef ZGEMM_BETA | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| @@ -0,0 +1,190 @@ | |||
| SAMAXKERNEL = amax_vector.c | |||
| DAMAXKERNEL = amax_vector.c | |||
| CAMAXKERNEL = zamax_vector.c | |||
| ZAMAXKERNEL = zamax_vector.c | |||
| SAMINKERNEL = amin_vector.c | |||
| DAMINKERNEL = amin_vector.c | |||
| CAMINKERNEL = zamin_vector.c | |||
| ZAMINKERNEL = zamin_vector.c | |||
| SMAXKERNEL = max_vector.c | |||
| DMAXKERNEL = max_vector.c | |||
| SMINKERNEL = min_vector.c | |||
| DMINKERNEL = min_vector.c | |||
| ISAMAXKERNEL = iamax_vector.c | |||
| IDAMAXKERNEL = iamax_vector.c | |||
| ICAMAXKERNEL = izamax_vector.c | |||
| IZAMAXKERNEL = izamax_vector.c | |||
| ISAMINKERNEL = iamin_vector.c | |||
| IDAMINKERNEL = iamin_vector.c | |||
| ICAMINKERNEL = izamin_vector.c | |||
| IZAMINKERNEL = izamin_vector.c | |||
| ISMAXKERNEL = imax_vector.c | |||
| IDMAXKERNEL = imax_vector.c | |||
| ISMINKERNEL = imin_vector.c | |||
| IDMINKERNEL = imin_vector.c | |||
| SASUMKERNEL = asum_vector.c | |||
| DASUMKERNEL = asum_vector.c | |||
| CASUMKERNEL = zasum_vector.c | |||
| ZASUMKERNEL = zasum_vector.c | |||
| SSUMKERNEL = ../arm/sum.c | |||
| DSUMKERNEL = ../arm/sum.c | |||
| CSUMKERNEL = ../arm/zsum.c | |||
| ZSUMKERNEL = ../arm/zsum.c | |||
| SAXPYKERNEL = axpy_vector.c | |||
| DAXPYKERNEL = axpy_vector.c | |||
| CAXPYKERNEL = zaxpy_vector.c | |||
| ZAXPYKERNEL = zaxpy_vector.c | |||
| SAXPBYKERNEL = axpby_vector.c | |||
| DAXPBYKERNEL = axpby_vector.c | |||
| CAXPBYKERNEL = zaxpby_vector.c | |||
| ZAXPBYKERNEL = zaxpby_vector.c | |||
| SCOPYKERNEL = copy_vector.c | |||
| DCOPYKERNEL = copy_vector.c | |||
| CCOPYKERNEL = zcopy_vector.c | |||
| ZCOPYKERNEL = zcopy_vector.c | |||
| SDOTKERNEL = dot_vector.c | |||
| DDOTKERNEL = dot_vector.c | |||
| CDOTKERNEL = zdot_vector.c | |||
| ZDOTKERNEL = zdot_vector.c | |||
| SNRM2KERNEL = nrm2_vector.c | |||
| DNRM2KERNEL = nrm2_vector.c | |||
| CNRM2KERNEL = znrm2_vector.c | |||
| ZNRM2KERNEL = znrm2_vector.c | |||
| SROTKERNEL = rot_vector.c | |||
| DROTKERNEL = rot_vector.c | |||
| CROTKERNEL = zrot_vector.c | |||
| ZROTKERNEL = zrot_vector.c | |||
| SSCALKERNEL = scal_vector.c | |||
| DSCALKERNEL = scal_vector.c | |||
| CSCALKERNEL = zscal_vector.c | |||
| ZSCALKERNEL = zscal_vector.c | |||
| SSWAPKERNEL = swap_vector.c | |||
| DSWAPKERNEL = swap_vector.c | |||
| CSWAPKERNEL = zswap_vector.c | |||
| ZSWAPKERNEL = zswap_vector.c | |||
| SGEMVNKERNEL = gemv_n_vector.c | |||
| DGEMVNKERNEL = gemv_n_vector.c | |||
| CGEMVNKERNEL = zgemv_n_vector.c | |||
| ZGEMVNKERNEL = zgemv_n_vector.c | |||
| SGEMVTKERNEL = gemv_t_vector.c | |||
| DGEMVTKERNEL = gemv_t_vector.c | |||
| CGEMVTKERNEL = zgemv_t_vector.c | |||
| ZGEMVTKERNEL = zgemv_t_vector.c | |||
| STRMMKERNEL = ../generic/trmmkernel_16x4.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_8x4.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = sgemm_kernel_16x4_c910v.c | |||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = dgemm_kernel_8x4_c910v.c | |||
| ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||
| DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| SSYMV_U_KERNEL = symv_U_vector.c | |||
| SSYMV_L_KERNEL = symv_L_vector.c | |||
| DSYMV_U_KERNEL = symv_U_vector.c | |||
| DSYMV_L_KERNEL = symv_L_vector.c | |||
| CSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| CSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| ZSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| ZSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| CHEMV_L_KERNEL = zhemv_LM_vector.c | |||
| CHEMV_M_KERNEL = zhemv_LM_vector.c | |||
| CHEMV_U_KERNEL = zhemv_UV_vector.c | |||
| CHEMV_V_KERNEL = zhemv_UV_vector.c | |||
| ZHEMV_L_KERNEL = zhemv_LM_vector.c | |||
| ZHEMV_M_KERNEL = zhemv_LM_vector.c | |||
| ZHEMV_U_KERNEL = zhemv_UV_vector.c | |||
| ZHEMV_V_KERNEL = zhemv_UV_vector.c | |||
| LSAME_KERNEL = ../generic/lsame.c | |||
| SCABS_KERNEL = ../generic/cabs.c | |||
| DCABS_KERNEL = ../generic/cabs.c | |||
| QCABS_KERNEL = ../generic/cabs.c | |||
| ifndef SGEMM_BETA | |||
| SGEMM_BETA = ../generic/gemm_beta.c | |||
| endif | |||
| ifndef DGEMM_BETA | |||
| DGEMM_BETA = ../generic/gemm_beta.c | |||
| endif | |||
| ifndef CGEMM_BETA | |||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| ifndef ZGEMM_BETA | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| @@ -0,0 +1,164 @@ | |||
| SAMAXKERNEL = ../riscv64/amax.c | |||
| DAMAXKERNEL = ../riscv64/amax.c | |||
| CAMAXKERNEL = ../riscv64/zamax.c | |||
| ZAMAXKERNEL = ../riscv64/zamax.c | |||
| SAMINKERNEL = ../riscv64/amin.c | |||
| DAMINKERNEL = ../riscv64/amin.c | |||
| CAMINKERNEL = ../riscv64/zamin.c | |||
| ZAMINKERNEL = ../riscv64/zamin.c | |||
| SMAXKERNEL = ../riscv64/max.c | |||
| DMAXKERNEL = ../riscv64/max.c | |||
| SMINKERNEL = ../riscv64/min.c | |||
| DMINKERNEL = ../riscv64/min.c | |||
| ISAMAXKERNEL = ../riscv64/iamax.c | |||
| IDAMAXKERNEL = ../riscv64/iamax.c | |||
| ICAMAXKERNEL = ../riscv64/izamax.c | |||
| IZAMAXKERNEL = ../riscv64/izamax.c | |||
| ISAMINKERNEL = ../riscv64/iamin.c | |||
| IDAMINKERNEL = ../riscv64/iamin.c | |||
| ICAMINKERNEL = ../riscv64/izamin.c | |||
| IZAMINKERNEL = ../riscv64/izamin.c | |||
| ISMAXKERNEL = ../riscv64/imax.c | |||
| IDMAXKERNEL = ../riscv64/imax.c | |||
| ISMINKERNEL = ../riscv64/imin.c | |||
| IDMINKERNEL = ../riscv64/imin.c | |||
| SASUMKERNEL = ../riscv64/asum.c | |||
| DASUMKERNEL = ../riscv64/asum.c | |||
| CASUMKERNEL = ../riscv64/zasum.c | |||
| ZASUMKERNEL = ../riscv64/zasum.c | |||
| SSUMKERNEL = ../arm/sum.c | |||
| DSUMKERNEL = ../arm/sum.c | |||
| CSUMKERNEL = ../arm/zsum.c | |||
| ZSUMKERNEL = ../arm/zsum.c | |||
| SAXPYKERNEL = ../riscv64/axpy.c | |||
| DAXPYKERNEL = ../riscv64/axpy.c | |||
| CAXPYKERNEL = ../riscv64/zaxpy.c | |||
| ZAXPYKERNEL = ../riscv64/zaxpy.c | |||
| SCOPYKERNEL = ../riscv64/copy.c | |||
| DCOPYKERNEL = ../riscv64/copy.c | |||
| CCOPYKERNEL = ../riscv64/zcopy.c | |||
| ZCOPYKERNEL = ../riscv64/zcopy.c | |||
| SDOTKERNEL = ../riscv64/dot.c | |||
| DDOTKERNEL = ../riscv64/dot.c | |||
| CDOTKERNEL = ../riscv64/zdot.c | |||
| ZDOTKERNEL = ../riscv64/zdot.c | |||
| SNRM2KERNEL = ../riscv64/nrm2.c | |||
| DNRM2KERNEL = ../riscv64/nrm2.c | |||
| CNRM2KERNEL = ../riscv64/znrm2.c | |||
| ZNRM2KERNEL = ../riscv64/znrm2.c | |||
| SROTKERNEL = ../riscv64/rot.c | |||
| DROTKERNEL = ../riscv64/rot.c | |||
| CROTKERNEL = ../riscv64/zrot.c | |||
| ZROTKERNEL = ../riscv64/zrot.c | |||
| SSCALKERNEL = ../riscv64/scal.c | |||
| DSCALKERNEL = ../riscv64/scal.c | |||
| CSCALKERNEL = ../riscv64/zscal.c | |||
| ZSCALKERNEL = ../riscv64/zscal.c | |||
| SSWAPKERNEL = ../riscv64/swap.c | |||
| DSWAPKERNEL = ../riscv64/swap.c | |||
| CSWAPKERNEL = ../riscv64/zswap.c | |||
| ZSWAPKERNEL = ../riscv64/zswap.c | |||
| SGEMVNKERNEL = ../riscv64/gemv_n.c | |||
| DGEMVNKERNEL = ../riscv64/gemv_n.c | |||
| CGEMVNKERNEL = ../riscv64/zgemv_n.c | |||
| ZGEMVNKERNEL = ../riscv64/zgemv_n.c | |||
| SGEMVTKERNEL = ../riscv64/gemv_t.c | |||
| DGEMVTKERNEL = ../riscv64/gemv_t.c | |||
| CGEMVTKERNEL = ../riscv64/zgemv_t.c | |||
| ZGEMVTKERNEL = ../riscv64/zgemv_t.c | |||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| SSYMV_U_KERNEL = ../generic/symv_k.c | |||
| SSYMV_L_KERNEL = ../generic/symv_k.c | |||
| DSYMV_U_KERNEL = ../generic/symv_k.c | |||
| DSYMV_L_KERNEL = ../generic/symv_k.c | |||
| CSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| CSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| ZSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
| ZSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
| LSAME_KERNEL = ../generic/lsame.c | |||
| SCABS_KERNEL = ../generic/cabs.c | |||
| DCABS_KERNEL = ../generic/cabs.c | |||
| QCABS_KERNEL = ../generic/cabs.c | |||
| ifndef SGEMM_BETA | |||
| SGEMM_BETA = ../generic/gemm_beta.c | |||
| endif | |||
| ifndef DGEMM_BETA | |||
| DGEMM_BETA = ../generic/gemm_beta.c | |||
| endif | |||
| ifndef CGEMM_BETA | |||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| ifndef ZGEMM_BETA | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| endif | |||
| @@ -0,0 +1,75 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if defined(DOUBLE) | |||
| #define ABS fabs | |||
| #else | |||
| #define ABS fabsf | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0; | |||
| FLOAT maxf=0.0; | |||
| if (n <= 0 || inc_x <= 0) return(maxf); | |||
| maxf=ABS(x[0]); | |||
| ix += inc_x; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if( ABS(x[ix]) > maxf ) | |||
| { | |||
| maxf = ABS(x[ix]); | |||
| } | |||
| ix += inc_x; | |||
| i++; | |||
| } | |||
| return(maxf); | |||
| } | |||
| @@ -0,0 +1,245 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if !defined(DOUBLE) /* DOUBLE not defined: the generic names below map to the e32 / float32xm8 variants */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 /* load used by the inc_x == 1 path of the function below */ | |||
| #define VLSEV_FLOAT vlsev_float32xm8 /* load that additionally takes stride_x, used when inc_x != 1 */ | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 /* only referenced from commented-out lines in the function below */ | |||
| #define VFMAXVV_FLOAT vfmaxvv_float32xm8 | |||
| #else /* DOUBLE defined: the same names map to the e64 / float64xm8 variants */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 /* only referenced from commented-out lines in the function below */ | |||
| #define VFMAXVV_FLOAT vfmaxvv_float64xm8 | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) /* Largest absolute value among x[0], x[inc_x], ..., x[(n-1)*inc_x]. */ | |||
| { /* Returns 0.0 when n <= 0 or inc_x <= 0. */ | |||
| BLASLONG i=0, j=0; /* i: unrolled-loop iteration counter; j: number of elements consumed so far */ | |||
| BLASLONG ix=0; /* element offset into x; advanced only by the strided unrolled loop */ | |||
| FLOAT maxf=0.0; | |||
| if (n <= 0 || inc_x <= 0) return(maxf); | |||
| unsigned int gvl = 0; /* value returned by vsetvli for the chunk being processed */ | |||
| FLOAT_V_T v0, v1, v_max; | |||
| MASK_T mask0, mask1; | |||
| FLOAT zero = 0.0; /* scalar operand (%2) of the inline asm below */ | |||
| if(inc_x == 1){ /* contiguous input */ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ /* at least 2*gvl elements: the two-chunk unrolled loop runs at least once */ | |||
| v_max = VFMVVF_FLOAT(0, gvl); /* per-lane running maximum, seeded with 0 */ | |||
| for(i=0,j=0; i<n/(gvl*2); i++){ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); /* presumably flags the lanes with v0 < 0 -- confirm against the intrinsic docs */ | |||
| //v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl); | |||
| #if defined(DOUBLE) /* the two asm variants differ only in the e64 / e32 field of vsetvli */ | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" /* place the mask operand in register v0, which the v0.t suffix below refers to */ | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" /* masked reverse-subtract with scalar zero; presumably negates the flagged lanes, giving |v0| */ | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); /* v0 is declared clobbered because the first instruction overwrites it */ | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| v_max = VFMAXVV_FLOAT(v_max, v0, gvl); | |||
| v1 = VLEV_FLOAT(&x[j+gvl], gvl); /* second chunk of this iteration */ | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| //v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl); | |||
| #if defined(DOUBLE) /* same masked sequence as above, applied to v1 / mask1 */ | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| v_max = VFMAXVV_FLOAT(v_max, v1, gvl); | |||
| j += gvl*2; | |||
| } | |||
| v0 = VFMVVF_FLOAT(0, gvl); /* zero vector passed as the second reduction operand */ | |||
| v0 = VFREDMAXVS_FLOAT(v_max, v0, gvl); | |||
| maxf = v0[0]; /* element 0 of the reduction result becomes the running maximum */ | |||
| } | |||
| for(;j<n;){ /* remaining elements, or all of them when the unrolled loop was skipped */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| //v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl); | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| v1 = VFMVVF_FLOAT(0, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v0, v1, gvl); /* reduce this chunk on its own */ | |||
| if(v0[0] > maxf) /* then fold it into the scalar maximum */ | |||
| maxf = v0[0]; | |||
| j += gvl; | |||
| } | |||
| }else{ /* inc_x != 1: same algorithm using the strided load */ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| BLASLONG stride_x = inc_x * sizeof(FLOAT); /* distance between consecutive elements, in bytes */ | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; /* offset, in elements of x, covered by one chunk of gvl strided elements */ | |||
| v_max = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n/(gvl*2); i++){ | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| //v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl); | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| v_max = VFMAXVV_FLOAT(v_max, v0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); /* second chunk starts inc_xv elements further on */ | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| //v1 = VFRSUBVF_MASK_FLOAT(v1, 0, mask1, gvl); | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| v_max = VFMAXVV_FLOAT(v_max, v1, gvl); | |||
| j += gvl*2; | |||
| ix += inc_xv*2; /* step past the two chunks just processed */ | |||
| } | |||
| v0 = VFMVVF_FLOAT(0, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v_max, v0, gvl); | |||
| maxf = v0[0]; | |||
| } | |||
| for(;j<n;){ /* strided tail: addresses are recomputed from j rather than ix */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| //v0 = VFRSUBVF_MASK_FLOAT(v0, 0, mask0, gvl); | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(v0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| v1 = VFMVVF_FLOAT(0, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v0, v1, gvl); | |||
| if(v0[0] > maxf) | |||
| maxf = v0[0]; | |||
| j += gvl; | |||
| } | |||
| } | |||
| return(maxf); | |||
| } | |||
| @@ -0,0 +1,75 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* ABS: absolute-value routine matching the precision selected by DOUBLE. */ | |||
| #if !defined(DOUBLE) | |||
| #define ABS fabsf | |||
| #else | |||
| #define ABS fabs | |||
| #endif | |||
| /* Smallest absolute value among x[0], x[inc_x], ..., x[(n-1)*inc_x]. */ | |||
| /* Returns 0.0 when n <= 0 or inc_x <= 0. */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG pos; | |||
|     FLOAT smallest; | |||
|     FLOAT magnitude; | |||
|     if (n <= 0 || inc_x <= 0) { | |||
|         return 0.0; | |||
|     } | |||
|     /* Seed with the first element, then scan the remaining n-1 elements. */ | |||
|     smallest = ABS(x[0]); | |||
|     pos = inc_x; | |||
|     for (k = 1; k < n; k++) { | |||
|         magnitude = ABS(x[pos]); | |||
|         if (magnitude < smallest) { | |||
|             smallest = magnitude; | |||
|         } | |||
|         pos += inc_x; | |||
|     } | |||
|     return smallest; | |||
| } | |||
| @@ -0,0 +1,241 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| #if !defined(DOUBLE) /* DOUBLE not defined: the generic names below map to the e32 / float32xm8 variants */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 /* load used by the inc_x == 1 path of the function below */ | |||
| #define VLSEV_FLOAT vlsev_float32xm8 /* load that additionally takes stride_x, used when inc_x != 1 */ | |||
| #define VFREDMINVS_FLOAT vfredminvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 /* only referenced from commented-out lines in the function below */ | |||
| #define VFMINVV_FLOAT vfminvv_float32xm8 | |||
| #else /* DOUBLE defined: the same names map to the e64 / float64xm8 variants */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 /* only referenced from commented-out lines in the function below */ | |||
| #define VFMINVV_FLOAT vfminvv_float64xm8 | |||
| #endif | |||
| /* AMIN_FLOAT_MAX: largest finite value of FLOAT, used to seed every minimum. */ | |||
| /* The double build must use DBL_MAX: seeding with FLT_MAX would cap the result */ | |||
| /* at FLT_MAX for inputs whose magnitudes all exceed it. */ | |||
| /* AMIN_ASM_SEW: element-width field of the vsetvli issued inside the inline asm. */ | |||
| #if defined(DOUBLE) | |||
| #define AMIN_FLOAT_MAX DBL_MAX | |||
| #define AMIN_ASM_SEW "e64" | |||
| #else | |||
| #define AMIN_FLOAT_MAX FLT_MAX | |||
| #define AMIN_ASM_SEW "e32" | |||
| #endif | |||
| /* AMIN_NEGATE_MASKED(vec, mask): masked reverse-subtract of vec from the scalar `zero`, */ | |||
| /* in place; with mask = (vec < 0) this leaves |vec|. Stands in for the intrinsic */ | |||
| /* VFRSUBVF_MASK_FLOAT. The mask is first copied into register v0 (hence the clobber), */ | |||
| /* which the v0.t suffix refers to. Reads `zero` and `gvl` from the enclosing function. */ | |||
| #define AMIN_NEGATE_MASKED(vec, mask) asm volatile("vor.vv v0, %1, %1\n\t" "vsetvli x0, %3, " AMIN_ASM_SEW ",m8 \n\t" "vfrsub.vf %0, %0, %2, v0.t \n\t" :"+v"(vec) :"v"(mask), "f"(zero), "r"(gvl) :"v0") | |||
| /* Smallest absolute value among x[0], x[inc_x], ..., x[(n-1)*inc_x]. */ | |||
| /* Returns 0.0 when n <= 0 or inc_x <= 0. */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG i=0, j=0; /* i: unrolled-loop iteration counter; j: elements consumed so far */ | |||
|     if (n <= 0 || inc_x <= 0) return(0.0); | |||
|     FLOAT minf=AMIN_FLOAT_MAX; | |||
|     unsigned int gvl = 0; /* value returned by vsetvli for the current chunk */ | |||
|     FLOAT_V_T v0, v1, v_min; | |||
|     MASK_T mask0, mask1; | |||
|     FLOAT zero = 0.0; /* scalar operand of AMIN_NEGATE_MASKED */ | |||
|     if(inc_x == 1){ | |||
|         /* Contiguous input. */ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         if(gvl <= n/2){ | |||
|             /* At least 2*gvl elements: process two chunks per iteration. */ | |||
|             v_min = VFMVVF_FLOAT(AMIN_FLOAT_MAX, gvl); | |||
|             for(i=0,j=0; i<n/(gvl*2); i++){ | |||
|                 v0 = VLEV_FLOAT(&x[j], gvl); | |||
|                 mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|                 AMIN_NEGATE_MASKED(v0, mask0); | |||
|                 v_min = VFMINVV_FLOAT(v_min, v0, gvl); | |||
|                 v1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
|                 mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
|                 AMIN_NEGATE_MASKED(v1, mask1); | |||
|                 v_min = VFMINVV_FLOAT(v_min, v1, gvl); | |||
|                 j += gvl*2; | |||
|             } | |||
|             v1 = VFMVVF_FLOAT(AMIN_FLOAT_MAX, gvl); | |||
|             v0 = VFREDMINVS_FLOAT(v_min, v1, gvl); | |||
|             minf = v0[0]; | |||
|         } | |||
|         /* Remaining elements, or all of them when the unrolled loop was skipped. */ | |||
|         for(;j<n;){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             v0 = VLEV_FLOAT(&x[j], gvl); | |||
|             mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             AMIN_NEGATE_MASKED(v0, mask0); | |||
|             v1 = VFMVVF_FLOAT(AMIN_FLOAT_MAX, gvl); | |||
|             v0 = VFREDMINVS_FLOAT(v0, v1, gvl); | |||
|             if(v0[0] < minf) | |||
|                 minf = v0[0]; | |||
|             j += gvl; | |||
|         } | |||
|     }else{ | |||
|         /* Strided input: same algorithm using the strided load. */ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         BLASLONG stride_x = inc_x * sizeof(FLOAT); /* element distance in bytes */ | |||
|         if(gvl <= n/2){ | |||
|             BLASLONG idx = 0, inc_xv = inc_x * gvl; /* inc_xv: offset in x covered by one chunk */ | |||
|             v_min = VFMVVF_FLOAT(AMIN_FLOAT_MAX, gvl); | |||
|             for(i=0,j=0; i<n/(gvl*2); i++){ | |||
|                 v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
|                 mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|                 AMIN_NEGATE_MASKED(v0, mask0); | |||
|                 v_min = VFMINVV_FLOAT(v_min, v0, gvl); | |||
|                 v1 = VLSEV_FLOAT(&x[idx+inc_xv], stride_x, gvl); | |||
|                 mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
|                 AMIN_NEGATE_MASKED(v1, mask1); | |||
|                 v_min = VFMINVV_FLOAT(v_min, v1, gvl); | |||
|                 j += gvl*2; | |||
|                 idx += inc_xv*2; | |||
|             } | |||
|             v1 = VFMVVF_FLOAT(AMIN_FLOAT_MAX, gvl); | |||
|             v0 = VFREDMINVS_FLOAT(v_min, v1, gvl); | |||
|             minf = v0[0]; | |||
|         } | |||
|         for(;j<n;){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
|             mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             AMIN_NEGATE_MASKED(v0, mask0); | |||
|             v1 = VFMVVF_FLOAT(AMIN_FLOAT_MAX, gvl); | |||
|             v0 = VFREDMINVS_FLOAT(v0, v1, gvl); | |||
|             if(v0[0] < minf) | |||
|                 minf = v0[0]; | |||
|             j += gvl; | |||
|         } | |||
|     } | |||
|     return(minf); | |||
| } | |||
| @@ -0,0 +1,67 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* ABS: absolute-value routine matching the precision selected by DOUBLE. */ | |||
| #if !defined(DOUBLE) | |||
| #define ABS fabsf | |||
| #else | |||
| #define ABS fabs | |||
| #endif | |||
| /* Sum of the absolute values of x[0], x[inc_x], ..., x[(n-1)*inc_x]. */ | |||
| /* Returns 0.0 when n <= 0 or inc_x <= 0. */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG pos; | |||
|     FLOAT total = 0.0; | |||
|     if (n <= 0 || inc_x <= 0) { | |||
|         return total; | |||
|     } | |||
|     /* Turn the element count into an exclusive upper bound on the index. */ | |||
|     n *= inc_x; | |||
|     for (pos = 0; pos < n; pos += inc_x) { | |||
|         total += ABS(x[pos]); | |||
|     } | |||
|     return total; | |||
| } | |||
| @@ -0,0 +1,131 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if !defined(DOUBLE) /* DOUBLE not defined: the generic names below map to the e32 / float32xm8 variants */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 /* load used by the inc_x == 1 path of the function below */ | |||
| #define VLSEV_FLOAT vlsev_float32xm8 /* load that additionally takes stride_x, used when inc_x != 1 */ | |||
| #define VFREDSUMVS_FLOAT vfredsumvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 /* called directly by the function below, with the mask from VMFLTVF_FLOAT */ | |||
| #define VFADDVV_FLOAT vfaddvv_float32xm8 | |||
| #else /* DOUBLE defined: the same names map to the e64 / float64xm8 variants */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDSUMVS_FLOAT vfredsumvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 | |||
| #define VFADDVV_FLOAT vfaddvv_float64xm8 | |||
| #endif | |||
| /* Sum of the absolute values of x[0], x[inc_x], ..., x[(n-1)*inc_x]. */ | |||
| /* Returns 0.0 when n <= 0 or inc_x <= 0. */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG i=0, j=0; /* i: unrolled-loop iteration counter; j: elements consumed so far */ | |||
|     BLASLONG ix=0; /* element offset into x for the strided unrolled loop */ | |||
|     FLOAT asumf=0.0; | |||
|     if (n <= 0 || inc_x <= 0) return(asumf); | |||
|     unsigned int gvl = 0; /* value returned by vsetvli for the current chunk */ | |||
|     FLOAT_V_T v0, v1, v_zero,v_sum; | |||
|     MASK_T mask0, mask1; | |||
|     if(inc_x == 1){ | |||
|         /* Contiguous input. */ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         v_zero = VFMVVF_FLOAT(0, gvl); | |||
|         if(gvl <= n/2){ | |||
|             /* At least 2*gvl elements: accumulate two chunks per iteration. */ | |||
|             v_sum = VFMVVF_FLOAT(0, gvl); | |||
|             for(i=0,j=0; i<n/(gvl*2); i++){ | |||
|                 v0 = VLEV_FLOAT(&x[j], gvl); | |||
|                 /* mask = (v < 0); the masked reverse-subtract from 0 then yields |v|. */ | |||
|                 mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|                 v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
|                 v_sum = VFADDVV_FLOAT(v_sum, v0, gvl); | |||
|                 v1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
|                 mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
|                 v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
|                 v_sum = VFADDVV_FLOAT(v_sum, v1, gvl); | |||
|                 j += gvl * 2; | |||
|             } | |||
|             v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl); | |||
|             asumf += v0[0]; | |||
|         } | |||
|         /* Remaining elements, or all of them when the unrolled loop was skipped. */ | |||
|         for(;j<n;){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             v0 = VLEV_FLOAT(&x[j], gvl); | |||
|             mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
|             v0 = VFREDSUMVS_FLOAT(v0, v_zero, gvl); | |||
|             asumf += v0[0]; | |||
|             j += gvl; | |||
|         } | |||
|     }else{ | |||
|         /* Strided input: same algorithm using the strided load. */ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         /* Byte stride; BLASLONG (as in the sibling kernels) avoids truncating large strides. */ | |||
|         BLASLONG stride_x = inc_x * sizeof(FLOAT); | |||
|         v_zero = VFMVVF_FLOAT(0, gvl); | |||
|         if(gvl <= n/2){ | |||
|             v_sum = VFMVVF_FLOAT(0, gvl); | |||
|             BLASLONG inc_xv = inc_x * gvl; /* offset in x covered by one chunk of gvl strided elements */ | |||
|             for(i=0,j=0; i<n/(gvl*2); i++){ | |||
|                 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|                 mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|                 v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
|                 v_sum = VFADDVV_FLOAT(v_sum, v0, gvl); | |||
|                 v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
|                 mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
|                 v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
|                 v_sum = VFADDVV_FLOAT(v_sum, v1, gvl); | |||
|                 j += gvl * 2; | |||
|                 /* Advance the read offset past both chunks. The chunk size inc_xv must stay */ | |||
|                 /* constant; growing it instead would re-read chunk 0 and run past the input. */ | |||
|                 ix += inc_xv * 2; | |||
|             } | |||
|             v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl); | |||
|             asumf += v0[0]; | |||
|         } | |||
|         /* Strided tail: addresses are recomputed from j. */ | |||
|         for(;j<n;){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
|             mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
|             v0 = VFREDSUMVS_FLOAT(v0, v_zero, gvl); | |||
|             asumf += v0[0]; | |||
|             j += gvl; | |||
|         } | |||
|     } | |||
|     return(asumf); | |||
| } | |||
| @@ -0,0 +1,96 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /* y := alpha*x + beta*y over n elements, stepping x by inc_x and y by inc_y. */ | |||
| /* When alpha (or beta) compares equal to 0.0 the matching term is dropped and */ | |||
| /* x (or the old y) is not read. Always returns 0; a negative n leaves y untouched. */ | |||
| int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG xpos = 0; | |||
|     BLASLONG ypos = 0; | |||
|     if (n < 0) { | |||
|         return 0; | |||
|     } | |||
|     if (beta == 0.0 && alpha == 0.0) { | |||
|         /* Both factors zero: clear y. */ | |||
|         for (k = 0; k < n; k++) { | |||
|             y[ypos] = 0.0; | |||
|             ypos += inc_y; | |||
|         } | |||
|     } else if (beta == 0.0) { | |||
|         /* y = alpha * x */ | |||
|         for (k = 0; k < n; k++) { | |||
|             y[ypos] = alpha * x[xpos]; | |||
|             xpos += inc_x; | |||
|             ypos += inc_y; | |||
|         } | |||
|     } else if (alpha == 0.0) { | |||
|         /* y = beta * y */ | |||
|         for (k = 0; k < n; k++) { | |||
|             y[ypos] = beta * y[ypos]; | |||
|             ypos += inc_y; | |||
|         } | |||
|     } else { | |||
|         /* General case. */ | |||
|         for (k = 0; k < n; k++) { | |||
|             y[ypos] = alpha * x[xpos] + beta * y[ypos]; | |||
|             xpos += inc_x; | |||
|             ypos += inc_y; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,378 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) /* DOUBLE not defined: the generic names below map to the e32 / float32xm4 variants */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLEV_FLOAT vlev_float32xm4 /* load used when the source increment is 1 */ | |||
| #define VLSEV_FLOAT vlsev_float32xm4 /* load that additionally takes a stride argument */ | |||
| #define VSEV_FLOAT vsev_float32xm4 /* store used when inc_y == 1 */ | |||
| #define VSSEV_FLOAT vssev_float32xm4 /* store that additionally takes stride_y */ | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 | |||
| #define VFMULVF_FLOAT vfmulvf_float32xm4 | |||
| #else /* DOUBLE defined: the same names map to the e64 / float64xm4 variants */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSEV_FLOAT vsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFMULVF_FLOAT vfmulvf_float64xm4 | |||
| #endif | |||
| int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| if (n < 0) return(0); | |||
| BLASLONG i=0, j=0; | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T vx0, vx1; | |||
| FLOAT_V_T vy0, vy1; | |||
| BLASLONG stride_x, stride_y, ix = 0, iy = 0; | |||
| if(beta == 0.0){ | |||
| if(alpha == 0.0){//alpha == 0 && beta == 0 | |||
| if(inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| vy0 = VFMVVF_FLOAT(0.0, gvl); | |||
| for(i=0,j=0;i<n/(gvl*2);i++){ | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy0, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vy0 = VFMVVF_FLOAT(0.0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| vy0 = VFMVVF_FLOAT(0.0, gvl); | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0;i<n/(gvl*2);i++){ | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy0, gvl); | |||
| j += gvl * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vy0 = VFMVVF_FLOAT(0.0, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| } | |||
| }else{//alpha != 0 && beta == 0, y = ax | |||
| if(inc_x == 1 && inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vx1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| vy1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if(inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
| vy1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| j += gvl * 2; | |||
| ix += inc_xv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vx1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| vy1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else{//inc_x !=1 && inc_y != 1 | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
| vy1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| ix += inc_xv * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| } | |||
| } | |||
| }else{//beta != 0 | |||
| if(alpha == 0.0){//alpha == 0 && beta != 0; y = by | |||
| if(inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMULVF_FLOAT(vy0, beta, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vy1 = VLEV_FLOAT(&y[j+gvl], gvl); | |||
| vy1 = VFMULVF_FLOAT(vy1, beta, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMULVF_FLOAT(vy0, beta, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy0 = VFMULVF_FLOAT(vy0, beta, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl); | |||
| vy1 = VFMULVF_FLOAT(vy1, beta, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vy0 = VFMULVF_FLOAT(vy0, beta, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| } | |||
| }else{//alpha != 0 && beta != 0; y = ax + by | |||
| if(inc_x == 1 && inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vx1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| vx1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| vy1 = VLEV_FLOAT(&y[j+gvl], gvl); | |||
| vy1 = VFMACCVF_FLOAT(vx1, beta, vy1,gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if(inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
| vx1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| vy1 = VLEV_FLOAT(&y[j+gvl], gvl); | |||
| vy1 = VFMACCVF_FLOAT(vx1, beta, vy1, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| j += gvl * 2; | |||
| ix += inc_xv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vx1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| vx1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vx1, beta, vy1, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else{//inc_x != 1 && inc_y != 1 | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0;i<n/(2*gvl);i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
| vx1 = VFMULVF_FLOAT(vx1, alpha, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vx1, beta, vy1, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| ix += inc_xv * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vx0 = VFMULVF_FLOAT(vx0, alpha, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vx0, beta, vy0, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,64 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix,iy; | |||
| if ( n < 0 ) return(0); | |||
| if ( da == 0.0 ) return(0); | |||
| ix = 0; | |||
| iy = 0; | |||
| while(i < n) | |||
| { | |||
| y[iy] += da * x[ix] ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,179 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases used by the kernel below.
 * Without DOUBLE the 32-bit float (E32) names are selected, with DOUBLE the
 * 64-bit (E64) ones; both variants use register grouping M4 (the "xm4" types).
 * As used in this file: VLEV/VSEV are the contiguous load/store, VLSEV/VSSEV
 * take an additional byte stride, and VFMACCVF(acc, scalar, vec, vl) is used
 * to accumulate scalar * vec into acc.
 */
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLEV_FLOAT vlev_float32xm4 | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VSEV_FLOAT vsev_float32xm4 | |||
| #define VSSEV_FLOAT vssev_float32xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSEV_FLOAT vsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #endif | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
| BLASLONG i=0, j=0, jx=0, jy=0; | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T vx0, vx1; | |||
| FLOAT_V_T vy0, vy1; | |||
| BLASLONG stride_x, stride_y; | |||
| if (n < 0) return(0); | |||
| if (da == 0.0) return(0); | |||
| if (inc_x == 1 && inc_y == 1) { | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if (gvl <= n/2) { | |||
| for (i = 0, j=0; i < n/(2*gvl); i++, j+=2*gvl) { | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vx1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| vy1 = VLEV_FLOAT(&y[j+gvl], gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| } | |||
| } | |||
| //tail | |||
| for (; j < n; ) { | |||
| gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if (inc_y == 1) { | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| for(i=0,j=0; i<n/(2*gvl); i++){ | |||
| vx0 = VLSEV_FLOAT(&x[jx], stride_x, gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[jx+inc_xv], stride_x, gvl); | |||
| vy1 = VLEV_FLOAT(&y[j+gvl], gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
| j += gvl * 2; | |||
| jx += inc_xv * 2; | |||
| } | |||
| } | |||
| for (; j<n; ) { | |||
| gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy0 = VLEV_FLOAT(&y[j], gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSEV_FLOAT(&y[j], vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if(inc_x == 1){ | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0; i<n/(2*gvl); i++){ | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VLSEV_FLOAT(&y[jy], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSSEV_FLOAT(&y[jy], stride_y, vy0, gvl); | |||
| vx1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| vy1 = VLSEV_FLOAT(&y[jy+inc_yv], stride_y, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl); | |||
| VSSEV_FLOAT(&y[jy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| jy += inc_yv * 2; | |||
| } | |||
| } | |||
| for (; j<n; ) { | |||
| gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLEV_FLOAT(&x[j], gvl); | |||
| vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| for(i=0,j=0; i<n/(2*gvl); i++){ | |||
| vx0 = VLSEV_FLOAT(&x[jx], stride_x, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[jy], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSSEV_FLOAT(&y[jy], stride_y, vy0, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[jx+inc_xv], stride_x, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[jy+inc_yv], stride_y, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, da, vx1, gvl); | |||
| VSSEV_FLOAT(&y[jy+inc_yv], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| jx += inc_xv * 2; | |||
| jy += inc_yv * 2; | |||
| } | |||
| } | |||
| for (; j<n; ) { | |||
| gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, da, vx0, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| } | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,59 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0,iy=0; | |||
| if ( n < 0 ) return(0); | |||
| while(i < n) | |||
| { | |||
| y[iy] = x[ix] ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,148 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases used by the copy kernel below.
 * Without DOUBLE the 32-bit float (E32) names are selected, with DOUBLE the
 * 64-bit (E64) ones; both variants use register grouping M8 (the "xm8" types).
 * As used in this file: VLEV/VSEV are the contiguous load/store and
 * VLSEV/VSSEV take an additional byte stride.
 */
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VSEV_FLOAT vsev_float32xm8 | |||
| #define VSSEV_FLOAT vssev_float32xm8 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VSEV_FLOAT vsev_float64xm8 | |||
| #define VSSEV_FLOAT vssev_float64xm8 | |||
| #endif | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| BLASLONG ix=0,iy=0; | |||
| if(n < 0) return(0); | |||
| BLASLONG stride_x, stride_y; | |||
| FLOAT_V_T v0, v1, v2, v3; | |||
| unsigned int gvl = 0; | |||
| if(inc_x == 1 && inc_y == 1){ | |||
| memcpy(&y[0], &x[0], n*sizeof(FLOAT)); | |||
| }else if (inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| if(gvl <= n/4){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| BLASLONG gvl3 = gvl * 3; | |||
| BLASLONG inc_xv3 = inc_xv * 3; | |||
| for(i=0,j=0; i<n/(4*gvl); i++){ | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| VSEV_FLOAT(&y[j], v0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
| VSEV_FLOAT(&y[j+gvl], v1, gvl); | |||
| v2 = VLSEV_FLOAT(&x[ix+inc_xv*2], stride_x, gvl); | |||
| VSEV_FLOAT(&y[j+gvl*2], v2, gvl); | |||
| v3 = VLSEV_FLOAT(&x[ix+inc_xv3], stride_x, gvl); | |||
| VSEV_FLOAT(&y[j+gvl3], v3, gvl); | |||
| j += gvl * 4; | |||
| ix += inc_xv * 4; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| VSEV_FLOAT(&y[j], v0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/4){ | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| BLASLONG inc_yv3 = inc_yv * 3; | |||
| BLASLONG gvl3 = gvl * 3; | |||
| for(i=0,j=0; i<n/(4*gvl); i++){ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, v0, gvl); | |||
| v1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, v1, gvl); | |||
| v2 = VLEV_FLOAT(&x[j+gvl*2], gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv*2], stride_y, v2, gvl); | |||
| v3 = VLEV_FLOAT(&x[j+gvl3], gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv3], stride_y, v3, gvl); | |||
| j += gvl * 4; | |||
| iy += inc_yv * 4; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, v0, gvl); | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| if(gvl <= n/4){ | |||
| BLASLONG inc_xv = inc_x * gvl; | |||
| BLASLONG inc_yv = inc_y * gvl; | |||
| BLASLONG inc_xv3 = inc_xv * 3; | |||
| BLASLONG inc_yv3 = inc_yv * 3; | |||
| for(i=0,j=0; i<n/(4*gvl); i++){ | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, v0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, v1, gvl); | |||
| v2 = VLSEV_FLOAT(&x[ix+inc_xv*2], stride_x, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv*2], stride_y, v2, gvl); | |||
| v3 = VLSEV_FLOAT(&x[ix+inc_xv3], stride_x, gvl); | |||
| VSSEV_FLOAT(&y[iy+inc_yv3], stride_y, v3, gvl); | |||
| j += gvl * 4; | |||
| ix += inc_xv * 4; | |||
| iy += inc_yv * 4; | |||
| } | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| VSSEV_FLOAT(&y[j*inc_y], stride_y, v0, gvl); | |||
| j += gvl; | |||
| } | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,977 @@ | |||
| #include "common.h" | |||
| #include <riscv-vector.h> | |||
/*
 * KERNEL8x4_I: first (prologue) stage of the 8x4 inline-asm multiply kernel.
 *
 *  - Sets t1-t3 to PB+8/+16/+24 bytes and t4-t6 to PA+16/+32/+48 bytes.
 *  - Loads four B scalars with fld (64-bit) into ft0-ft3 and broadcasts them
 *    into v8-v11; loads four A vectors into v0-v3.
 *  - Issues the 16 vfmacc.vv updates of v16-v31 using v0-v3 and v8-v11.
 *  - Interleaved with those, loads the next A vectors into v4-v7 and the next
 *    B scalars into ft4-ft7 -> v12-v15 for the following stage.
 *
 * On exit PA and t4-t6 have advanced by 2 * 8*8 bytes and PB and t1-t3 by
 * 2 * 4*8 bytes. v16-v31 are accumulated into, never cleared here, so they
 * are presumably initialised by the surrounding asm (not visible in this
 * macro). NOTE(review): the 2*8-byte spacing of the four A loads suggests
 * two doubles per vector register; vl/vtype are set outside this macro.
 */
| #define KERNEL8x4_I \ | |||
| "addi t1, %[PB], 1*8 \n\t"\ | |||
| "addi t2, %[PB], 2*8 \n\t"\ | |||
| "addi t3, %[PB], 3*8 \n\t"\ | |||
| "fld ft0, (%[PB]) \n\t"\ | |||
| "fld ft1, (t1) \n\t"\ | |||
| "fld ft2, (t2) \n\t"\ | |||
| "fld ft3, (t3) \n\t"\ | |||
| "vle.v v0, (%[PA]) \n\t"\ | |||
| "addi t4, %[PA], 2*8 \n\t"\ | |||
| "addi t5, %[PA], 4*8 \n\t"\ | |||
| "vfmv.v.f v8, ft0 \n\t"\ | |||
| "addi t6, %[PA], 6*8 \n\t"\ | |||
| "addi %[PA], %[PA], 8*8 \n\t"\ | |||
| "vle.v v1, (t4) \n\t"\ | |||
| "addi t4, t4, 8*8 \n\t"\ | |||
| "vfmv.v.f v9, ft1 \n\t"\ | |||
| "vle.v v2, (t5) \n\t"\ | |||
| "addi t5, t5, 8*8 \n\t"\ | |||
| "vle.v v3, (t6) \n\t"\ | |||
| "addi t6, t6, 8*8 \n\t"\ | |||
| "vfmv.v.f v10, ft2 \n\t"\ | |||
| "addi %[PB], %[PB], 4*8 \n\t"\ | |||
| "vle.v v4, (%[PA]) \n\t"\ | |||
| "addi %[PA], %[PA], 8*8 \n\t"\ | |||
| "vfmv.v.f v11, ft3 \n\t"\ | |||
| "vfmacc.vv v16, v8, v0 \n\t"\ | |||
| "addi t1, t1, 4*8 \n\t"\ | |||
| "vle.v v5, (t4) \n\t"\ | |||
| "addi t4, t4, 8*8 \n\t"\ | |||
| "vfmacc.vv v17, v8, v1 \n\t"\ | |||
| "addi t2, t2, 4*8 \n\t"\ | |||
| "vle.v v6, (t5) \n\t"\ | |||
| "addi t5, t5, 8*8 \n\t"\ | |||
| "vfmacc.vv v18, v8, v2 \n\t"\ | |||
| "addi t3, t3, 4*8 \n\t"\ | |||
| "vle.v v7, (t6) \n\t"\ | |||
| "addi t6, t6, 8*8 \n\t"\ | |||
| "vfmacc.vv v19, v8, v3 \n\t"\ | |||
| "fld ft4, (%[PB]) \n\t"\ | |||
| "vfmacc.vv v20, v9, v0 \n\t"\ | |||
| "fld ft5, (t1) \n\t"\ | |||
| "vfmacc.vv v21, v9, v1 \n\t"\ | |||
| "fld ft6, (t2) \n\t"\ | |||
| "vfmacc.vv v22, v9, v2 \n\t"\ | |||
| "fld ft7, (t3) \n\t"\ | |||
| "vfmacc.vv v23, v9, v3 \n\t"\ | |||
| "vfmv.v.f v12, ft4 \n\t"\ | |||
| "vfmacc.vv v24, v10, v0 \n\t"\ | |||
| "vfmv.v.f v13, ft5 \n\t"\ | |||
| "vfmacc.vv v25, v10, v1 \n\t"\ | |||
| "vfmv.v.f v14, ft6 \n\t"\ | |||
| "vfmacc.vv v26, v10, v2 \n\t"\ | |||
| "vfmv.v.f v15, ft7 \n\t"\ | |||
| "vfmacc.vv v27, v10, v3 \n\t"\ | |||
| "addi %[PB], %[PB], 4*8 \n\t"\ | |||
| "vfmacc.vv v28, v11, v0 \n\t"\ | |||
| "addi t1, t1, 4*8 \n\t"\ | |||
| "vfmacc.vv v29, v11, v1 \n\t"\ | |||
| "addi t2, t2, 4*8 \n\t"\ | |||
| "vfmacc.vv v30, v11, v2 \n\t"\ | |||
| "addi t3, t3, 4*8 \n\t"\ | |||
| "vfmacc.vv v31, v11, v3 \n\t" | |||
/*
 * KERNEL8x4_M1: steady-state stage operating on the first register set.
 *
 * Accumulates into v16-v31 with vfmacc.vv using A vectors v0-v3 and B
 * broadcasts v8-v11, while loading the operands of the other set for the
 * next stage: A vectors v4-v7 from PA/t4/t5/t6 and B scalars ft4-ft7 from
 * PB/t1/t2/t3, broadcast into v12-v15.
 * Each of PA, t4, t5, t6 advances by 8*8 bytes and each of PB, t1, t2, t3 by
 * 4*8 bytes. Loads and address updates are interleaved between the
 * multiply-accumulates, so the instruction order is intentional.
 */
| #define KERNEL8x4_M1 \ | |||
| "vfmacc.vv v16, v8, v0 \n\t"\ | |||
| "vle.v v4, (%[PA]) \n\t"\ | |||
| "addi %[PA], %[PA], 8*8 \n\t"\ | |||
| "vfmacc.vv v17, v8, v1 \n\t"\ | |||
| "vle.v v5, (t4) \n\t"\ | |||
| "addi t4, t4, 8*8 \n\t"\ | |||
| "vfmacc.vv v18, v8, v2 \n\t"\ | |||
| "vle.v v6, (t5) \n\t"\ | |||
| "addi t5, t5, 8*8 \n\t"\ | |||
| "vfmacc.vv v19, v8, v3 \n\t"\ | |||
| "vle.v v7, (t6) \n\t"\ | |||
| "addi t6, t6, 8*8 \n\t"\ | |||
| "vfmacc.vv v20, v9, v0 \n\t"\ | |||
| "fld ft4, (%[PB]) \n\t"\ | |||
| "vfmacc.vv v21, v9, v1 \n\t"\ | |||
| "fld ft5, (t1) \n\t"\ | |||
| "vfmacc.vv v22, v9, v2 \n\t"\ | |||
| "fld ft6, (t2) \n\t"\ | |||
| "vfmacc.vv v23, v9, v3 \n\t"\ | |||
| "fld ft7, (t3) \n\t"\ | |||
| "addi %[PB], %[PB], 4*8 \n\t"\ | |||
| "vfmacc.vv v24, v10, v0 \n\t"\ | |||
| "addi t1, t1, 4*8 \n\t"\ | |||
| "vfmacc.vv v25, v10, v1 \n\t"\ | |||
| "vfmv.v.f v12, ft4 \n\t"\ | |||
| "vfmacc.vv v26, v10, v2 \n\t"\ | |||
| "addi t2, t2, 4*8 \n\t"\ | |||
| "vfmacc.vv v27, v10, v3 \n\t"\ | |||
| "vfmv.v.f v13, ft5 \n\t"\ | |||
| "vfmacc.vv v28, v11, v0 \n\t"\ | |||
| "addi t3, t3, 4*8 \n\t"\ | |||
| "vfmacc.vv v29, v11, v1 \n\t"\ | |||
| "vfmv.v.f v14, ft6 \n\t"\ | |||
| "vfmacc.vv v30, v11, v2 \n\t"\ | |||
| "vfmacc.vv v31, v11, v3 \n\t"\ | |||
| "vfmv.v.f v15, ft7 \n\t" | |||
/*
 * KERNEL8x4_M2: steady-state stage operating on the second register set
 * (the mirror image of KERNEL8x4_M1).
 *
 * Accumulates into v16-v31 with vfmacc.vv using A vectors v4-v7 and B
 * broadcasts v12-v15, while loading the operands of the first set for the
 * next stage: A vectors v0-v3 from PA/t4/t5/t6 and B scalars ft0-ft3 from
 * PB/t1/t2/t3, broadcast into v8-v11.
 * Each of PA, t4, t5, t6 advances by 8*8 bytes and each of PB, t1, t2, t3 by
 * 4*8 bytes.
 */
| #define KERNEL8x4_M2 \ | |||
| "vfmacc.vv v16, v12, v4 \n\t"\ | |||
| "vle.v v0, (%[PA]) \n\t"\ | |||
| "addi %[PA], %[PA], 8*8 \n\t"\ | |||
| "vfmacc.vv v17, v12, v5 \n\t"\ | |||
| "vle.v v1, (t4) \n\t"\ | |||
| "addi t4, t4, 8*8 \n\t"\ | |||
| "vfmacc.vv v18, v12, v6 \n\t"\ | |||
| "vle.v v2, (t5) \n\t"\ | |||
| "addi t5, t5, 8*8 \n\t"\ | |||
| "vfmacc.vv v19, v12, v7 \n\t"\ | |||
| "vle.v v3, (t6) \n\t"\ | |||
| "addi t6, t6, 8*8 \n\t"\ | |||
| "vfmacc.vv v20, v13, v4 \n\t"\ | |||
| "fld ft0, (%[PB]) \n\t"\ | |||
| "vfmacc.vv v21, v13, v5 \n\t"\ | |||
| "fld ft1, (t1) \n\t"\ | |||
| "vfmacc.vv v22, v13, v6 \n\t"\ | |||
| "fld ft2, (t2) \n\t"\ | |||
| "vfmacc.vv v23, v13, v7 \n\t"\ | |||
| "fld ft3, (t3) \n\t"\ | |||
| "addi %[PB], %[PB], 4*8 \n\t"\ | |||
| "vfmacc.vv v24, v14, v4 \n\t"\ | |||
| "addi t1, t1, 4*8 \n\t"\ | |||
| "vfmacc.vv v25, v14, v5 \n\t"\ | |||
| "vfmv.v.f v8, ft0 \n\t"\ | |||
| "vfmacc.vv v26, v14, v6 \n\t"\ | |||
| "addi t2, t2, 4*8 \n\t"\ | |||
| "vfmacc.vv v27, v14, v7 \n\t"\ | |||
| "vfmv.v.f v9, ft1 \n\t"\ | |||
| "vfmacc.vv v28, v15, v4 \n\t"\ | |||
| "addi t3, t3, 4*8 \n\t"\ | |||
| "vfmacc.vv v29, v15, v5 \n\t"\ | |||
| "vfmv.v.f v10, ft2 \n\t"\ | |||
| "vfmacc.vv v30, v15, v6 \n\t"\ | |||
| "vfmacc.vv v31, v15, v7 \n\t"\ | |||
| "vfmv.v.f v11, ft3 \n\t" | |||
/*
 * KERNEL8x4_E: final k-step of an unrolled group.
 *
 * Performs the same 16 multiply-accumulates as KERNEL8x4_M2
 * (v16..v31 += (v12..v15) * (v4..v7)) but issues no loads and advances no
 * pointers, so nothing is read beyond the operands already held in registers.
 */
| #define KERNEL8x4_E \ | |||
| "vfmacc.vv v16, v12, v4 \n\t"\ | |||
| "vfmacc.vv v17, v12, v5 \n\t"\ | |||
| "vfmacc.vv v18, v12, v6 \n\t"\ | |||
| "vfmacc.vv v19, v12, v7 \n\t"\ | |||
| "vfmacc.vv v20, v13, v4 \n\t"\ | |||
| "vfmacc.vv v21, v13, v5 \n\t"\ | |||
| "vfmacc.vv v22, v13, v6 \n\t"\ | |||
| "vfmacc.vv v23, v13, v7 \n\t"\ | |||
| "vfmacc.vv v24, v14, v4 \n\t"\ | |||
| "vfmacc.vv v25, v14, v5 \n\t"\ | |||
| "vfmacc.vv v26, v14, v6 \n\t"\ | |||
| "vfmacc.vv v27, v14, v7 \n\t"\ | |||
| "vfmacc.vv v28, v15, v4 \n\t"\ | |||
| "vfmacc.vv v29, v15, v5 \n\t"\ | |||
| "vfmacc.vv v30, v15, v6 \n\t"\ | |||
| "vfmacc.vv v31, v15, v7 \n\t" | |||
/*
 * GEMM micro-kernel: C += alpha * A * B on packed panels, tiled 8 rows x 4 columns.
 *
 * Parameters (as used by the code below):
 *   bm, bn, bk - number of rows of C, columns of C, and the shared inner dimension.
 *   alpha      - scalar applied to every accumulated product before it is added to C.
 *   ba         - packed A. It is walked linearly: 8 values per k inside an 8-row
 *                tile, then 4, 2 and 1 values per k for the row remainders.
 *   bb         - packed B. It is walked linearly: 4 values per k inside a 4-column
 *                panel, then 2 and 1 values per k for the column remainders.
 *   C, ldc     - output matrix; consecutive columns are ldc elements apart.
 *   offset     - only declared when TRMMKERNEL is defined; it is not referenced
 *                anywhere in this body.
 *
 * Structure: columns are processed in panels of 4, then 2 (bn&2), then 1 (bn&1).
 * Inside every panel, rows are processed in tiles of 8, then 4, 2 and 1.
 * Only the 8x4 tile uses inline vector assembly; every other case is scalar C.
 *
 * Always returns 0.
 *
 * NOTE(review): the asm block addresses elements with 8-byte strides and uses
 * e64 elements, so this kernel presumably is only built with FLOAT == double -
 * confirm against the kernel selection files.
 */
| int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc | |||
| #ifdef TRMMKERNEL | |||
| ,BLASLONG offset | |||
| #endif | |||
| ) | |||
| { | |||
| BLASLONG i,j,k; | |||
| FLOAT *C0,*C1,*C2,*C3; | |||
| FLOAT *ptrba,*ptrbb; | |||
| FLOAT loadb0,loadb1,loadb2,loadb3; | |||
| FLOAT load0,load1,load2,load3,load4,load5,load6,load7; | |||
| FLOAT res0,res1,res2,res3; | |||
| FLOAT res4,res5,res6,res7; | |||
| FLOAT res8,res9,res10,res11; | |||
| FLOAT res12,res13,res14,res15; | |||
/* ---- Column panels of width 4: C0..C3 point at the four output columns. ---- */
| for (j=0; j<bn/4; j+=1){ | |||
| C0 = C; | |||
| C1 = C0+ldc; | |||
| C2 = C1+ldc; | |||
| C3 = C2+ldc; | |||
/* A is restarted for every column panel; B (bb) is advanced at the end of the panel. */
| ptrba = ba; | |||
/* 8-row tiles, handled by the inline assembly below. */
| for(i=0; i<bm/8; i+=1){ | |||
| ptrbb = bb; | |||
| //t0 for k | |||
| //ft0-ft3,ft4-ft7,v8-v15 for B, t1-t3 for PB1-3 | |||
| //v0-v3,v4-v7 for A, t4-t6 for PA1-3 | |||
| //v16-v31 for temp C | |||
/*
 * Accumulator layout, as established by the save sequence at M8x4_SAVERESULT:
 *   v16..v19 -> column C0, v20..v23 -> C1, v24..v27 -> C2, v28..v31 -> C3,
 * each group of four registers covering the 8 rows in steps of 2*8 bytes.
 * The 2*8-byte spacing between consecutive vector loads means each vector
 * access is expected to cover two doubles - presumably VLEN = 128; confirm.
 *
 * NOTE(review): the labels (M8x4_TAIL, M8x4_MAINLOOP, ...) are ordinary
 * assembler symbols, not local labels. If the compiler ever emits this asm
 * statement twice (inlining/unrolling), assembly fails with duplicate
 * symbols - confirm whether '%=' or numeric local labels are wanted.
 *
 * NOTE(review): the statement reads A/B and writes C through pointers but
 * lists no "memory" clobber - confirm the compiler cannot cache or reorder
 * accesses to those buffers around it.
 */
| asm volatile( | |||
| "vsetvli zero, zero, e64,m1 \n\t" | |||
/*
 * ft11 is the zero used to clear the 16 accumulators.
 * NOTE(review): fmv.w.x is the 32-bit move while the accumulators are e64;
 * confirm ft11 reads back as +0.0 when consumed as a double on the target
 * (fmv.d.x is the 64-bit form).
 */
| "fmv.w.x ft11, zero \n\t" | |||
/* t0 is overwritten by the srli below before it is first read. */
| "mv t0, %[BK] \n\t" | |||
| "vfmv.v.f v16, ft11 \n\t" | |||
| "vfmv.v.f v17, ft11 \n\t" | |||
| "vfmv.v.f v18, ft11 \n\t" | |||
| "vfmv.v.f v19, ft11 \n\t" | |||
| "vfmv.v.f v20, ft11 \n\t" | |||
| "vfmv.v.f v21, ft11 \n\t" | |||
| "vfmv.v.f v22, ft11 \n\t" | |||
| "vfmv.v.f v23, ft11 \n\t" | |||
| "vfmv.v.f v24, ft11 \n\t" | |||
| "vfmv.v.f v25, ft11 \n\t" | |||
| "vfmv.v.f v26, ft11 \n\t" | |||
| "vfmv.v.f v27, ft11 \n\t" | |||
| "vfmv.v.f v28, ft11 \n\t" | |||
| "vfmv.v.f v29, ft11 \n\t" | |||
| "vfmv.v.f v30, ft11 \n\t" | |||
| "vfmv.v.f v31, ft11 \n\t" | |||
| //unloop 8 | |||
/* t0 = bk / 8: number of 8-step groups; skip straight to the tail when there are none. */
| "srli t0, %[BK], 3 \n\t" | |||
| "blez t0, M8x4_TAIL \n\t" | |||
| //preloop | |||
/* First half (4 k-steps) of the first group; its second half is either the
 * first half of a MAINLOOP pass or the block at M8x4_MAINLOOP_TAIL. */
| KERNEL8x4_I | |||
| KERNEL8x4_M2 | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_M2 | |||
| "addi t0, t0, -1 \n\t" | |||
| "blez t0, M8x4_MAINLOOP_TAIL \n\t" | |||
| ".align 4 \n\t" | |||
/* Each pass executes 8 k-steps (four M1/M2 pairs) and consumes one group count. */
| "M8x4_MAINLOOP: \n\t" | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_M2 | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_M2 | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_M2 | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_M2 | |||
| "addi t0, t0, -1 \n\t" | |||
| "bgtz t0, M8x4_MAINLOOP \n\t" | |||
/* Last 4 k-steps of the unrolled part; KERNEL8x4_E ends it without loading further operands. */
| "M8x4_MAINLOOP_TAIL: \n\t" | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_M2 | |||
| KERNEL8x4_M1 | |||
| KERNEL8x4_E | |||
| //tail | |||
/* Remaining bk % 8 steps, one k-step per pass. The auxiliary A pointers
 * (t4..t6) and B pointers (t1..t3) are re-derived from the current PA/PB. */
| "M8x4_TAIL: \n\t" | |||
| "andi t0, %[BK], 7 \n\t" | |||
| "blez t0, M8x4_SAVERESULT \n\t" | |||
| "addi t4, %[PA], 2*8 \n\t" | |||
| "addi t5, %[PA], 4*8 \n\t" | |||
| "addi t6, %[PA], 6*8 \n\t" | |||
| "addi t1, %[PB], 1*8 \n\t" | |||
| "addi t2, %[PB], 2*8 \n\t" | |||
| "addi t3, %[PB], 3*8 \n\t" | |||
| ".align 4 \n\t" | |||
| "M8x4_TAILLOOP: \n\t" | |||
| "fld ft0, (%[PB]) \n\t" | |||
| "addi %[PB], %[PB], 4*8 \n\t" | |||
| "vle.v v0, (%[PA]) \n\t" | |||
| "add %[PA], %[PA], 8*8 \n\t" | |||
| "vle.v v1, (t4) \n\t" | |||
| "addi t4, t4, 8*8 \n\t" | |||
| "vfmv.v.f v8, ft0 \n\t" | |||
| "fld ft1, (t1) \n\t" | |||
| "addi t1, t1, 4*8 \n\t" | |||
| "vle.v v2, (t5) \n\t" | |||
| "addi t5, t5, 8*8 \n\t" | |||
| "vle.v v3, (t6) \n\t" | |||
| "addi t6, t6, 8*8 \n\t" | |||
| "vfmacc.vv v16, v8, v0 \n\t" | |||
| "fld ft2, (t2) \n\t" | |||
| "addi t2, t2, 4*8 \n\t" | |||
| "vfmacc.vv v17, v8, v1 \n\t" | |||
| "vfmacc.vv v18, v8, v2 \n\t" | |||
| "vfmv.v.f v9, ft1 \n\t" | |||
| "vfmacc.vv v19, v8, v3 \n\t" | |||
| "vfmacc.vv v20, v9, v0 \n\t" | |||
| "fld ft3, (t3) \n\t" | |||
| "addi t3, t3, 4*8 \n\t" | |||
| "vfmacc.vv v21, v9, v1 \n\t" | |||
| "vfmacc.vv v22, v9, v2 \n\t" | |||
| "vfmv.v.f v10, ft2 \n\t" | |||
| "vfmacc.vv v23, v9, v3 \n\t" | |||
| "vfmv.v.f v11, ft3 \n\t" | |||
| "vfmacc.vv v24, v10, v0 \n\t" | |||
| "vfmacc.vv v25, v10, v1 \n\t" | |||
| "vfmacc.vv v26, v10, v2 \n\t" | |||
| "vfmacc.vv v27, v10, v3 \n\t" | |||
| "vfmacc.vv v28, v11, v0 \n\t" | |||
| "vfmacc.vv v29, v11, v1 \n\t" | |||
| "vfmacc.vv v30, v11, v2 \n\t" | |||
| "vfmacc.vv v31, v11, v3 \n\t" | |||
| "addi t0, t0, -1 \n\t" | |||
| "bgtz t0, M8x4_TAILLOOP \n\t" | |||
| //Save result | |||
| //load C | |||
/* C += alpha * acc, done in two halves per column: first the vectors at
 * byte offsets 0 and 2*8, then (after advancing by 4*8) the next two.
 * t4/t5/t6/t3 address the "+2*8" vector of C0/C1/C2/C3 respectively. */
| "M8x4_SAVERESULT: \n\t" | |||
| //use v8 to store alpha | |||
| "vfmv.v.f v8, %[ALPHA] \n\t" | |||
| "vle.v v0, (%[C0]) \n\t" | |||
| "addi t4, %[C0], 2*8 \n\t" | |||
| "vle.v v1, (%[C1]) \n\t" | |||
| "addi t5, %[C1], 2*8 \n\t" | |||
| "vle.v v2, (%[C2]) \n\t" | |||
| "addi t6, %[C2], 2*8 \n\t" | |||
| "vle.v v3, (%[C3]) \n\t" | |||
| "addi t3, %[C3], 2*8 \n\t" | |||
| //Multiply Alpha | |||
| "vfmacc.vv v0, v8, v16 \n\t" | |||
| "vle.v v4, (t4) \n\t" | |||
| "vfmacc.vv v1, v8, v20 \n\t" | |||
| "vle.v v5, (t5) \n\t" | |||
| "vfmacc.vv v2, v8, v24 \n\t" | |||
| "vle.v v6, (t6) \n\t" | |||
| "vfmacc.vv v3, v8, v28 \n\t" | |||
| "vle.v v7, (t3) \n\t" | |||
| "vfmacc.vv v4, v8, v17 \n\t" | |||
| "vse.v v0, (%[C0]) \n\t" | |||
| "add %[C0], %[C0], 4*8 \n\t" | |||
| "vfmacc.vv v5, v8, v21 \n\t" | |||
| "vse.v v1, (%[C1]) \n\t" | |||
| "add %[C1], %[C1], 4*8 \n\t" | |||
| "vfmacc.vv v6, v8, v25 \n\t" | |||
| "vse.v v2, (%[C2]) \n\t" | |||
| "add %[C2], %[C2], 4*8 \n\t" | |||
| "vfmacc.vv v7, v8, v29 \n\t" | |||
| "vse.v v3, (%[C3]) \n\t" | |||
| "add %[C3], %[C3], 4*8 \n\t" | |||
/* Second half: same pattern for the next 4*8 bytes of every column. */
| "vle.v v0, (%[C0]) \n\t" | |||
| "vse.v v4, (t4) \n\t" | |||
| "add t4, t4, 4*8 \n\t" | |||
| "vle.v v1, (%[C1]) \n\t" | |||
| "vse.v v5, (t5) \n\t" | |||
| "add t5, t5, 4*8 \n\t" | |||
| "vle.v v2, (%[C2]) \n\t" | |||
| "vse.v v6, (t6) \n\t" | |||
| "add t6, t6, 4*8 \n\t" | |||
| "vle.v v3, (%[C3]) \n\t" | |||
| "vse.v v7, (t3) \n\t" | |||
| "add t3, t3, 4*8 \n\t" | |||
| "vfmacc.vv v0, v8, v18 \n\t" | |||
| "vle.v v4, (t4) \n\t" | |||
| "vfmacc.vv v1, v8, v22 \n\t" | |||
| "vle.v v5, (t5) \n\t" | |||
| "vfmacc.vv v2, v8, v26 \n\t" | |||
| "vle.v v6, (t6) \n\t" | |||
| "vfmacc.vv v3, v8, v30 \n\t" | |||
| "vle.v v7, (t3) \n\t" | |||
| "vfmacc.vv v4, v8, v19 \n\t" | |||
| "vse.v v0, (%[C0]) \n\t" | |||
| "add %[C0], %[C0], 4*8 \n\t" | |||
| "vfmacc.vv v5, v8, v23 \n\t" | |||
| "vse.v v1, (%[C1]) \n\t" | |||
| "add %[C1], %[C1], 4*8 \n\t" | |||
| "vfmacc.vv v6, v8, v27 \n\t" | |||
| "vse.v v2, (%[C2]) \n\t" | |||
| "add %[C2], %[C2], 4*8 \n\t" | |||
| "vfmacc.vv v7, v8, v31 \n\t" | |||
| "vse.v v3, (%[C3]) \n\t" | |||
| "add %[C3], %[C3], 4*8 \n\t" | |||
| "vse.v v4, (t4) \n\t" | |||
| "vse.v v5, (t5) \n\t" | |||
| "vse.v v6, (t6) \n\t" | |||
| "vse.v v7, (t3) \n\t" | |||
| "M8x4_END: \n\t" | |||
/* C0..C3, ptrba and ptrbb are read-write operands: on exit each C pointer has
 * moved 8 elements (two adds of 4*8 bytes) and ptrba/ptrbb sit past the data
 * consumed for this tile. */
| :[C0]"+r"(C0),[C1]"+r"(C1),[C2]"+r"(C2),[C3]"+r"(C3), | |||
| [PA]"+r"(ptrba), [PB]"+r"(ptrbb) | |||
| :[ALPHA]"f"(alpha), [BK]"r"(bk) | |||
| :"cc", "t0", "t4","t5","t6","t3","t1","t2", | |||
| "ft11", "ft0", "ft1", "ft2","ft3","ft4", "ft5", "ft6","ft7", | |||
| "v0", "v1", "v2", "v3","v4", "v5", "v6", "v7", | |||
| "v8", "v9", "v10", "v11","v12", "v13", "v14", "v15", | |||
| "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | |||
| "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); | |||
| } | |||
/* Row remainder of 4 (scalar): res0..3 -> C0, res4..7 -> C1, res8..11 -> C2, res12..15 -> C3. */
| if(bm&4){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res2 = 0; | |||
| res3 = 0; | |||
| res4 = 0; | |||
| res5 = 0; | |||
| res6 = 0; | |||
| res7 = 0; | |||
| res8 = 0; | |||
| res9 = 0; | |||
| res10 = 0; | |||
| res11 = 0; | |||
| res12 = 0; | |||
| res13 = 0; | |||
| res14 = 0; | |||
| res15 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| load1 = ptrba[1]; | |||
| load2 = ptrba[2]; | |||
| load3 = ptrba[3]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res1 = res1 + load1 * loadb0; | |||
| res2 = res2 + load2 * loadb0; | |||
| res3 = res3 + load3 * loadb0; | |||
| res4 = res4 + load0 * loadb1; | |||
| res5 = res5 + load1 * loadb1; | |||
| res6 = res6 + load2 * loadb1; | |||
| res7 = res7 + load3 * loadb1; | |||
| loadb2 = ptrbb[2]; | |||
| loadb3 = ptrbb[3]; | |||
| res8 = res8 + load0 * loadb2; | |||
| res9 = res9 + load1 * loadb2; | |||
| res10 = res10 + load2 * loadb2; | |||
| res11 = res11 + load3 * loadb2; | |||
| res12 = res12 + load0 * loadb3; | |||
| res13 = res13 + load1 * loadb3; | |||
| res14 = res14 + load2 * loadb3; | |||
| res15 = res15 + load3 * loadb3; | |||
| ptrba += 4; | |||
| ptrbb += 4; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res2 = res2 * alpha; | |||
| res3 = res3 * alpha; | |||
| res4 = res4 * alpha; | |||
| res5 = res5 * alpha; | |||
| res6 = res6 * alpha; | |||
| res7 = res7 * alpha; | |||
| res8 = res8 * alpha; | |||
| res9 = res9 * alpha; | |||
| res10 = res10 * alpha; | |||
| res11 = res11 * alpha; | |||
| res12 = res12 * alpha; | |||
| res13 = res13 * alpha; | |||
| res14 = res14 * alpha; | |||
| res15 = res15 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C0[2] += res2; | |||
| C0[3] += res3; | |||
| C1[0] += res4; | |||
| C1[1] += res5; | |||
| C1[2] += res6; | |||
| C1[3] += res7; | |||
| C2[0] += res8; | |||
| C2[1] += res9; | |||
| C2[2] += res10; | |||
| C2[3] += res11; | |||
| C3[0] += res12; | |||
| C3[1] += res13; | |||
| C3[2] += res14; | |||
| C3[3] += res15; | |||
| C0 += 4; | |||
| C1 += 4; | |||
| C2 += 4; | |||
| C3 += 4; | |||
| } | |||
/* Row remainder of 2 (scalar): res0/1 -> C0, res4/5 -> C1, res8/9 -> C2, res12/13 -> C3. */
| if(bm&2){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res4 = 0; | |||
| res5 = 0; | |||
| res8 = 0; | |||
| res9 = 0; | |||
| res12 = 0; | |||
| res13 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| load1 = ptrba[1]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res1 = res1 + load1 * loadb0; | |||
| res4 = res4 + load0 * loadb1; | |||
| res5 = res5 + load1 * loadb1; | |||
| loadb2 = ptrbb[2]; | |||
| loadb3 = ptrbb[3]; | |||
| res8 = res8 + load0 * loadb2; | |||
| res9 = res9 + load1 * loadb2; | |||
| res12 = res12 + load0 * loadb3; | |||
| res13 = res13 + load1 * loadb3; | |||
| ptrba += 2; | |||
| ptrbb += 4; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res4 = res4 * alpha; | |||
| res5 = res5 * alpha; | |||
| res8 = res8 * alpha; | |||
| res9 = res9 * alpha; | |||
| res12 = res12 * alpha; | |||
| res13 = res13 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C1[0] += res4; | |||
| C1[1] += res5; | |||
| C2[0] += res8; | |||
| C2[1] += res9; | |||
| C3[0] += res12; | |||
| C3[1] += res13; | |||
| C0 += 2; | |||
| C1 += 2; | |||
| C2 += 2; | |||
| C3 += 2; | |||
| } | |||
/* Row remainder of 1 (scalar): one element in each of the four columns. */
| if(bm&1){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res4 = 0; | |||
| res8 = 0; | |||
| res12 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res4 = res4 + load0 * loadb1; | |||
| loadb2 = ptrbb[2]; | |||
| loadb3 = ptrbb[3]; | |||
| res8 = res8 + load0 * loadb2; | |||
| res12 = res12 + load0 * loadb3; | |||
| ptrba += 1; | |||
| ptrbb += 4; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res4 = res4 * alpha; | |||
| res8 = res8 * alpha; | |||
| res12 = res12 * alpha; | |||
| C0[0] += res0; | |||
| C1[0] += res4; | |||
| C2[0] += res8; | |||
| C3[0] += res12; | |||
| C0 += 1; | |||
| C1 += 1; | |||
| C2 += 1; | |||
| C3 += 1; | |||
| } | |||
/* Next panel: skip the 4*bk values of B just consumed and move C four columns on. */
| k = bk<<2; | |||
| bb = bb+k; | |||
| i = ldc<<2; | |||
| C = C+i; | |||
| } | |||
/* ---- Column remainder of 2: same row tiling (8/4/2/1), all scalar. ---- */
| if(bn&2){ | |||
| C0 = C; | |||
| C1 = C0+ldc; | |||
| ptrba = ba; | |||
/* 8x2 tile: res0..7 -> C0[0..7], res8..15 -> C1[0..7]. */
| for(i=0; i<bm/8; i+=1){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res2 = 0; | |||
| res3 = 0; | |||
| res4 = 0; | |||
| res5 = 0; | |||
| res6 = 0; | |||
| res7 = 0; | |||
| res8 = 0; | |||
| res9 = 0; | |||
| res10 = 0; | |||
| res11 = 0; | |||
| res12 = 0; | |||
| res13 = 0; | |||
| res14 = 0; | |||
| res15 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| load1 = ptrba[1]; | |||
| load2 = ptrba[2]; | |||
| load3 = ptrba[3]; | |||
| load4 = ptrba[4]; | |||
| load5 = ptrba[5]; | |||
| load6 = ptrba[6]; | |||
| load7 = ptrba[7]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res1 = res1 + load1 * loadb0; | |||
| res2 = res2 + load2 * loadb0; | |||
| res3 = res3 + load3 * loadb0; | |||
| res4 = res4 + load4 * loadb0; | |||
| res5 = res5 + load5 * loadb0; | |||
| res6 = res6 + load6 * loadb0; | |||
| res7 = res7 + load7 * loadb0; | |||
| res8 = res8 + load0 * loadb1; | |||
| res9 = res9 + load1 * loadb1; | |||
| res10 = res10 + load2 * loadb1; | |||
| res11 = res11 + load3 * loadb1; | |||
| res12 = res12 + load4 * loadb1; | |||
| res13 = res13 + load5 * loadb1; | |||
| res14 = res14 + load6 * loadb1; | |||
| res15 = res15 + load7 * loadb1; | |||
| ptrba += 8; | |||
| ptrbb += 2; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res2 = res2 * alpha; | |||
| res3 = res3 * alpha; | |||
| res4 = res4 * alpha; | |||
| res5 = res5 * alpha; | |||
| res6 = res6 * alpha; | |||
| res7 = res7 * alpha; | |||
| res8 = res8 * alpha; | |||
| res9 = res9 * alpha; | |||
| res10 = res10 * alpha; | |||
| res11 = res11 * alpha; | |||
| res12 = res12 * alpha; | |||
| res13 = res13 * alpha; | |||
| res14 = res14 * alpha; | |||
| res15 = res15 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C0[2] += res2; | |||
| C0[3] += res3; | |||
| C0[4] += res4; | |||
| C0[5] += res5; | |||
| C0[6] += res6; | |||
| C0[7] += res7; | |||
| C1[0] += res8; | |||
| C1[1] += res9; | |||
| C1[2] += res10; | |||
| C1[3] += res11; | |||
| C1[4] += res12; | |||
| C1[5] += res13; | |||
| C1[6] += res14; | |||
| C1[7] += res15; | |||
| C0 += 8; | |||
| C1 += 8; | |||
| } | |||
/* 4x2 remainder: res0..3 -> C0, res8..11 -> C1. */
| if(bm&4){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res2 = 0; | |||
| res3 = 0; | |||
| res8 = 0; | |||
| res9 = 0; | |||
| res10 = 0; | |||
| res11 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| load1 = ptrba[1]; | |||
| load2 = ptrba[2]; | |||
| load3 = ptrba[3]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res1 = res1 + load1 * loadb0; | |||
| res2 = res2 + load2 * loadb0; | |||
| res3 = res3 + load3 * loadb0; | |||
| res8 = res8 + load0 * loadb1; | |||
| res9 = res9 + load1 * loadb1; | |||
| res10 = res10 + load2 * loadb1; | |||
| res11 = res11 + load3 * loadb1; | |||
| ptrba += 4; | |||
| ptrbb += 2; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res2 = res2 * alpha; | |||
| res3 = res3 * alpha; | |||
| res8 = res8 * alpha; | |||
| res9 = res9 * alpha; | |||
| res10 = res10 * alpha; | |||
| res11 = res11 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C0[2] += res2; | |||
| C0[3] += res3; | |||
| C1[0] += res8; | |||
| C1[1] += res9; | |||
| C1[2] += res10; | |||
| C1[3] += res11; | |||
| C0 += 4; | |||
| C1 += 4; | |||
| } | |||
/* 2x2 remainder. */
| if(bm&2){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res8 = 0; | |||
| res9 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| load1 = ptrba[1]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res1 = res1 + load1 * loadb0; | |||
| res8 = res8 + load0 * loadb1; | |||
| res9 = res9 + load1 * loadb1; | |||
| ptrba += 2; | |||
| ptrbb += 2; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res8 = res8 * alpha; | |||
| res9 = res9 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C1[0] += res8; | |||
| C1[1] += res9; | |||
| C0 += 2; | |||
| C1 += 2; | |||
| } | |||
/* 1x2 remainder. */
| if(bm&1){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res8 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| loadb1 = ptrbb[1]; | |||
| load0 = ptrba[0]; | |||
| res0 = res0 + load0 * loadb0; | |||
| res8 = res8 + load0 * loadb1; | |||
| ptrba += 1; | |||
| ptrbb += 2; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res8 = res8 * alpha; | |||
| C0[0] += res0; | |||
| C1[0] += res8; | |||
| C0 += 1; | |||
| C1 += 1; | |||
| } | |||
/* Skip the 2*bk values of B just consumed and move C two columns on. */
| k = bk<<1; | |||
| bb = bb+k; | |||
| i = ldc<<1; | |||
| C = C+i; | |||
| } | |||
/* ---- Column remainder of 1: a single output column C0, all scalar. ---- */
| if (bn&1){ | |||
| C0 = C; | |||
| ptrba = ba; | |||
| for(i=0; i<bm/8; i+=1){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res2 = 0; | |||
| res3 = 0; | |||
| res4 = 0; | |||
| res5 = 0; | |||
| res6 = 0; | |||
| res7 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| res0 = res0 + ptrba[0] * loadb0; | |||
| res1 = res1 + ptrba[1] * loadb0; | |||
| res2 = res2 + ptrba[2] * loadb0; | |||
| res3 = res3 + ptrba[3] * loadb0; | |||
| res4 = res4 + ptrba[4] * loadb0; | |||
| res5 = res5 + ptrba[5] * loadb0; | |||
| res6 = res6 + ptrba[6] * loadb0; | |||
| res7 = res7 + ptrba[7] * loadb0; | |||
| ptrba += 8; | |||
| ptrbb += 1; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res2 = res2 * alpha; | |||
| res3 = res3 * alpha; | |||
| res4 = res4 * alpha; | |||
| res5 = res5 * alpha; | |||
| res6 = res6 * alpha; | |||
| res7 = res7 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C0[2] += res2; | |||
| C0[3] += res3; | |||
| C0[4] += res4; | |||
| C0[5] += res5; | |||
| C0[6] += res6; | |||
| C0[7] += res7; | |||
| C0 += 8; | |||
| } | |||
| if(bm&4){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| res2 = 0; | |||
| res3 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| res0 = res0 + ptrba[0] * loadb0; | |||
| res1 = res1 + ptrba[1] * loadb0; | |||
| res2 = res2 + ptrba[2] * loadb0; | |||
| res3 = res3 + ptrba[3] * loadb0; | |||
| ptrba += 4; | |||
| ptrbb += 1; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| res2 = res2 * alpha; | |||
| res3 = res3 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C0[2] += res2; | |||
| C0[3] += res3; | |||
| C0 += 4; | |||
| } | |||
| if(bm&2){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| res1 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| res0 = res0 + ptrba[0] * loadb0; | |||
| res1 = res1 + ptrba[1] * loadb0; | |||
| ptrba += 2; | |||
| ptrbb += 1; | |||
| } | |||
| res0 = res0 * alpha; | |||
| res1 = res1 * alpha; | |||
| C0[0] += res0; | |||
| C0[1] += res1; | |||
| C0 += 2; | |||
| } | |||
| if(bm&1){ | |||
| ptrbb = bb; | |||
| res0 = 0; | |||
| for(k=0; k<bk; k+=1){ | |||
| loadb0 = ptrbb[0]; | |||
| res0 = res0 + ptrba[0] * loadb0; | |||
| ptrba += 1; | |||
| ptrbb += 1; | |||
| } | |||
| res0 = res0 * alpha; | |||
| C0[0] += res0; | |||
| C0 += 1; | |||
| } | |||
/* Skip the bk values of B just consumed and move C one column on. */
| k = bk; | |||
| bb = bb+k; | |||
| C = C+ldc; | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,64 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #if defined(DSDOT) | |||
| double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| #else | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| #endif | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0,iy=0; | |||
| double dot = 0.0 ; | |||
| if ( n < 0 ) return(dot); | |||
| while(i < n) | |||
| { | |||
| dot += y[iy] * x[ix] ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| return(dot); | |||
| } | |||
| @@ -0,0 +1,172 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the vector dot kernel below.
 * When DOUBLE is not defined the float32 variants are selected, otherwise the
 * float64 variants. Both branches use the "xm4" vector types together with
 * RVV_M4 (presumably a register group multiplier of 4 - confirm against the
 * intrinsics header).
 *
 *   RVV_EFLOAT / RVV_M  - element width and group settings passed to vsetvli.
 *   FLOAT_V_T           - vector value type.
 *   VLEV_FLOAT          - load (called with pointer and length).
 *   VLSEV_FLOAT         - strided load (called with pointer, byte stride, length).
 *   VFREDSUM_FLOAT      - sum reduction.
 *   VFMACCVV_FLOAT      - vector-vector multiply-accumulate.
 *   VFMVVF_FLOAT        - broadcast a scalar into a vector.
 *   VFDOTVV_FLOAT       - defined here but only referenced from commented-out code below.
 */
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLEV_FLOAT vlev_float32xm4 | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 | |||
| #define VFDOTVV_FLOAT vfdotvv_float32xm4 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFDOTVV_FLOAT vfdotvv_float64xm4 | |||
| #endif | |||
| #if defined(DSDOT) | |||
| double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| #else | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| #endif | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| double dot = 0.0 ; | |||
| if ( n < 0 ) return(dot); | |||
| FLOAT_V_T vr, vx, vy; | |||
| unsigned int gvl = 0; | |||
| if(inc_x == 1 && inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| vy = VLEV_FLOAT(&y[j], gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, vy, gvl); | |||
| j += gvl; | |||
| } | |||
| if(j > 0){ | |||
| vx = VFMVVF_FLOAT(0, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vx, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| vy = VLEV_FLOAT(&y[j], gvl); | |||
| FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); | |||
| //vr = VFDOTVV_FLOAT(vx, vy, gvl); | |||
| vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vz, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| }else if(inc_y == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy = VLEV_FLOAT(&y[j], gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, vy, gvl); | |||
| j += gvl; | |||
| } | |||
| if(j > 0){ | |||
| vx = VFMVVF_FLOAT(0, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vx, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy = VLEV_FLOAT(&y[j], gvl); | |||
| FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); | |||
| //vr = VFDOTVV_FLOAT(vx, vy, gvl); | |||
| vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vz, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| }else if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| unsigned int stride_y = inc_y * sizeof(FLOAT); | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, vy, gvl); | |||
| j += gvl; | |||
| } | |||
| if(j > 0){ | |||
| vx = VFMVVF_FLOAT(0, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vx, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); | |||
| //vr = VFDOTVV_FLOAT(vx, vy, gvl); | |||
| vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vz, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| unsigned int stride_y = inc_y * sizeof(FLOAT); | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, vy, gvl); | |||
| j += gvl; | |||
| } | |||
| if(j > 0){ | |||
| vx = VFMVVF_FLOAT(0, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vx, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vy = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
| FLOAT_V_T vz = VFMVVF_FLOAT(0, gvl); | |||
| //vr = VFDOTVV_FLOAT(vx, vy, gvl); | |||
| vr = VFMACCVV_FLOAT(vz, vx, vy, gvl); | |||
| vx = VFREDSUM_FLOAT(vr, vz, gvl); | |||
| dot += vx[0]; | |||
| } | |||
| } | |||
| return(dot); | |||
| } | |||
| @@ -0,0 +1,67 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * * 2013/09/14 Saar | |||
| * * BLASTEST float : OK | |||
| * * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * * | |||
| * **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix,iy; | |||
| BLASLONG j; | |||
| FLOAT *a_ptr; | |||
| FLOAT temp; | |||
| ix = 0; | |||
| a_ptr = a; | |||
| for (j=0; j<n; j++) | |||
| { | |||
| temp = alpha * x[ix]; | |||
| iy = 0; | |||
| for (i=0; i<m; i++) | |||
| { | |||
| y[iy] += temp * a_ptr[i]; | |||
| iy += inc_y; | |||
| } | |||
| a_ptr += lda; | |||
| ix += inc_x; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,146 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) /* single-precision build: map the generic names to the float32, m4 intrinsics */ | |||
| #define RVV_EFLOAT RVV_E32 /* element-width argument handed to vsetvli() */ | |||
| #define RVV_M RVV_M4 /* grouping argument handed to vsetvli() */ | |||
| #define FLOAT_V_T float32xm4_t /* vector type used for the A and y values */ | |||
| #define VLEV_FLOAT vlev_float32xm4 /* load called as (address, vl); used on columns of A and on unit-stride y */ | |||
| #define VLSEV_FLOAT vlsev_float32xm4 /* load called as (address, byte stride, vl); used on strided y */ | |||
| #define VSEV_FLOAT vsev_float32xm4 /* store called as (address, vector, vl) */ | |||
| #define VSSEV_FLOAT vssev_float32xm4 /* store called as (address, byte stride, vector, vl) */ | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 /* called as (accumulator, scalar, vector, vl); presumably acc + scalar * vector -- TODO confirm */ | |||
| #else /* DOUBLE build: the same generic names mapped to the float64, m4 intrinsics */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSEV_FLOAT vsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #endif | |||
| /* | |||
|  * Vectorised GEMV update y += alpha * A * x for an m-by-n matrix A whose | |||
|  * column j starts at a + j * lda.  x is read with stride inc_x; y is | |||
|  * updated with unit stride (first branch) or stride inc_y (second branch). | |||
|  * Rows are processed in strips of gvl elements: a main loop handles two | |||
|  * strips per pass, a tail loop handles what is left one strip at a time. | |||
|  * dummy1 and buffer are not used.  Returns 0. | |||
|  */ | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
|     BLASLONG i = 0, j = 0, k = 0; | |||
|     BLASLONG ix = 0, iy = 0; | |||
|     /* Nothing to update for an empty matrix.  Returning here also keeps | |||
|        m == 0 away from the m/(2*gvl) division below, where gvl would be 0. */ | |||
|     if(m <= 0 || n <= 0) return(0); | |||
|     FLOAT *a_ptr = a; | |||
|     FLOAT temp = 0.0; | |||
|     FLOAT_V_T va0, va1, vy0, vy1; | |||
|     unsigned int gvl = 0; | |||
|     if(inc_y == 1){ | |||
|         gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
|         /* Two-strip main loop only when at least two full strips fit in m. */ | |||
|         if(gvl <= m/2){ | |||
|             for(k=0,j=0; k<m/(2*gvl); k++){ | |||
|                 a_ptr = a; | |||
|                 ix = 0; | |||
|                 vy0 = VLEV_FLOAT(&y[j], gvl); | |||
|                 vy1 = VLEV_FLOAT(&y[j+gvl], gvl); | |||
|                 /* Walk all n columns, accumulating into the two y strips. */ | |||
|                 for(i = 0; i < n; i++){ | |||
|                     temp = alpha * x[ix]; | |||
|                     va0 = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                     vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl); | |||
|                     va1 = VLEV_FLOAT(&a_ptr[j+gvl], gvl); | |||
|                     vy1 = VFMACCVF_FLOAT(vy1, temp, va1, gvl); | |||
|                     a_ptr += lda; | |||
|                     ix += inc_x; | |||
|                 } | |||
|                 VSEV_FLOAT(&y[j], vy0, gvl); | |||
|                 VSEV_FLOAT(&y[j+gvl], vy1, gvl); | |||
|                 j += gvl * 2; | |||
|             } | |||
|         } | |||
|         /* tail: remaining rows, one strip of at most m-j elements at a time */ | |||
|         for(;j < m;){ | |||
|             gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M); | |||
|             a_ptr = a; | |||
|             ix = 0; | |||
|             vy0 = VLEV_FLOAT(&y[j], gvl); | |||
|             for(i = 0; i < n; i++){ | |||
|                 temp = alpha * x[ix]; | |||
|                 va0 = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                 vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl); | |||
|                 a_ptr += lda; | |||
|                 ix += inc_x; | |||
|             } | |||
|             VSEV_FLOAT(&y[j], vy0, gvl); | |||
|             j += gvl; | |||
|         } | |||
|     }else{ | |||
|         /* Strided y: the load/store intrinsics take the stride in bytes. */ | |||
|         BLASLONG stride_y = inc_y * sizeof(FLOAT); | |||
|         gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
|         if(gvl <= m/2){ | |||
|             /* Offset in y elements between two consecutive strips. */ | |||
|             BLASLONG inc_yv = inc_y * gvl; | |||
|             for(k=0,j=0; k<m/(2*gvl); k++){ | |||
|                 a_ptr = a; | |||
|                 ix = 0; | |||
|                 vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|                 vy1 = VLSEV_FLOAT(&y[iy+inc_yv], stride_y, gvl); | |||
|                 for(i = 0; i < n; i++){ | |||
|                     temp = alpha * x[ix]; | |||
|                     va0 = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                     vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl); | |||
|                     va1 = VLEV_FLOAT(&a_ptr[j+gvl], gvl); | |||
|                     vy1 = VFMACCVF_FLOAT(vy1, temp, va1, gvl); | |||
|                     a_ptr += lda; | |||
|                     ix += inc_x; | |||
|                 } | |||
|                 VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
|                 VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy1, gvl); | |||
|                 j += gvl * 2; | |||
|                 iy += inc_yv * 2; | |||
|             } | |||
|         } | |||
|         /* tail: y is addressed as j*inc_y here, so iy is not needed */ | |||
|         for(;j < m;){ | |||
|             gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M); | |||
|             a_ptr = a; | |||
|             ix = 0; | |||
|             vy0 = VLSEV_FLOAT(&y[j*inc_y], stride_y, gvl); | |||
|             for(i = 0; i < n; i++){ | |||
|                 temp = alpha * x[ix]; | |||
|                 va0 = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                 vy0 = VFMACCVF_FLOAT(vy0, temp, va0, gvl); | |||
|                 a_ptr += lda; | |||
|                 ix += inc_x; | |||
|             } | |||
|             VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl); | |||
|             j += gvl; | |||
|         } | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,68 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * * 2013/09/14 Saar | |||
| * * BLASTEST float : OK | |||
| * * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * * | |||
| * **************************************************************************************/ | |||
| #include "common.h" | |||
| /* | |||
|  * Reference transposed GEMV update: for each of the n columns of A (column | |||
|  * j starts at a + j * lda, m entries long) add alpha times the dot product | |||
|  * of that column with x to one element of y.  x is read with stride inc_x, | |||
|  * y is updated with stride inc_y.  dummy1 and buffer are not used. | |||
|  * Returns 0. | |||
|  */ | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
|     FLOAT *column = a; | |||
|     BLASLONG y_pos = 0; | |||
|     BLASLONG col; | |||
|     for (col = 0; col < n; col++, column += lda, y_pos += inc_y) { | |||
|         FLOAT dot = 0.0; | |||
|         BLASLONG x_pos = 0; | |||
|         BLASLONG row; | |||
|         for (row = 0; row < m; row++, x_pos += inc_x) { | |||
|             dot += column[row] * x[x_pos]; | |||
|         } | |||
|         /* alpha is applied once per column, after the accumulation. */ | |||
|         y[y_pos] += alpha * dot; | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,126 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) /* single-precision build: map the generic names to the float32, m4 intrinsics */ | |||
| #define RVV_EFLOAT RVV_E32 /* element-width argument handed to vsetvli() */ | |||
| #define RVV_M RVV_M4 /* grouping argument handed to vsetvli() */ | |||
| #define FLOAT_V_T float32xm4_t /* vector type used for A, x and the partial sums */ | |||
| #define VLEV_FLOAT vlev_float32xm4 /* load called as (address, vl) */ | |||
| #define VLSEV_FLOAT vlsev_float32xm4 /* load called as (address, byte stride, vl); used on strided x */ | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 /* reduction called as (vector, seed vector, vl); the kernel reads element 0 of the result */ | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 /* called as (accumulator, vector, vector, vl); presumably acc + a * b per lane -- TODO confirm */ | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 /* called as (scalar, vl) to build a vector filled with that scalar */ | |||
| #define VFDOTVV_FLOAT vfdotvv_float32xm4 /* NOTE(review): not referenced by the kernel in this file */ | |||
| #define VFMULVV_FLOAT vfmulvv_float32xm4 /* called as (vector, vector, vl); used for the tail product */ | |||
| #else /* DOUBLE build: the same generic names mapped to the float64, m4 intrinsics */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFDOTVV_FLOAT vfdotvv_float64xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float64xm4 | |||
| #endif | |||
| /* | |||
|  * Vectorised transposed GEMV update: for each of the n columns of A (column | |||
|  * j starts at a + j * lda, m entries long) add alpha times the dot product | |||
|  * of that column with x to one element of y.  x is read with unit stride | |||
|  * (first branch) or stride inc_x (second branch); y is updated with stride | |||
|  * inc_y.  Each dot product is built from full strips of gvl elements that | |||
|  * are accumulated lane-wise and reduced once, plus one shorter tail strip. | |||
|  * dummy1 and buffer are not used.  Returns 0. | |||
|  */ | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
|     BLASLONG i = 0, j = 0, k = 0; | |||
|     BLASLONG ix = 0, iy = 0; | |||
|     FLOAT *a_ptr = a; | |||
|     FLOAT temp; | |||
|     FLOAT_V_T va, vr, vx; | |||
|     unsigned int gvl = 0; | |||
|     /* Empty problem: y is left untouched.  Returning here also keeps | |||
|        m == 0 away from the m/gvl divisions below, where gvl would be 0. */ | |||
|     if(m <= 0 || n <= 0) return(0); | |||
|     if(inc_x == 1){ | |||
|         for(i = 0; i < n; i++){ | |||
|             /* The tail below may shorten gvl, so it is re-established per column. */ | |||
|             gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
|             j = 0; | |||
|             vr = VFMVVF_FLOAT(0, gvl); | |||
|             /* Full strips: lane-wise accumulation of a[j..] * x[j..]. */ | |||
|             for(k = 0; k < m/gvl; k++){ | |||
|                 va = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                 vx = VLEV_FLOAT(&x[j], gvl); | |||
|                 vr = VFMACCVV_FLOAT(vr, va, vx, gvl); | |||
|                 j += gvl; | |||
|             } | |||
|             /* Collapse the lane sums; the scalar result is read from element 0. */ | |||
|             va = VFMVVF_FLOAT(0, gvl); | |||
|             va = VFREDSUM_FLOAT(vr, va, gvl); | |||
|             temp = va[0]; | |||
|             if(j < m){ | |||
|                 /* Tail strip of m-j elements, reduced separately and added in. */ | |||
|                 gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M); | |||
|                 va = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                 vx = VLEV_FLOAT(&x[j], gvl); | |||
|                 vr = VFMULVV_FLOAT(va, vx, gvl); | |||
|                 va = VFMVVF_FLOAT(0, gvl); | |||
|                 va = VFREDSUM_FLOAT(vr, va, gvl); | |||
|                 temp += va[0]; | |||
|             } | |||
|             y[iy] += alpha * temp; | |||
|             iy += inc_y; | |||
|             a_ptr += lda; | |||
|         } | |||
|     }else{ | |||
|         gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
|         /* Byte stride for the strided load, and x elements covered by one full strip. */ | |||
|         BLASLONG stride_x = inc_x * sizeof(FLOAT); | |||
|         BLASLONG inc_xv = inc_x * gvl; | |||
|         for(i = 0; i < n; i++){ | |||
|             gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
|             j = 0; | |||
|             ix = 0; | |||
|             vr = VFMVVF_FLOAT(0, gvl); | |||
|             for(k = 0; k < m/gvl; k++){ | |||
|                 va = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                 vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|                 vr = VFMACCVV_FLOAT(vr, va, vx, gvl); | |||
|                 j += gvl; | |||
|                 ix += inc_xv; | |||
|             } | |||
|             va = VFMVVF_FLOAT(0, gvl); | |||
|             va = VFREDSUM_FLOAT(vr, va, gvl); | |||
|             temp = va[0]; | |||
|             if(j < m){ | |||
|                 gvl = vsetvli(m-j, RVV_EFLOAT, RVV_M); | |||
|                 va = VLEV_FLOAT(&a_ptr[j], gvl); | |||
|                 vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|                 vr = VFMULVV_FLOAT(va, vx, gvl); | |||
|                 va = VFMVVF_FLOAT(0, gvl); | |||
|                 va = VFREDSUM_FLOAT(vr, va, gvl); | |||
|                 temp += va[0]; | |||
|             } | |||
|             y[iy] += alpha * temp; | |||
|             iy += inc_y; | |||
|             a_ptr += lda; | |||
|         } | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,77 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if defined(DOUBLE) /* DOUBLE build: magnitudes are taken with the double-precision fabs() */ | |||
| #define ABS fabs | |||
| #else /* otherwise use the single-precision fabsf() */ | |||
| #define ABS fabsf | |||
| #endif | |||
| /* | |||
|  * Returns the 1-based position of the first element of x with the largest | |||
|  * absolute value, scanning n elements spaced inc_x apart.  Returns 0 when | |||
|  * n <= 0 or inc_x <= 0. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG best_pos = 0; | |||
|     BLASLONG offset; | |||
|     BLASLONG k; | |||
|     FLOAT best; | |||
|     FLOAT magnitude; | |||
|     if (n <= 0 || inc_x <= 0) return(best_pos); | |||
|     best = ABS(x[0]); | |||
|     offset = inc_x; | |||
|     for (k = 1; k < n; k++, offset += inc_x) { | |||
|         magnitude = ABS(x[offset]); | |||
|         /* Strict comparison keeps the earliest element on ties. */ | |||
|         if (magnitude > best) { | |||
|             best_pos = k; | |||
|             best = magnitude; | |||
|         } | |||
|     } | |||
|     return(best_pos + 1); | |||
| } | |||
| @@ -0,0 +1,191 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if defined(DOUBLE) /* DOUBLE build: map the generic names to the 64-bit, m8 intrinsics */ | |||
| #define ABS fabs /* NOTE(review): not referenced by the vector kernel in this file */ | |||
| #define RVV_EFLOAT RVV_E64 /* element-width argument handed to vsetvli() */ | |||
| #define RVV_M RVV_M8 /* grouping argument handed to vsetvli() */ | |||
| #define FLOAT_V_T float64xm8_t /* vector type for the values and running maxima */ | |||
| #define VLEV_FLOAT vlev_float64xm8 /* load called as (address, vl) */ | |||
| #define VLSEV_FLOAT vlsev_float64xm8 /* load called as (address, byte stride, vl) */ | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 /* max reduction called as (vector, seed vector, vl); result read from element 0 */ | |||
| #define MASK_T e64xm8_t /* type of the comparison results used as masks */ | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 /* compare called as (vector, scalar, vl); used to find negative lanes */ | |||
| #define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 /* compare called as (vector, vector, vl); used as "running max < new value" */ | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 /* called as (scalar, vl) to build a vector filled with that scalar */ | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 /* masked (0 - v); the kernel uses it to form absolute values */ | |||
| #define VFMAXVV_FLOAT vfmaxvv_float64xm8 /* called as (vector, vector, vl) to update the running maxima */ | |||
| #define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8 /* compare called as (vector, scalar, vl); marks lanes holding the maximum */ | |||
| #define VMFIRSTM vmfirstm_e64xm8 /* called as (mask, vl); its result is used as a lane subscript */ | |||
| #define UINT_V_T uint64xm8_t /* vector type for the per-lane element indices */ | |||
| #define VIDV_MASK_UINT vidv_mask_uint64xm8 /* masked lane-number generator called as (old values, mask, vl) */ | |||
| #define VIDV_UINT vidv_uint64xm8 /* lane-number generator called as (vl) */ | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 /* masked vector + scalar; adds the strip offset to fresh lane numbers */ | |||
| #define VADDVX_UINT vaddvx_uint64xm8 /* vector + scalar called as (vector, scalar, vl) */ | |||
| #define VMVVX_UINT vmvvx_uint64xm8 /* called as (scalar, vl) to build an index vector filled with that scalar */ | |||
| #else /* single-precision build: the same generic names mapped to the 32-bit, m8 intrinsics */ | |||
| #define ABS fabsf | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float32xm8 | |||
| #define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8 | |||
| #define VMFIRSTM vmfirstm_e32xm8 | |||
| #define UINT_V_T uint32xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint32xm8 | |||
| #define VIDV_UINT vidv_uint32xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 | |||
| #define VADDVX_UINT vaddvx_uint32xm8 | |||
| #define VMVVX_UINT vmvvx_uint32xm8 | |||
| #endif | |||
| /* | |||
|  * Returns the 1-based position of the first element of x with the largest | |||
|  * absolute value, scanning n elements spaced inc_x apart.  Returns 0 when | |||
|  * n <= 0 or inc_x <= 0. | |||
|  * | |||
|  * Full strips of gvl elements are folded into per-lane running maxima | |||
|  * (v_max) together with the element index at which each lane first reached | |||
|  * its maximum (v_max_index).  After the strips, the lanes holding the global | |||
|  * maximum are scanned and the smallest recorded index wins, so a tie between | |||
|  * lanes resolves to the earliest element rather than to the lowest lane. | |||
|  * A shorter tail strip can only take over when it is strictly larger. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG i=0, j=0; | |||
|     BLASLONG lane = 0; | |||
|     FLOAT maxf=0.0; | |||
|     BLASLONG max_index = 0; | |||
|     if (n <= 0 || inc_x <= 0) return(max_index); | |||
|     FLOAT_V_T vx, v_max; | |||
|     UINT_V_T v_max_index; | |||
|     MASK_T mask; | |||
|     unsigned int gvl = 0; | |||
|     if(inc_x == 1){ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         v_max_index = VMVVX_UINT(0, gvl); | |||
|         /* -1 is below every absolute value, so the first strip always wins. */ | |||
|         v_max = VFMVVF_FLOAT(-1, gvl); | |||
|         for(i=0,j=0; i < n/gvl; i++){ | |||
|             vx = VLEV_FLOAT(&x[j], gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
|             vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
|             /* index where element greater than v_max */ | |||
|             mask = VMFLTVV_FLOAT(v_max, vx, gvl); | |||
|             v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); | |||
|             v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); | |||
|             /* update v_max and start_index j */ | |||
|             v_max = VFMAXVV_FLOAT(v_max, vx, gvl); | |||
|             j += gvl; | |||
|         } | |||
|         vx = VFMVVF_FLOAT(0, gvl); | |||
|         vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
|         maxf = vx[0]; | |||
|         /* Smallest recorded index among the lanes that hold the maximum. */ | |||
|         max_index = -1; | |||
|         for(lane = 0; lane < gvl; lane++){ | |||
|             if(v_max[lane] >= maxf){ | |||
|                 BLASLONG lane_index = (BLASLONG)v_max_index[lane]; | |||
|                 if(max_index < 0 || lane_index < max_index) max_index = lane_index; | |||
|             } | |||
|         } | |||
|         /* No lane matched (e.g. every comparison failed): fall back to element 0. */ | |||
|         if(max_index < 0) max_index = 0; | |||
|         if(j < n){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             vx = VLEV_FLOAT(&x[j], gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
|             v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
|             vx = VFMVVF_FLOAT(0, gvl); | |||
|             vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
|             FLOAT cur_maxf = vx[0]; | |||
|             if(cur_maxf > maxf){ | |||
|                 /* tail index: lanes are in element order here, so the first hit is the earliest */ | |||
|                 v_max_index = VIDV_UINT(gvl); | |||
|                 v_max_index = VADDVX_UINT(v_max_index, j, gvl); | |||
|                 mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); | |||
|                 max_index = VMFIRSTM(mask,gvl); | |||
|                 max_index = v_max_index[max_index]; | |||
|             } | |||
|         } | |||
|     }else{ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         /* BLASLONG rather than unsigned int so large n * inc_x offsets do not wrap. */ | |||
|         BLASLONG stride_x = inc_x * sizeof(FLOAT); | |||
|         BLASLONG idx = 0, inc_v = gvl * inc_x; | |||
|         v_max_index = VMVVX_UINT(0, gvl); | |||
|         v_max = VFMVVF_FLOAT(-1, gvl); | |||
|         for(i=0,j=0; i < n/gvl; i++){ | |||
|             vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
|             vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
|             /* index where element greater than v_max */ | |||
|             mask = VMFLTVV_FLOAT(v_max, vx, gvl); | |||
|             v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); | |||
|             v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); | |||
|             /* update v_max and start_index j */ | |||
|             v_max = VFMAXVV_FLOAT(v_max, vx, gvl); | |||
|             j += gvl; | |||
|             idx += inc_v; | |||
|         } | |||
|         vx = VFMVVF_FLOAT(0, gvl); | |||
|         vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
|         maxf = vx[0]; | |||
|         /* Smallest recorded index among the lanes that hold the maximum. */ | |||
|         max_index = -1; | |||
|         for(lane = 0; lane < gvl; lane++){ | |||
|             if(v_max[lane] >= maxf){ | |||
|                 BLASLONG lane_index = (BLASLONG)v_max_index[lane]; | |||
|                 if(max_index < 0 || lane_index < max_index) max_index = lane_index; | |||
|             } | |||
|         } | |||
|         if(max_index < 0) max_index = 0; | |||
|         if(j < n){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
|             v_max = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
|             vx = VFMVVF_FLOAT(0, gvl); | |||
|             vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
|             FLOAT cur_maxf = vx[0]; | |||
|             if(cur_maxf > maxf){ | |||
|                 /* tail index */ | |||
|                 v_max_index = VIDV_UINT(gvl); | |||
|                 v_max_index = VADDVX_UINT(v_max_index, j, gvl); | |||
|                 mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); | |||
|                 max_index = VMFIRSTM(mask,gvl); | |||
|                 max_index = v_max_index[max_index]; | |||
|             } | |||
|         } | |||
|     } | |||
|     return(max_index+1); | |||
| } | |||
| @@ -0,0 +1,77 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if defined(DOUBLE) /* DOUBLE build: magnitudes are taken with the double-precision fabs() */ | |||
| #define ABS fabs | |||
| #else /* otherwise use the single-precision fabsf() */ | |||
| #define ABS fabsf | |||
| #endif | |||
| /* | |||
|  * Returns the 1-based position of the first element of x with the smallest | |||
|  * absolute value, scanning n elements spaced inc_x apart.  Returns 0 when | |||
|  * n <= 0 or inc_x <= 0. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG best_pos = 0; | |||
|     BLASLONG offset; | |||
|     BLASLONG k; | |||
|     FLOAT smallest; | |||
|     if (n <= 0 || inc_x <= 0) return(best_pos); | |||
|     smallest = ABS(x[0]); | |||
|     offset = inc_x; | |||
|     for (k = 1; k < n; k++, offset += inc_x) { | |||
|         /* Strict comparison keeps the earliest element on ties. */ | |||
|         if (ABS(x[offset]) < ABS(smallest)) { | |||
|             best_pos = k; | |||
|             smallest = ABS(x[offset]); | |||
|         } | |||
|     } | |||
|     return(best_pos + 1); | |||
| } | |||
| @@ -0,0 +1,192 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| #if defined(DOUBLE) /* DOUBLE build: map the generic names to the 64-bit, m8 intrinsics */ | |||
| #define ABS fabs /* NOTE(review): not referenced by the vector kernel in this file */ | |||
| #define RVV_EFLOAT RVV_E64 /* element-width argument handed to vsetvli() */ | |||
| #define RVV_M RVV_M8 /* grouping argument handed to vsetvli() */ | |||
| #define FLOAT_V_T float64xm8_t /* vector type for the values and running minima */ | |||
| #define VLEV_FLOAT vlev_float64xm8 /* load called as (address, vl) */ | |||
| #define VLSEV_FLOAT vlsev_float64xm8 /* load called as (address, byte stride, vl) */ | |||
| #define VFREDMINVS_FLOAT vfredminvs_float64xm8 /* min reduction called as (vector, seed vector, vl); result read from element 0 */ | |||
| #define MASK_T e64xm8_t /* type of the comparison results used as masks */ | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 /* compare called as (vector, scalar, vl); used to find negative lanes */ | |||
| #define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 /* compare called as (vector, vector, vl); used as "new value < running min" */ | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 /* called as (scalar, vl) to build a vector filled with that scalar */ | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 /* masked (0 - v); the kernel uses it to form absolute values */ | |||
| #define VFMINVV_FLOAT vfminvv_float64xm8 /* called as (vector, vector, vl) to update the running minima */ | |||
| #define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8 /* compare called as (vector, scalar, vl); marks lanes holding the minimum */ | |||
| #define VMFIRSTM vmfirstm_e64xm8 /* called as (mask, vl); its result is used as a lane subscript */ | |||
| #define UINT_V_T uint64xm8_t /* vector type for the per-lane element indices */ | |||
| #define VIDV_MASK_UINT vidv_mask_uint64xm8 /* masked lane-number generator called as (old values, mask, vl) */ | |||
| #define VIDV_UINT vidv_uint64xm8 /* lane-number generator called as (vl) */ | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 /* masked vector + scalar; adds the strip offset to fresh lane numbers */ | |||
| #define VADDVX_UINT vaddvx_uint64xm8 /* vector + scalar called as (vector, scalar, vl) */ | |||
| #define VMVVX_UINT vmvvx_uint64xm8 /* called as (scalar, vl) to build an index vector filled with that scalar */ | |||
| #else /* single-precision build: the same generic names mapped to the 32-bit, m8 intrinsics */ | |||
| #define ABS fabsf | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float32xm8 | |||
| #define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8 | |||
| #define VMFIRSTM vmfirstm_e32xm8 | |||
| #define UINT_V_T uint32xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint32xm8 | |||
| #define VIDV_UINT vidv_uint32xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 | |||
| #define VADDVX_UINT vaddvx_uint32xm8 | |||
| #define VMVVX_UINT vmvvx_uint32xm8 | |||
| #endif | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| FLOAT minf=FLT_MAX; | |||
| unsigned int min_index = 0; | |||
| if (n <= 0 || inc_x <= 0) return(min_index); | |||
| FLOAT_V_T vx, v_min; | |||
| UINT_V_T v_min_index; | |||
| MASK_T mask; | |||
| unsigned int gvl = 0; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v_min_index = VMVVX_UINT(0, gvl); | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
| vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
| //index where element less than v_min | |||
| mask = VMFLTVV_FLOAT(vx, v_min, gvl); | |||
| v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); | |||
| v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); | |||
| //update v_min and start_index j | |||
| v_min = VFMINVV_FLOAT(v_min, vx, gvl); | |||
| j += gvl; | |||
| } | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| minf = vx[0]; | |||
| mask = VMFLEVF_FLOAT(v_min, minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
| v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| FLOAT cur_minf = vx[0]; | |||
| if(cur_minf < minf){ | |||
| //tail index | |||
| v_min_index = VIDV_UINT(gvl); | |||
| v_min_index = VADDVX_UINT(v_min_index, j, gvl); | |||
| mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| } | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| unsigned int idx = 0, inc_v = gvl * inc_x; | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v_min_index = VMVVX_UINT(0, gvl); | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
| vx = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
| //index where element less than v_min | |||
| mask = VMFLTVV_FLOAT(vx, v_min, gvl); | |||
| v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); | |||
| v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); | |||
| //update v_min and start_index j | |||
| v_min = VFMINVV_FLOAT(v_min, vx, gvl); | |||
| j += gvl; | |||
| idx += inc_v; | |||
| } | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| minf = vx[0]; | |||
| mask = VMFLEVF_FLOAT(v_min, minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(vx, 0, gvl); | |||
| v_min = VFRSUBVF_MASK_FLOAT(vx, vx, 0, mask, gvl); | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| FLOAT cur_minf = vx[0]; | |||
| if(cur_minf < minf){ | |||
| //tail index | |||
| v_min_index = VIDV_UINT(gvl); | |||
| v_min_index = VADDVX_UINT(v_min_index, j, gvl); | |||
| mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| } | |||
| } | |||
| } | |||
| return(min_index+1); | |||
| } | |||
| @@ -0,0 +1,69 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* | |||
|  * Scalar reference kernel: position of the largest element of x. | |||
|  * | |||
|  * n     - number of elements to examine | |||
|  * x     - input vector | |||
|  * inc_x - distance, in elements, between consecutive entries of x | |||
|  * | |||
|  * Returns the 1-based position of the first element that is strictly | |||
|  * greater than every element before it and not exceeded afterwards, or 0 | |||
|  * when n <= 0 or inc_x <= 0. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG pos; | |||
|     BLASLONG best = 0; | |||
|     FLOAT best_val = 0.0; | |||
|     if (n <= 0 || inc_x <= 0) return(best); | |||
|     /* element 0 seeds the search; the scan starts at element 1 */ | |||
|     best_val = x[0]; | |||
|     for (k = 1, pos = inc_x; k < n; k++, pos += inc_x) | |||
|     { | |||
|         /* strict '>' keeps the earliest position on ties */ | |||
|         if( x[pos] > best_val ) | |||
|         { | |||
|             best = k; | |||
|             best_val = x[pos]; | |||
|         } | |||
|     } | |||
|     return(best+1); | |||
| } | |||
| @@ -0,0 +1,176 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| /* Precision-neutral aliases used by the kernel below. Each name resolves to | |||
|    the float64 / uint64 / e64 `xm8` intrinsic when DOUBLE is defined and to | |||
|    the float32 / uint32 / e32 `xm8` intrinsic otherwise. */ | |||
| #if defined(DOUBLE) | |||
| /* ABS is defined for parity with the other kernels; the kernel in this file | |||
|    does not reference it. */ | |||
| #define ABS fabs | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float64xm8 | |||
| #define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8 | |||
| #define VMFIRSTM vmfirstm_e64xm8 | |||
| /* Index vector helpers: element width matches the float width above. */ | |||
| #define UINT_V_T uint64xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint64xm8 | |||
| #define VIDV_UINT vidv_uint64xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 | |||
| #define VADDVX_UINT vaddvx_uint64xm8 | |||
| #define VMVVX_UINT vmvvx_uint64xm8 | |||
| #else | |||
| /* Single-precision counterparts of the aliases above. */ | |||
| #define ABS fabsf | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float32xm8 | |||
| #define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8 | |||
| #define VMFIRSTM vmfirstm_e32xm8 | |||
| #define UINT_V_T uint32xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint32xm8 | |||
| #define VIDV_UINT vidv_uint32xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 | |||
| #define VADDVX_UINT vaddvx_uint32xm8 | |||
| #define VMVVX_UINT vmvvx_uint32xm8 | |||
| #endif | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| unsigned int max_index = 0; | |||
| if (n <= 0 || inc_x <= 0) return(max_index); | |||
| FLOAT maxf=-FLT_MAX; | |||
| FLOAT_V_T vx, v_max; | |||
| UINT_V_T v_max_index; | |||
| MASK_T mask; | |||
| unsigned int gvl = 0; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| v_max_index = VMVVX_UINT(0, gvl); | |||
| v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| //index where element greater than v_max | |||
| mask = VMFLTVV_FLOAT(v_max, vx, gvl); | |||
| v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); | |||
| v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); | |||
| //update v_max and start_index j | |||
| v_max = VFMAXVV_FLOAT(v_max, vx, gvl); | |||
| j += gvl; | |||
| } | |||
| vx = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
| maxf = vx[0]; | |||
| mask = VMFGEVF_FLOAT(v_max, maxf, gvl); | |||
| max_index = VMFIRSTM(mask,gvl); | |||
| max_index = v_max_index[max_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v_max = VLEV_FLOAT(&x[j], gvl); | |||
| vx = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
| FLOAT cur_maxf = vx[0]; | |||
| if(cur_maxf > maxf){ | |||
| //tail index | |||
| v_max_index = VIDV_UINT(gvl); | |||
| v_max_index = VADDVX_UINT(v_max_index, j, gvl); | |||
| mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); | |||
| max_index = VMFIRSTM(mask,gvl); | |||
| max_index = v_max_index[max_index]; | |||
| } | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| unsigned int idx = 0, inc_v = gvl * inc_x; | |||
| v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| v_max_index = VMVVX_UINT(0, gvl); | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| //index where element greater than v_max | |||
| mask = VMFLTVV_FLOAT(v_max, vx, gvl); | |||
| v_max_index = VIDV_MASK_UINT(v_max_index, mask, gvl); | |||
| v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask, gvl); | |||
| //update v_max and start_index j | |||
| v_max = VFMAXVV_FLOAT(v_max, vx, gvl); | |||
| j += gvl; | |||
| idx += inc_v; | |||
| } | |||
| vx = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
| maxf = vx[0]; | |||
| mask = VMFGEVF_FLOAT(v_max, maxf, gvl); | |||
| max_index = VMFIRSTM(mask,gvl); | |||
| max_index = v_max_index[max_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v_max = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| vx = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| vx = VFREDMAXVS_FLOAT(v_max, vx, gvl); | |||
| FLOAT cur_maxf = vx[0]; | |||
| if(cur_maxf > maxf){ | |||
| //tail index | |||
| v_max_index = VIDV_UINT(gvl); | |||
| v_max_index = VADDVX_UINT(v_max_index, j, gvl); | |||
| mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); | |||
| max_index = VMFIRSTM(mask,gvl); | |||
| max_index = v_max_index[max_index]; | |||
| } | |||
| } | |||
| } | |||
| return(max_index+1); | |||
| } | |||
| @@ -0,0 +1,67 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/08/19 Saar | |||
| * BLASTEST float | |||
| * BLASTEST double | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* | |||
|  * Scalar reference kernel: position of the smallest element of x. | |||
|  * | |||
|  * n     - number of elements to examine | |||
|  * x     - input vector | |||
|  * inc_x - distance, in elements, between consecutive entries of x | |||
|  * | |||
|  * Returns the 1-based position of the first occurrence of the smallest | |||
|  * value, or 0 when n <= 0 or inc_x <= 0. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG pos; | |||
|     BLASLONG best = 0; | |||
|     FLOAT best_val = 0.0; | |||
|     if (n <= 0 || inc_x <= 0) return(best); | |||
|     /* element 0 seeds the search; the scan starts at element 1 */ | |||
|     best_val = x[0]; | |||
|     for (k = 1, pos = inc_x; k < n; k++, pos += inc_x) | |||
|     { | |||
|         /* strict '<' keeps the earliest position on ties */ | |||
|         if( x[pos] < best_val ) | |||
|         { | |||
|             best = k; | |||
|             best_val = x[pos]; | |||
|         } | |||
|     } | |||
|     return(best+1); | |||
| } | |||
| @@ -0,0 +1,212 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| /* Precision-neutral aliases used by the kernel below. Each name resolves to | |||
|    the float64 / uint64 / e64 `xm8` intrinsic when DOUBLE is defined and to | |||
|    the float32 / uint32 / e32 `xm8` intrinsic otherwise. */ | |||
| #if defined(DOUBLE) | |||
| /* ABS is defined for parity with the other kernels; the kernel in this file | |||
|    does not reference it. */ | |||
| #define ABS fabs | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float64xm8 | |||
| #define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8 | |||
| #define VMFIRSTM vmfirstm_e64xm8 | |||
| /* Index vector helpers: element width matches the float width above. */ | |||
| #define UINT_V_T uint64xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint64xm8 | |||
| #define VIDV_UINT vidv_uint64xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 | |||
| #define VADDVX_UINT vaddvx_uint64xm8 | |||
| #define VMVVX_UINT vmvvx_uint64xm8 | |||
| #else | |||
| /* Single-precision counterparts of the aliases above. */ | |||
| #define ABS fabsf | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float32xm8 | |||
| #define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8 | |||
| #define VMFIRSTM vmfirstm_e32xm8 | |||
| #define UINT_V_T uint32xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint32xm8 | |||
| #define VIDV_UINT vidv_uint32xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 | |||
| #define VADDVX_UINT vaddvx_uint32xm8 | |||
| #define VMVVX_UINT vmvvx_uint32xm8 | |||
| #endif | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| FLOAT minf=FLT_MAX; | |||
| unsigned int min_index = 0; | |||
| if (n <= 0 || inc_x <= 0) return(min_index); | |||
| FLOAT_V_T vx, v_min; | |||
| UINT_V_T v_min_index; | |||
| MASK_T mask; | |||
| unsigned int gvl = 0; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v_min_index = VMVVX_UINT(0, gvl); | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx = VLEV_FLOAT(&x[j], gvl); | |||
| //index where element less than v_min | |||
| mask = VMFLTVV_FLOAT(vx, v_min, gvl); | |||
| v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1 \n\t" | |||
| "vsetvli x0, %2, e64,m8 \n\t" | |||
| "vid.v %0, v0.t \n\t" | |||
| :"+v"(v_min_index) | |||
| :"v"(mask), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1 \n\t" | |||
| "vsetvli x0, %2, e32,m8 \n\t" | |||
| "vid.v %0, v0.t \n\t" | |||
| :"+v"(v_min_index) | |||
| :"v"(mask), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); | |||
| //update v_min and start_index j | |||
| v_min = VFMINVV_FLOAT(v_min, vx, gvl); | |||
| j += gvl; | |||
| } | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| minf = vx[0]; | |||
| mask = VMFLEVF_FLOAT(v_min, minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v_min = VLEV_FLOAT(&x[j], gvl); | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| FLOAT cur_minf = vx[0]; | |||
| if(cur_minf < minf){ | |||
| //tail index | |||
| v_min_index = VIDV_UINT(gvl); | |||
| v_min_index = VADDVX_UINT(v_min_index, j, gvl); | |||
| mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| } | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| unsigned int idx = 0, inc_v = gvl * inc_x; | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v_min_index = VMVVX_UINT(0, gvl); | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| //index where element less than v_min | |||
| mask = VMFLTVV_FLOAT(vx, v_min, gvl); | |||
| v_min_index = VIDV_MASK_UINT(v_min_index, mask, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1 \n\t" | |||
| "vsetvli x0, %2, e64,m8 \n\t" | |||
| "vid.v %0, v0.t \n\t" | |||
| :"+v"(v_min_index) | |||
| :"v"(mask), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1 \n\t" | |||
| "vsetvli x0, %2, e32,m8 \n\t" | |||
| "vid.v %0, v0.t \n\t" | |||
| :"+v"(v_min_index) | |||
| :"v"(mask), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask, gvl); | |||
| //update v_min and start_index j | |||
| v_min = VFMINVV_FLOAT(v_min, vx, gvl); | |||
| j += gvl; | |||
| idx += inc_v; | |||
| } | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| minf = vx[0]; | |||
| mask = VMFLEVF_FLOAT(v_min, minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v_min = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| vx = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx = VFREDMINVS_FLOAT(v_min, vx, gvl); | |||
| FLOAT cur_minf = vx[0]; | |||
| if(cur_minf < minf){ | |||
| //tail index | |||
| v_min_index = VIDV_UINT(gvl); | |||
| v_min_index = VADDVX_UINT(v_min_index, j, gvl); | |||
| mask = VMFLEVF_FLOAT(v_min, cur_minf, gvl); | |||
| min_index = VMFIRSTM(mask,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| } | |||
| } | |||
| } | |||
| return(min_index+1); | |||
| } | |||
| @@ -0,0 +1,81 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* ABS is fabs when DOUBLE is defined and fabsf otherwise. */ | |||
| #if defined(DOUBLE) | |||
| #define ABS fabs | |||
| #else | |||
| #define ABS fabsf | |||
| #endif | |||
| /* Sum of the absolute values of the adjacent elements x[i] and x[i+1]. | |||
|    The arguments and the whole expansion are parenthesised so the macro | |||
|    stays correct inside larger expressions and with compound arguments. */ | |||
| #define CABS1(x,i) (ABS((x)[(i)])+ABS((x)[(i)+1])) | |||
| /* | |||
|  * Scalar reference kernel: position of the entry of x with the largest | |||
|  * CABS1 value, where each entry occupies two adjacent FLOATs. | |||
|  * | |||
|  * n     - number of two-element entries to examine | |||
|  * x     - input vector | |||
|  * inc_x - distance, in entries, between consecutive entries of x | |||
|  * | |||
|  * Returns the 1-based position of the first entry holding the largest | |||
|  * value, or 0 when n <= 0 or inc_x <= 0. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG pos; | |||
|     BLASLONG step; | |||
|     BLASLONG best = 0; | |||
|     FLOAT best_val; | |||
|     if (n <= 0 || inc_x <= 0) return(best); | |||
|     /* every entry spans two FLOATs, so the element step is doubled */ | |||
|     step = 2 * inc_x; | |||
|     best_val = CABS1(x,0); | |||
|     for (k = 1, pos = step; k < n; k++, pos += step) | |||
|     { | |||
|         /* strict '>' keeps the earliest position on ties */ | |||
|         if( CABS1(x,pos) > best_val ) | |||
|         { | |||
|             best = k; | |||
|             best_val = CABS1(x,pos); | |||
|         } | |||
|     } | |||
|     return(best+1); | |||
| } | |||
| @@ -0,0 +1,246 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* Precision-neutral aliases used by the kernel below. Each name resolves to | |||
|    the float64 / uint64 / e64 `xm8` intrinsic when DOUBLE is defined and to | |||
|    the float32 / uint32 / e32 `xm8` intrinsic otherwise. */ | |||
| #if defined(DOUBLE) | |||
| /* ABS was only defined for single precision; define it here too so both | |||
|    branches expose the same set of names. */ | |||
| #define ABS fabs | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 | |||
| #define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float64xm8 | |||
| #define VMFGEVF_FLOAT vmfgevf_e64xm8_float64xm8 | |||
| #define VMFIRSTM vmfirstm_e64xm8 | |||
| /* Index vector helpers: element width matches the float width above. */ | |||
| #define UINT_V_T uint64xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint64xm8 | |||
| #define VIDV_UINT vidv_uint64xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 | |||
| #define VADDVX_UINT vaddvx_uint64xm8 | |||
| #define VFADDVV_FLOAT vfaddvv_float64xm8 | |||
| #define VMVVX_UINT vmvvx_uint64xm8 | |||
| #else | |||
| /* Single-precision counterparts of the aliases above. */ | |||
| #define ABS fabsf | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float32xm8 | |||
| #define VMFGEVF_FLOAT vmfgevf_e32xm8_float32xm8 | |||
| #define VMFIRSTM vmfirstm_e32xm8 | |||
| #define UINT_V_T uint32xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint32xm8 | |||
| #define VIDV_UINT vidv_uint32xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 | |||
| #define VADDVX_UINT vaddvx_uint32xm8 | |||
| #define VFADDVV_FLOAT vfaddvv_float32xm8 | |||
| #define VMVVX_UINT vmvvx_uint32xm8 | |||
| #endif | |||
| /* The register grouping passed to vsetvli is the same for both precisions. */ | |||
| #define RVV_M RVV_M8 | |||
| /* | |||
|  * Vectorised search for the entry of x with the largest |x[ix]| + |x[ix+1]|, | |||
|  * where every entry occupies two adjacent FLOATs (presumably the real and | |||
|  * imaginary parts of a complex number - confirm against the caller). | |||
|  * | |||
|  * n     - number of two-element entries to examine | |||
|  * x     - input vector | |||
|  * inc_x - distance, in entries, between consecutive entries of x | |||
|  * | |||
|  * Returns the 1-based position of the selected entry, or 0 when n <= 0 or | |||
|  * inc_x <= 0. | |||
|  * | |||
|  * NOTE(review): the final lane is chosen with VMFIRSTM, i.e. the first lane | |||
|  * whose running maximum equals the global maximum. When several lanes tie, | |||
|  * that lane does not necessarily hold the smallest entry index - confirm | |||
|  * the intended tie-breaking rule. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG i=0, j=0; | |||
|     FLOAT maxf=0.0; | |||
|     unsigned int max_index = 0; | |||
|     if (n <= 0 || inc_x <= 0) return(max_index); | |||
|     FLOAT_V_T vx0, vx1, v_max; | |||
|     UINT_V_T v_max_index; | |||
|     MASK_T mask0, mask1; | |||
|     unsigned int gvl = 0; | |||
|     gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|     v_max_index = VMVVX_UINT(0, gvl); | |||
|     //sums of absolute values are never negative, so -1 is below any candidate | |||
|     v_max = VFMVVF_FLOAT(-1, gvl); | |||
|     //each entry is two FLOATs wide: byte stride and element step are doubled | |||
|     BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); | |||
|     BLASLONG inc_xv = gvl * inc_x * 2; | |||
|     BLASLONG ix = 0; | |||
|     //main loop: whole groups of gvl entries | |||
|     for(i=0,j=0; i < n/gvl; i++){ | |||
|         vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|         //fabs(vector): lanes below zero are replaced by 0 - x | |||
|         mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); | |||
|         vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); | |||
|         vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
|         //fabs(vector) | |||
|         mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); | |||
|         vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); | |||
|         vx0 = VFADDVV_FLOAT(vx0, vx1, gvl); | |||
|         //index where element greater than v_max | |||
|         mask0 = VMFLTVV_FLOAT(v_max, vx0, gvl); | |||
|         v_max_index = VIDV_MASK_UINT(v_max_index, mask0, gvl); | |||
|         v_max_index = VADDVX_MASK_UINT(v_max_index, v_max_index, j, mask0, gvl); | |||
|         //update v_max and start_index j | |||
|         v_max = VFMAXVV_FLOAT(v_max, vx0, gvl); | |||
|         j += gvl; | |||
|         ix += inc_xv; | |||
|     } | |||
|     //reduce the per-lane maxima and look up the index stored for that lane | |||
|     vx0 = VFMVVF_FLOAT(0, gvl); | |||
|     vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl); | |||
|     maxf = vx0[0]; | |||
|     mask0 = VMFGEVF_FLOAT(v_max, maxf, gvl); | |||
|     max_index = VMFIRSTM(mask0,gvl); | |||
|     max_index = v_max_index[max_index]; | |||
|     if(j < n){ | |||
|         //tail: the remaining n-j entries | |||
|         gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|         vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|         //fabs(vector) | |||
|         mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); | |||
|         vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); | |||
|         vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
|         //fabs(vector) | |||
|         mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); | |||
|         vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); | |||
|         v_max = VFADDVV_FLOAT(vx0, vx1, gvl); | |||
|         vx0 = VFMVVF_FLOAT(0, gvl); | |||
|         vx0 = VFREDMAXVS_FLOAT(v_max, vx0, gvl); | |||
|         FLOAT cur_maxf = vx0[0]; | |||
|         //strict comparison keeps the earlier hit when the tail only ties | |||
|         if(cur_maxf > maxf){ | |||
|             //tail index | |||
|             v_max_index = VIDV_UINT(gvl); | |||
|             v_max_index = VADDVX_UINT(v_max_index, j, gvl); | |||
|             mask0 = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); | |||
|             max_index = VMFIRSTM(mask0,gvl); | |||
|             max_index = v_max_index[max_index]; | |||
|         } | |||
|     } | |||
|     return(max_index+1); | |||
| } | |||
| @@ -0,0 +1,81 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if defined(DOUBLE) | |||
| #define ABS fabs | |||
| #else | |||
| #define ABS fabsf | |||
| #endif | |||
| /* abs(x[i]) + abs(x[i+1]). Arguments and the whole expansion are parenthesised | |||
|  * so the macro behaves as a single operand inside any larger expression. */ | |||
| #define CABS1(x,i) (ABS((x)[(i)]) + ABS((x)[(i)+1])) | |||
| /* | |||
|  * Returns the 1-based position of the entry with the smallest | |||
|  * abs(x[ix]) + abs(x[ix+1]); entries start 2*inc_x elements apart | |||
|  * (presumably the real/imaginary parts of complex values - confirm with callers). | |||
|  * The first of several equal minima wins because the comparison is strict. | |||
|  * Returns 0 when n <= 0 or inc_x <= 0. | |||
|  */ | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG i; | |||
|     BLASLONG ix; | |||
|     BLASLONG inc_x2; | |||
|     BLASLONG min = 0; | |||
|     FLOAT minf; | |||
|     FLOAT cur; | |||
|     if (n <= 0 || inc_x <= 0) return(min); | |||
|     inc_x2 = 2 * inc_x; | |||
|     /* Entry 0 seeds the running minimum. */ | |||
|     minf = CABS1(x, 0); | |||
|     for (i = 1, ix = inc_x2; i < n; i++, ix += inc_x2) | |||
|     { | |||
|         /* Evaluate the magnitude once and reuse it for compare and store. */ | |||
|         cur = CABS1(x, ix); | |||
|         if (cur < minf) | |||
|         { | |||
|             min = i; | |||
|             minf = cur; | |||
|         } | |||
|     } | |||
|     return(min + 1); | |||
| } | |||
| @@ -0,0 +1,247 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| /* Precision-dependent aliases: each generic name used by the kernel below maps | |||
|  * to the float64xm8 intrinsic/type when DOUBLE is defined, else to the | |||
|  * float32xm8 one. */ | |||
| #if defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float64xm8 | |||
| #define MASK_T e64xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm8_float64xm8 | |||
| #define VMFLTVV_FLOAT vmfltvv_e64xm8_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float64xm8 | |||
| #define VMFLEVF_FLOAT vmflevf_e64xm8_float64xm8 | |||
| #define VMFIRSTM vmfirstm_e64xm8 | |||
| #define UINT_V_T uint64xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint64xm8 | |||
| #define VIDV_UINT vidv_uint64xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint64xm8 | |||
| #define VADDVX_UINT vaddvx_uint64xm8 | |||
| #define VFADDVV_FLOAT vfaddvv_float64xm8 | |||
| #define VMVVX_UINT vmvvx_uint64xm8 | |||
| #else | |||
| /* NOTE(review): ABS is defined only in this single-precision branch and the | |||
|  * kernel below does not reference it - looks like a leftover; confirm. */ | |||
| #define ABS fabsf | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float32xm8 | |||
| #define MASK_T e32xm8_t | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm8_float32xm8 | |||
| #define VMFLTVV_FLOAT vmfltvv_e32xm8_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float32xm8 | |||
| #define VMFLEVF_FLOAT vmflevf_e32xm8_float32xm8 | |||
| #define VMFIRSTM vmfirstm_e32xm8 | |||
| #define UINT_V_T uint32xm8_t | |||
| #define VIDV_MASK_UINT vidv_mask_uint32xm8 | |||
| #define VIDV_UINT vidv_uint32xm8 | |||
| #define VADDVX_MASK_UINT vaddvx_mask_uint32xm8 | |||
| #define VADDVX_UINT vaddvx_uint32xm8 | |||
| #define VFADDVV_FLOAT vfaddvv_float32xm8 | |||
| #define VMVVX_UINT vmvvx_uint32xm8 | |||
| #endif | |||
| /* Register-group setting passed to vsetvli; the same for both precisions. */ | |||
| #define RVV_M RVV_M8 | |||
| BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| FLOAT minf=FLT_MAX; | |||
| unsigned int min_index = 0; | |||
| if (n <= 0 || inc_x <= 0) return(min_index); | |||
| FLOAT_V_T vx0, vx1, v_min; | |||
| UINT_V_T v_min_index; | |||
| MASK_T mask0, mask1; | |||
| unsigned int gvl = 0; | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| v_min_index = VMVVX_UINT(0, gvl); | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); | |||
| BLASLONG inc_xv = gvl * inc_x * 2; | |||
| BLASLONG ix = 0; | |||
| for(i=0,j=0; i < n/gvl; i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); | |||
| vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); | |||
| vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| vx0 = VFADDVV_FLOAT(vx0, vx1, gvl); | |||
| //index where element less than v_min | |||
| mask0 = VMFLTVV_FLOAT(vx0, v_min, gvl); | |||
| v_min_index = VIDV_MASK_UINT(v_min_index, mask0, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1 \n\t" | |||
| "vsetvli x0, %2, e64,m8 \n\t" | |||
| "vid.v %0, v0.t \n\t" | |||
| :"+v"(v_min_index) | |||
| :"v"(mask0), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1 \n\t" | |||
| "vsetvli x0, %2, e32,m8 \n\t" | |||
| "vid.v %0, v0.t \n\t" | |||
| :"+v"(v_min_index) | |||
| :"v"(mask0), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| v_min_index = VADDVX_MASK_UINT(v_min_index, v_min_index, j, mask0, gvl); | |||
| //update v_min and start_index j | |||
| v_min = VFMINVV_FLOAT(v_min, vx0, gvl); | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| } | |||
| vx0 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl); | |||
| minf = vx0[0]; | |||
| mask0 = VMFLEVF_FLOAT(v_min, minf, gvl); | |||
| min_index = VMFIRSTM(mask0,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v_min_index = VMVVX_UINT(0, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask0 = VMFLTVF_FLOAT(vx0, 0, gvl); | |||
| vx0 = VFRSUBVF_MASK_FLOAT(vx0, vx0, 0, mask0, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx0) | |||
| :"v"(mask0), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask1 = VMFLTVF_FLOAT(vx1, 0, gvl); | |||
| vx1 = VFRSUBVF_MASK_FLOAT(vx1, vx1, 0, mask1, gvl); | |||
| /* | |||
| #if defined(DOUBLE) | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e64,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #else | |||
| asm volatile( | |||
| "vor.vv v0, %1, %1\n\t" | |||
| "vsetvli x0, %3, e32,m8 \n\t" | |||
| "vfrsub.vf %0, %0, %2, v0.t \n\t" | |||
| :"+v"(vx1) | |||
| :"v"(mask1), "f"(zero), "r"(gvl) | |||
| :"v0"); | |||
| #endif | |||
| */ | |||
| v_min = VFADDVV_FLOAT(vx0, vx1, gvl); | |||
| vx0 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| vx0 = VFREDMINVS_FLOAT(v_min, vx0, gvl); | |||
| FLOAT cur_minf = vx0[0]; | |||
| if(cur_minf < minf){ | |||
| //tail index | |||
| v_min_index = VIDV_UINT(gvl); | |||
| v_min_index = VADDVX_UINT(v_min_index, j, gvl); | |||
| mask0 = VMFLEVF_FLOAT(v_min, cur_minf, gvl); | |||
| min_index = VMFIRSTM(mask0,gvl); | |||
| min_index = v_min_index[min_index]; | |||
| } | |||
| } | |||
| return(min_index+1); | |||
| } | |||
| @@ -0,0 +1,65 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* | |||
|  * Returns the largest of x[0], x[inc_x], ..., x[(n-1)*inc_x]. | |||
|  * Returns 0.0 when n <= 0 or inc_x <= 0. | |||
|  */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     FLOAT best = 0.0; | |||
|     BLASLONG k; | |||
|     BLASLONG pos; | |||
|     if (n <= 0 || inc_x <= 0) return(best); | |||
|     /* Element 0 seeds the running maximum; the scan starts at element 1. */ | |||
|     best = x[0]; | |||
|     for (k = 1, pos = inc_x; k < n; k++, pos += inc_x) | |||
|     { | |||
|         if (x[pos] > best) | |||
|         { | |||
|             best = x[pos]; | |||
|         } | |||
|     } | |||
|     return(best); | |||
| } | |||
| @@ -0,0 +1,116 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| /* Precision-dependent aliases: the generic names used by the kernel below map | |||
|  * to the float32xm8 intrinsics/types unless DOUBLE is defined, in which case | |||
|  * they map to the float64xm8 ones. */ | |||
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float32xm8 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFMAXVV_FLOAT vfmaxvv_float64xm8 | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| if (n <= 0 || inc_x <= 0) return(0.0); | |||
| FLOAT maxf=-FLT_MAX; | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T v0, v1, v_max; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| for(i=0,j=0; i<n/(gvl*2); i++){ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| v_max = VFMAXVV_FLOAT(v_max, v0, gvl); | |||
| v1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| v_max = VFMAXVV_FLOAT(v_max, v1, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| v1 = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v_max, v1, gvl); | |||
| maxf = v0[0]; | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| v1 = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v0, v1, gvl); | |||
| if(v0[0] > maxf) | |||
| maxf = v0[0]; | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| BLASLONG stride_x = inc_x * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| v_max = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| BLASLONG idx = 0, inc_xv = inc_x * gvl; | |||
| for(i=0,j=0; i<n/(gvl*2); i++){ | |||
| v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| v_max = VFMAXVV_FLOAT(v_max, v0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[idx+inc_xv], stride_x, gvl); | |||
| v_max = VFMAXVV_FLOAT(v_max, v1, gvl); | |||
| j += gvl * 2; | |||
| idx += inc_xv * 2; | |||
| } | |||
| v1 = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v_max, v1, gvl); | |||
| maxf = v0[0]; | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| v1 = VFMVVF_FLOAT(-FLT_MAX, gvl); | |||
| v0 = VFREDMAXVS_FLOAT(v0, v1, gvl); | |||
| if(v0[0] > maxf) | |||
| maxf = v0[0]; | |||
| j += gvl; | |||
| } | |||
| } | |||
| return(maxf); | |||
| } | |||
| @@ -0,0 +1,65 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : NoTest | |||
| * BLASTEST double : NoTest | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* | |||
|  * Returns the smallest of x[0], x[inc_x], ..., x[(n-1)*inc_x]. | |||
|  * Returns 0.0 when n <= 0 or inc_x <= 0. | |||
|  */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     FLOAT best = 0.0; | |||
|     BLASLONG k; | |||
|     BLASLONG pos; | |||
|     if (n <= 0 || inc_x <= 0) return(best); | |||
|     /* Element 0 seeds the running minimum; the scan starts at element 1. */ | |||
|     best = x[0]; | |||
|     for (k = 1, pos = inc_x; k < n; k++, pos += inc_x) | |||
|     { | |||
|         if (x[pos] < best) | |||
|         { | |||
|             best = x[pos]; | |||
|         } | |||
|     } | |||
|     return(best); | |||
| } | |||
| @@ -0,0 +1,116 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
| /* Precision-dependent aliases: the generic names used by the kernel below map | |||
|  * to the float32xm8 intrinsics/types unless DOUBLE is defined, in which case | |||
|  * they map to the float64xm8 ones. */ | |||
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float32xm8 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VFREDMINVS_FLOAT vfredminvs_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #define VFMINVV_FLOAT vfminvv_float64xm8 | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| if (n <= 0 || inc_x <= 0) return(0.0); | |||
| FLOAT minf=FLT_MAX; | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T v0, v1, v_min; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| for(i=0,j=0; i<n/(gvl*2); i++){ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| v_min = VFMINVV_FLOAT(v_min, v0, gvl); | |||
| v1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| v_min = VFMINVV_FLOAT(v_min, v1, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| v1 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v0 = VFREDMINVS_FLOAT(v_min, v1, gvl); | |||
| minf = v0[0]; | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| v1 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v0 = VFREDMINVS_FLOAT(v0, v1, gvl); | |||
| if(v0[0] < minf) | |||
| minf = v0[0]; | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| BLASLONG stride_x = inc_x * sizeof(FLOAT); | |||
| if(gvl <= n/2){ | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| BLASLONG idx = 0, inc_xv = inc_x * gvl; | |||
| for(i=0,j=0; i<n/(gvl*2); i++){ | |||
| v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| v_min = VFMINVV_FLOAT(v_min, v0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[idx+inc_xv], stride_x, gvl); | |||
| v_min = VFMINVV_FLOAT(v_min, v1, gvl); | |||
| j += gvl * 2; | |||
| idx += inc_xv * 2; | |||
| } | |||
| v1 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v0 = VFREDMINVS_FLOAT(v_min, v1, gvl); | |||
| minf = v0[0]; | |||
| } | |||
| for(;j<n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| v1 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v0 = VFREDMINVS_FLOAT(v0, v1, gvl); | |||
| if(v0[0] < minf) | |||
| minf = v0[0]; | |||
| j += gvl; | |||
| } | |||
| } | |||
| return(minf); | |||
| } | |||
| @@ -0,0 +1,88 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/13 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| /* ABS resolves to the absolute-value routine matching the build precision: | |||
|  * fabs when DOUBLE is defined, fabsf otherwise. */ | |||
| #if defined(DOUBLE) | |||
| #define ABS fabs | |||
| #else | |||
| #define ABS fabsf | |||
| #endif | |||
| /* | |||
|  * Returns scale * sqrt(ssq) accumulated over x[0], x[inc_x], ..., | |||
|  * x[(n-1)*inc_x], where scale is the largest magnitude seen so far and ssq | |||
|  * the sum of squares of the elements divided by scale. | |||
|  * Returns 0.0 when n <= 0 or inc_x <= 0, and ABS(x[0]) when n == 1. | |||
|  */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     FLOAT scale = 0.0; | |||
|     FLOAT ssq = 1.0; | |||
|     FLOAT absxi = 0.0; | |||
|     BLASLONG pos; | |||
|     BLASLONG limit; | |||
|     if (n <= 0 || inc_x <= 0) return(0.0); | |||
|     if (n == 1) return(ABS(x[0])); | |||
|     /* One past the last element offset that is visited. */ | |||
|     limit = n * inc_x; | |||
|     for (pos = 0; pos < limit; pos += inc_x) | |||
|     { | |||
|         /* Exact zeros contribute nothing and would divide by a zero scale. */ | |||
|         if (x[pos] == 0.0) continue; | |||
|         absxi = ABS(x[pos]); | |||
|         if (scale < absxi) | |||
|         { | |||
|             /* New largest magnitude: re-express the running sum relative to it. */ | |||
|             ssq = 1 + ssq * (scale / absxi) * (scale / absxi); | |||
|             scale = absxi; | |||
|         } | |||
|         else | |||
|         { | |||
|             ssq += (absxi / scale) * (absxi / scale); | |||
|         } | |||
|     } | |||
|     return(scale * sqrt(ssq)); | |||
| } | |||
| @@ -0,0 +1,220 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the vector types and intrinsics used below.
 * With DOUBLE defined every alias resolves to the ...64xm4 name, otherwise to
 * the ...32xm4 name; ABS follows the same split (fabs / fabsf).
 */
#if defined(DOUBLE)
#define RVV_EFLOAT          RVV_E64
#define RVV_M               RVV_M4
#define FLOAT_V_T           float64xm4_t
#define VLEV_FLOAT          vlev_float64xm4
#define VLSEV_FLOAT         vlsev_float64xm4
#define VFREDSUM_FLOAT      vfredsumvs_float64xm4
#define VFMACCVV_FLOAT      vfmaccvv_float64xm4
#define VFMVVF_FLOAT        vfmvvf_float64xm4
#define VFDOTVV_FLOAT       vfdotvv_float64xm4
#define ABS                 fabs
#define MASK_T              e64xm4_t
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4
#define VMFGTVF_FLOAT       vmfgtvf_e64xm4_float64xm4
#define VMFIRSTM            vmfirstm_e64xm4
#define VFDIVVF_FLOAT       vfdivvf_float64xm4
#define VMFLTVF_FLOAT       vmfltvf_e64xm4_float64xm4
#define VFREDMAXVS_FLOAT    vfredmaxvs_float64xm4
#else
#define RVV_EFLOAT          RVV_E32
#define RVV_M               RVV_M4
#define FLOAT_V_T           float32xm4_t
#define VLEV_FLOAT          vlev_float32xm4
#define VLSEV_FLOAT         vlsev_float32xm4
#define VFREDSUM_FLOAT      vfredsumvs_float32xm4
#define VFMACCVV_FLOAT      vfmaccvv_float32xm4
#define VFMVVF_FLOAT        vfmvvf_float32xm4
#define VFDOTVV_FLOAT       vfdotvv_float32xm4
#define ABS                 fabsf
#define MASK_T              e32xm4_t
#define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4
#define VMFGTVF_FLOAT       vmfgtvf_e32xm4_float32xm4
#define VMFIRSTM            vmfirstm_e32xm4
#define VFDIVVF_FLOAT       vfdivvf_float32xm4
#define VMFLTVF_FLOAT       vmfltvf_e32xm4_float32xm4
#define VFREDMAXVS_FLOAT    vfredmaxvs_float32xm4
#endif
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| if ( n < 0 ) return(0.0); | |||
| if(n == 1) return (ABS(x[0])); | |||
| FLOAT_V_T vr, v0, v_zero; | |||
| unsigned int gvl = 0; | |||
| FLOAT scale = 0.0, ssq = 0.0; | |||
| MASK_T mask; | |||
| BLASLONG index = 0; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
| //if scale change | |||
| mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
| index = VMFIRSTM(mask, gvl); | |||
| if(index == -1){//no elements greater than scale | |||
| if(scale != 0.0){ | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
| } | |||
| }else{//found greater element | |||
| //ssq in vector vr: vr[0] | |||
| vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
| //total ssq before current vector | |||
| ssq += vr[0]; | |||
| //find max | |||
| vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
| //update ssq before max_index | |||
| ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
| //update scale | |||
| scale = vr[0]; | |||
| //ssq in vector vr | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
| } | |||
| j += gvl; | |||
| } | |||
| //ssq in vector vr: vr[0] | |||
| vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
| //total ssq now | |||
| ssq += vr[0]; | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
| //if scale change | |||
| mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
| index = VMFIRSTM(mask, gvl); | |||
| if(index == -1){//no elements greater than scale | |||
| if(scale != 0.0) | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| }else{//found greater element | |||
| //find max | |||
| vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
| //update ssq before max_index | |||
| ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
| //update scale | |||
| scale = vr[0]; | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| } | |||
| vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
| //ssq in vector vr: vr[0] | |||
| vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
| //total ssq now | |||
| ssq += vr[0]; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| int idx = 0, inc_v = inc_x * gvl; | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
| //if scale change | |||
| mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
| index = VMFIRSTM(mask, gvl); | |||
| if(index == -1){//no elements greater than scale | |||
| if(scale != 0.0){ | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
| } | |||
| }else{//found greater element | |||
| //ssq in vector vr: vr[0] | |||
| vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
| //total ssq before current vector | |||
| ssq += vr[0]; | |||
| //find max | |||
| vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
| //update ssq before max_index | |||
| ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
| //update scale | |||
| scale = vr[0]; | |||
| //ssq in vector vr | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
| } | |||
| j += gvl; | |||
| idx += inc_v; | |||
| } | |||
| //ssq in vector vr: vr[0] | |||
| vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
| //total ssq now | |||
| ssq += vr[0]; | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
| //fabs(vector) | |||
| mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
| //if scale change | |||
| mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
| index = VMFIRSTM(mask, gvl); | |||
| if(index == -1){//no elements greater than scale | |||
| if(scale != 0.0) | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| }else{//found greater element | |||
| //find max | |||
| vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
| //update ssq before max_index | |||
| ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
| //update scale | |||
| scale = vr[0]; | |||
| v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
| } | |||
| vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
| //ssq in vector vr: vr[0] | |||
| vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
| //total ssq now | |||
| ssq += vr[0]; | |||
| } | |||
| } | |||
| return(scale * sqrt(ssq)); | |||
| } | |||
| @@ -0,0 +1,128 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the vector types and intrinsics used below.
 * With DOUBLE defined every alias resolves to the ...64xm8 name, otherwise to
 * the ...32xm8 name; ABS follows the same split (fabs / fabsf).
 */
#if defined(DOUBLE)
#define RVV_EFLOAT     RVV_E64
#define RVV_M          RVV_M8
#define FLOAT_V_T      float64xm8_t
#define VLEV_FLOAT     vlev_float64xm8
#define VLSEV_FLOAT    vlsev_float64xm8
#define VFREDSUM_FLOAT vfredsumvs_float64xm8
#define VFMACCVV_FLOAT vfmaccvv_float64xm8
#define VFMVVF_FLOAT   vfmvvf_float64xm8
#define VFDOTVV_FLOAT  vfdotvv_float64xm8
#define ABS            fabs
#else
#define RVV_EFLOAT     RVV_E32
#define RVV_M          RVV_M8
#define FLOAT_V_T      float32xm8_t
#define VLEV_FLOAT     vlev_float32xm8
#define VLSEV_FLOAT    vlsev_float32xm8
#define VFREDSUM_FLOAT vfredsumvs_float32xm8
#define VFMACCVV_FLOAT vfmaccvv_float32xm8
#define VFMVVF_FLOAT   vfmvvf_float32xm8
#define VFDOTVV_FLOAT  vfdotvv_float32xm8
#define ABS            fabsf
#endif
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| double len = 0.0 ; | |||
| if ( n < 0 ) return(0.0); | |||
| if(n == 1) return (ABS(x[0])); | |||
| FLOAT_V_T vr, v0, v1; | |||
| unsigned int gvl = 0; | |||
| if(inc_x == 1){ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl < n/2){ | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n/(2*gvl); i++){ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
| j += gvl; | |||
| v1 = VLEV_FLOAT(&x[j], gvl); | |||
| vr = VFMACCVV_FLOAT(vr, v1, v1, gvl); | |||
| j += gvl; | |||
| } | |||
| v0 = VFMVVF_FLOAT(0, gvl); | |||
| v0 = VFREDSUM_FLOAT(vr, v0, gvl); | |||
| len += v0[0]; | |||
| } | |||
| //tail | |||
| for(;j < n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| //v1 = 0 | |||
| v1 = VFMVVF_FLOAT(0, gvl); | |||
| //vr = VFDOTVV_FLOAT(v0, v0, gvl); | |||
| vr = VFMACCVV_FLOAT(v1, v0, v0, gvl); | |||
| v0 = VFREDSUM_FLOAT(vr, v1, gvl); | |||
| len += v0[0]; | |||
| j += gvl; | |||
| } | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT); | |||
| if(gvl < n/2){ | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n/(2*gvl); i++){ | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
| j += gvl; | |||
| v1 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, v1, v1, gvl); | |||
| j += gvl; | |||
| } | |||
| v0 = VFMVVF_FLOAT(0, gvl); | |||
| v0 = VFREDSUM_FLOAT(vr, v0, gvl); | |||
| len += v0[0]; | |||
| } | |||
| //tail | |||
| for(;j < n;){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl); | |||
| //v1 = 0 | |||
| v1 = VFMVVF_FLOAT(0, gvl); | |||
| //vr = VFDOTVV_FLOAT(v0, v0, gvl); | |||
| vr = VFMACCVV_FLOAT(v1, v0, v0, gvl); | |||
| v0 = VFREDSUM_FLOAT(vr, v1, gvl); | |||
| len += v0[0]; | |||
| j += gvl; | |||
| } | |||
| } | |||
| return(sqrt(len)); | |||
| } | |||
| @@ -0,0 +1,90 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order ColMajor | |||
| * No Trans | |||
| * | |||
| ******************************************************/ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i,j; | |||
| FLOAT *aptr,*bptr; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| bptr = b; | |||
| if ( alpha == 0.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| for(j=0; j<rows; j++) | |||
| { | |||
| bptr[j] = 0.0; | |||
| } | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| if ( alpha == 1.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| for(j=0; j<rows; j++) | |||
| { | |||
| bptr[j] = aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| for(j=0; j<rows; j++) | |||
| { | |||
| bptr[j] = alpha * aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,89 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order ColMajor | |||
| * Trans | |||
| * | |||
| ******************************************************/ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i,j; | |||
| FLOAT *aptr,*bptr; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| if ( alpha == 0.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| bptr = &b[i]; | |||
| for(j=0; j<rows; j++) | |||
| { | |||
| bptr[j*ldb] = 0.0; | |||
| } | |||
| } | |||
| return(0); | |||
| } | |||
| if ( alpha == 1.0 ) | |||
| { | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| bptr = &b[i]; | |||
| for(j=0; j<rows; j++) | |||
| { | |||
| bptr[j*ldb] = aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| } | |||
| return(0); | |||
| } | |||
| for ( i=0; i<cols ; i++ ) | |||
| { | |||
| bptr = &b[i]; | |||
| for(j=0; j<rows; j++) | |||
| { | |||
| bptr[j*ldb] = alpha * aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,90 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order rowMajor | |||
| * No Trans | |||
| * | |||
| ******************************************************/ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i,j; | |||
| FLOAT *aptr,*bptr; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| bptr = b; | |||
| if ( alpha == 0.0 ) | |||
| { | |||
| for ( i=0; i<rows ; i++ ) | |||
| { | |||
| for(j=0; j<cols; j++) | |||
| { | |||
| bptr[j] = 0.0; | |||
| } | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| if ( alpha == 1.0 ) | |||
| { | |||
| for ( i=0; i<rows ; i++ ) | |||
| { | |||
| for(j=0; j<cols; j++) | |||
| { | |||
| bptr[j] = aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| for ( i=0; i<rows ; i++ ) | |||
| { | |||
| for(j=0; j<cols; j++) | |||
| { | |||
| bptr[j] = alpha * aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| bptr += ldb; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,62 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order rowMajor | |||
| * Trans | |||
| * | |||
| ******************************************************/ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
| BLASLONG i,j; | |||
| FLOAT *aptr,*bptr; | |||
| if ( rows <= 0 ) return(0); | |||
| if ( cols <= 0 ) return(0); | |||
| aptr = a; | |||
| for ( i=0; i<rows ; i++ ) | |||
| { | |||
| bptr = &b[i]; | |||
| for(j=0; j<cols; j++) | |||
| { | |||
| bptr[j*ldb] = alpha * aptr[j]; | |||
| } | |||
| aptr += lda; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,62 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0,iy=0; | |||
| FLOAT temp; | |||
| if ( n <= 0 ) return(0); | |||
| while(i < n) | |||
| { | |||
| temp = c*x[ix] + s*y[iy] ; | |||
| y[iy] = c*y[iy] - s*x[ix] ; | |||
| x[ix] = temp ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,196 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the vector types and intrinsics used below.
 * With DOUBLE defined every alias resolves to the ...64xm4 name, otherwise to
 * the ...32xm4 name.
 */
#if defined(DOUBLE)
#define RVV_EFLOAT     RVV_E64
#define RVV_M          RVV_M4
#define FLOAT_V_T      float64xm4_t
#define VLEV_FLOAT     vlev_float64xm4
#define VLSEV_FLOAT    vlsev_float64xm4
#define VSEV_FLOAT     vsev_float64xm4
#define VSSEV_FLOAT    vssev_float64xm4
#define VFMACCVF_FLOAT vfmaccvf_float64xm4
#define VFMULVF_FLOAT  vfmulvf_float64xm4
#define VFMSACVF_FLOAT vfmsacvf_float64xm4
#else
#define RVV_EFLOAT     RVV_E32
#define RVV_M          RVV_M4
#define FLOAT_V_T      float32xm4_t
#define VLEV_FLOAT     vlev_float32xm4
#define VLSEV_FLOAT    vlsev_float32xm4
#define VSEV_FLOAT     vsev_float32xm4
#define VSSEV_FLOAT    vssev_float32xm4
#define VFMACCVF_FLOAT vfmaccvf_float32xm4
#define VFMULVF_FLOAT  vfmulvf_float32xm4
#define VFMSACVF_FLOAT vfmsacvf_float32xm4
#endif
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) | |||
| { | |||
|     /* | |||
|      * Vectorised plane rotation over n element pairs (x[k*inc_x], y[k*inc_y]). | |||
|      * Every pair is overwritten in place: the new x comes from VFMULVF(x, c) | |||
|      * followed by VFMACCVF(.., s, y); the new y comes from VFMULVF(x, s) | |||
|      * followed by VFMSACVF(.., c, y). | |||
|      * NOTE(review): VFMACCVF_FLOAT / VFMSACVF_FLOAT are defined outside this | |||
|      * block; by their names they presumably yield acc + f*v and f*v - acc, | |||
|      * i.e. x' = c*x + s*y and y' = c*y - s*x -- confirm against the macros. | |||
|      * Returns 0 always; n <= 0 is a no-op. | |||
|      */ | |||
|     BLASLONG chunk, done = 0; | |||
|     BLASLONG xoff = 0, yoff = 0; | |||
|     BLASLONG xbytes, ybytes, xstep, ystep; | |||
|     unsigned int vl; | |||
|     FLOAT_V_T xin, yin, xout, yout; | |||
|     if (n <= 0) { | |||
|         return 0; | |||
|     } | |||
|     /* Vector length used for the full-width chunks of every branch. */ | |||
|     vl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|     if (inc_x == 1 && inc_y == 1) { | |||
|         /* Both vectors contiguous: unit-stride loads and stores. */ | |||
|         for (chunk = n / vl; chunk > 0; chunk--) { | |||
|             xin = VLEV_FLOAT(&x[done], vl); | |||
|             yin = VLEV_FLOAT(&y[done], vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSEV_FLOAT(&x[done], xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSEV_FLOAT(&y[done], yout, vl); | |||
|             done += vl; | |||
|         } | |||
|         if (done < n) { | |||
|             /* Remainder that did not fill a whole chunk. */ | |||
|             vl = vsetvli(n - done, RVV_EFLOAT, RVV_M); | |||
|             xin = VLEV_FLOAT(&x[done], vl); | |||
|             yin = VLEV_FLOAT(&y[done], vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSEV_FLOAT(&x[done], xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSEV_FLOAT(&y[done], yout, vl); | |||
|         } | |||
|     } else if (inc_y == 1) { | |||
|         /* x strided, y contiguous. */ | |||
|         xbytes = inc_x * sizeof(FLOAT); | |||
|         xstep = inc_x * vl; | |||
|         for (chunk = n / vl; chunk > 0; chunk--) { | |||
|             xin = VLSEV_FLOAT(&x[xoff], xbytes, vl); | |||
|             yin = VLEV_FLOAT(&y[done], vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSSEV_FLOAT(&x[xoff], xbytes, xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSEV_FLOAT(&y[done], yout, vl); | |||
|             done += vl; | |||
|             xoff += xstep; | |||
|         } | |||
|         if (done < n) { | |||
|             /* xoff equals done*inc_x here, the offset of the first leftover element. */ | |||
|             vl = vsetvli(n - done, RVV_EFLOAT, RVV_M); | |||
|             xin = VLSEV_FLOAT(&x[xoff], xbytes, vl); | |||
|             yin = VLEV_FLOAT(&y[done], vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSSEV_FLOAT(&x[xoff], xbytes, xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSEV_FLOAT(&y[done], yout, vl); | |||
|         } | |||
|     } else if (inc_x == 1) { | |||
|         /* x contiguous, y strided. */ | |||
|         ybytes = inc_y * sizeof(FLOAT); | |||
|         ystep = inc_y * vl; | |||
|         for (chunk = n / vl; chunk > 0; chunk--) { | |||
|             xin = VLEV_FLOAT(&x[done], vl); | |||
|             yin = VLSEV_FLOAT(&y[yoff], ybytes, vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSEV_FLOAT(&x[done], xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSSEV_FLOAT(&y[yoff], ybytes, yout, vl); | |||
|             done += vl; | |||
|             yoff += ystep; | |||
|         } | |||
|         if (done < n) { | |||
|             /* yoff equals done*inc_y here. */ | |||
|             vl = vsetvli(n - done, RVV_EFLOAT, RVV_M); | |||
|             xin = VLEV_FLOAT(&x[done], vl); | |||
|             yin = VLSEV_FLOAT(&y[yoff], ybytes, vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSEV_FLOAT(&x[done], xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSSEV_FLOAT(&y[yoff], ybytes, yout, vl); | |||
|         } | |||
|     } else { | |||
|         /* Both vectors strided. */ | |||
|         xbytes = inc_x * sizeof(FLOAT); | |||
|         ybytes = inc_y * sizeof(FLOAT); | |||
|         xstep = inc_x * vl; | |||
|         ystep = inc_y * vl; | |||
|         for (chunk = n / vl; chunk > 0; chunk--) { | |||
|             xin = VLSEV_FLOAT(&x[xoff], xbytes, vl); | |||
|             yin = VLSEV_FLOAT(&y[yoff], ybytes, vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSSEV_FLOAT(&x[xoff], xbytes, xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSSEV_FLOAT(&y[yoff], ybytes, yout, vl); | |||
|             done += vl; | |||
|             xoff += xstep; | |||
|             yoff += ystep; | |||
|         } | |||
|         if (done < n) { | |||
|             /* xoff / yoff equal done*inc_x / done*inc_y here. */ | |||
|             vl = vsetvli(n - done, RVV_EFLOAT, RVV_M); | |||
|             xin = VLSEV_FLOAT(&x[xoff], xbytes, vl); | |||
|             yin = VLSEV_FLOAT(&y[yoff], ybytes, vl); | |||
|             xout = VFMULVF_FLOAT(xin, c, vl); | |||
|             xout = VFMACCVF_FLOAT(xout, s, yin, vl); | |||
|             VSSEV_FLOAT(&x[xoff], xbytes, xout, vl); | |||
|             yout = VFMULVF_FLOAT(xin, s, vl); | |||
|             yout = VFMSACVF_FLOAT(yout, c, yin, vl); | |||
|             VSSEV_FLOAT(&y[yoff], ybytes, yout, vl); | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,63 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
|     /* | |||
|      * Scalar reference scal: for k in [0, n) sets x[k*inc_x] to da * x[k*inc_x], | |||
|      * or to 0.0 outright when da == 0.0. | |||
|      * Returns 0 always; does nothing when n <= 0 or inc_x <= 0. | |||
|      * y, inc_y and the dummy arguments are not read. | |||
|      */ | |||
|     BLASLONG count; | |||
|     BLASLONG pos = 0; | |||
|     if (n <= 0 || inc_x <= 0) { | |||
|         return 0; | |||
|     } | |||
|     if (da == 0.0) { | |||
|         /* da never changes, so the zero test is decided once up front. */ | |||
|         for (count = 0; count < n; count++) { | |||
|             x[pos] = 0.0; | |||
|             pos += inc_x; | |||
|         } | |||
|     } else { | |||
|         for (count = 0; count < n; count++) { | |||
|             x[pos] = da * x[pos]; | |||
|             pos += inc_x; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,133 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /* Precision-generic aliases for the vector scal kernel: the DOUBLE build | |||
|  * maps them onto the float64xm8 intrinsics, any other build onto the | |||
|  * float32xm8 ones. */ | |||
| #if defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VSEV_FLOAT vsev_float64xm8 | |||
| #define VSSEV_FLOAT vssev_float64xm8 | |||
| #define VFMULVF_FLOAT vfmulvf_float64xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm8 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VSEV_FLOAT vsev_float32xm8 | |||
| #define VSSEV_FLOAT vssev_float32xm8 | |||
| #define VFMULVF_FLOAT vfmulvf_float32xm8 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm8 | |||
| #endif | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
|     /* | |||
|      * Vector scal: scales the n elements x[k*inc_x], k in [0, n), by da in place. | |||
|      * When da == 0.0 the elements are overwritten with zero instead of being | |||
|      * multiplied. | |||
|      * Returns 0 always; does nothing when n <= 0 or inc_x <= 0. | |||
|      * y, inc_y and the dummy arguments are not read. | |||
|      * | |||
|      * Fix: the strided (inc_x != 1) zero-fill path used unit-stride stores at | |||
|      * &x[j], which cleared the first n contiguous elements instead of the n | |||
|      * strided ones. It now uses strided stores at the running offset ix. | |||
|      */ | |||
|     BLASLONG i = 0, j = 0; | |||
|     FLOAT_V_T v0, v1; | |||
|     unsigned int gvl = 0; | |||
|     if ((n <= 0) || (inc_x <= 0)) | |||
|         return 0; | |||
|     if (inc_x == 1) { | |||
|         if (da == 0.0) { | |||
|             memset(&x[0], 0, n * sizeof(FLOAT)); | |||
|             return 0; | |||
|         } | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         /* Two vectors per iteration; the trip count is 0 when n < 2*gvl. */ | |||
|         for (i = 0, j = 0; i < n / (2 * gvl); i++, j += 2 * gvl) { | |||
|             v0 = VLEV_FLOAT(&x[j], gvl); | |||
|             v0 = VFMULVF_FLOAT(v0, da, gvl); | |||
|             VSEV_FLOAT(&x[j], v0, gvl); | |||
|             v1 = VLEV_FLOAT(&x[j + gvl], gvl); | |||
|             v1 = VFMULVF_FLOAT(v1, da, gvl); | |||
|             VSEV_FLOAT(&x[j + gvl], v1, gvl); | |||
|         } | |||
|         /* Tail: whatever the unrolled loop left over. */ | |||
|         for (; j < n; ) { | |||
|             gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|             v0 = VLEV_FLOAT(&x[j], gvl); | |||
|             v0 = VFMULVF_FLOAT(v0, da, gvl); | |||
|             VSEV_FLOAT(&x[j], v0, gvl); | |||
|             j += gvl; | |||
|         } | |||
|     } else { | |||
|         BLASLONG stride_x = inc_x * sizeof(FLOAT); /* byte stride between elements */ | |||
|         BLASLONG ix = 0;                           /* element offset of logical index j */ | |||
|         BLASLONG inc_xv; | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         inc_xv = inc_x * gvl;                      /* element offset covered by one vector */ | |||
|         if (da == 0.0) { | |||
|             v0 = VFMVVF_FLOAT(0, gvl); | |||
|             for (i = 0, j = 0; i < n / (2 * gvl); i++, j += 2 * gvl) { | |||
|                 VSSEV_FLOAT(&x[ix], stride_x, v0, gvl); | |||
|                 VSSEV_FLOAT(&x[ix + inc_xv], stride_x, v0, gvl); | |||
|                 ix += inc_xv * 2; | |||
|             } | |||
|             /* Tail. */ | |||
|             for (; j < n; ) { | |||
|                 gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|                 v0 = VFMVVF_FLOAT(0, gvl); | |||
|                 VSSEV_FLOAT(&x[ix], stride_x, v0, gvl); | |||
|                 j += gvl; | |||
|                 ix += inc_x * gvl; | |||
|             } | |||
|         } else { | |||
|             for (i = 0, j = 0; i < n / (2 * gvl); i++, j += 2 * gvl) { | |||
|                 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|                 v0 = VFMULVF_FLOAT(v0, da, gvl); | |||
|                 VSSEV_FLOAT(&x[ix], stride_x, v0, gvl); | |||
|                 v1 = VLSEV_FLOAT(&x[ix + inc_xv], stride_x, gvl); | |||
|                 v1 = VFMULVF_FLOAT(v1, da, gvl); | |||
|                 VSSEV_FLOAT(&x[ix + inc_xv], stride_x, v1, gvl); | |||
|                 ix += inc_xv * 2; | |||
|             } | |||
|             /* Tail. */ | |||
|             for (; j < n; ) { | |||
|                 gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|                 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|                 v0 = VFMULVF_FLOAT(v0, da, gvl); | |||
|                 VSSEV_FLOAT(&x[ix], stride_x, v0, gvl); | |||
|                 j += gvl; | |||
|                 ix += inc_x * gvl; | |||
|             } | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,62 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/08/20 Saar | |||
| * BLASTEST float OK | |||
| * BLASTEST double OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <stdio.h> | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
|     /* | |||
|      * Scalar reference swap: exchanges x[k*inc_x] with y[k*inc_y] for k in [0, n). | |||
|      * Returns 0 always; the dummy arguments are not read. | |||
|      */ | |||
|     BLASLONG k; | |||
|     BLASLONG xpos = 0, ypos = 0; | |||
|     FLOAT held; | |||
|     if (n < 0) { | |||
|         return 0; | |||
|     } | |||
|     for (k = 0; k < n; k++) { | |||
|         held = x[xpos]; | |||
|         x[xpos] = y[ypos]; | |||
|         y[ypos] = held; | |||
|         xpos += inc_x; | |||
|         ypos += inc_y; | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,173 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <stdio.h> | |||
| /* Precision-generic aliases for the vector swap kernel: the DOUBLE build | |||
|  * maps them onto the float64xm8 intrinsics, any other build onto the | |||
|  * float32xm8 ones. */ | |||
| #if defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float64xm8_t | |||
| #define VLEV_FLOAT vlev_float64xm8 | |||
| #define VLSEV_FLOAT vlsev_float64xm8 | |||
| #define VSEV_FLOAT vsev_float64xm8 | |||
| #define VSSEV_FLOAT vssev_float64xm8 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M8 | |||
| #define FLOAT_V_T float32xm8_t | |||
| #define VLEV_FLOAT vlev_float32xm8 | |||
| #define VLSEV_FLOAT vlsev_float32xm8 | |||
| #define VSEV_FLOAT vsev_float32xm8 | |||
| #define VSSEV_FLOAT vssev_float32xm8 | |||
| #endif | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
|     /* | |||
|      * Vector swap: exchanges x[k*inc_x] with y[k*inc_y] for k in [0, n). | |||
|      * Returns 0 always; the dummy arguments are not read. | |||
|      * | |||
|      * Fix: the early-out is now n <= 0 (was n < 0). With n == 0 the first | |||
|      * vsetvli(0, ...) request produced gvl == 0 and the unrolled-loop bound | |||
|      * n/(2*gvl) divided by zero. | |||
|      */ | |||
|     BLASLONG i = 0, j = 0; | |||
|     BLASLONG ix = 0, iy = 0; | |||
|     BLASLONG stride_x, stride_y; | |||
|     BLASLONG inc_xv, inc_yv; | |||
|     FLOAT_V_T vx0, vx1, vy0, vy1; | |||
|     unsigned int gvl = 0; | |||
|     if (n <= 0) | |||
|         return 0; | |||
|     /* n > 0 here, so gvl is non-zero and the loop bounds below are well defined. */ | |||
|     gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|     if (inc_x == 1 && inc_y == 1) { | |||
|         /* Two vectors per iteration; the trip count is 0 when n < 2*gvl. */ | |||
|         for (i = 0, j = 0; i < n / (2 * gvl); i++) { | |||
|             vx0 = VLEV_FLOAT(&x[j], gvl); | |||
|             vy0 = VLEV_FLOAT(&y[j], gvl); | |||
|             VSEV_FLOAT(&x[j], vy0, gvl); | |||
|             VSEV_FLOAT(&y[j], vx0, gvl); | |||
|             vx1 = VLEV_FLOAT(&x[j + gvl], gvl); | |||
|             vy1 = VLEV_FLOAT(&y[j + gvl], gvl); | |||
|             VSEV_FLOAT(&x[j + gvl], vy1, gvl); | |||
|             VSEV_FLOAT(&y[j + gvl], vx1, gvl); | |||
|             j += gvl * 2; | |||
|         } | |||
|         /* Tail: whatever the unrolled loop left over. */ | |||
|         for (; j < n; ) { | |||
|             gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|             vx0 = VLEV_FLOAT(&x[j], gvl); | |||
|             vy0 = VLEV_FLOAT(&y[j], gvl); | |||
|             VSEV_FLOAT(&x[j], vy0, gvl); | |||
|             VSEV_FLOAT(&y[j], vx0, gvl); | |||
|             j += gvl; | |||
|         } | |||
|     } else if (inc_y == 1) { | |||
|         /* x strided, y contiguous. */ | |||
|         stride_x = inc_x * sizeof(FLOAT); | |||
|         inc_xv = inc_x * gvl; | |||
|         for (i = 0, j = 0; i < n / (2 * gvl); i++) { | |||
|             vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|             vy0 = VLEV_FLOAT(&y[j], gvl); | |||
|             VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl); | |||
|             VSEV_FLOAT(&y[j], vx0, gvl); | |||
|             vx1 = VLSEV_FLOAT(&x[ix + inc_xv], stride_x, gvl); | |||
|             vy1 = VLEV_FLOAT(&y[j + gvl], gvl); | |||
|             VSSEV_FLOAT(&x[ix + inc_xv], stride_x, vy1, gvl); | |||
|             VSEV_FLOAT(&y[j + gvl], vx1, gvl); | |||
|             j += gvl * 2; | |||
|             ix += inc_xv * 2; | |||
|         } | |||
|         for (; j < n; ) { | |||
|             gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|             vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|             vy0 = VLEV_FLOAT(&y[j], gvl); | |||
|             VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl); | |||
|             VSEV_FLOAT(&y[j], vx0, gvl); | |||
|             j += gvl; | |||
|             ix += inc_x * gvl; | |||
|         } | |||
|     } else if (inc_x == 1) { | |||
|         /* x contiguous, y strided. */ | |||
|         stride_y = inc_y * sizeof(FLOAT); | |||
|         inc_yv = inc_y * gvl; | |||
|         for (i = 0, j = 0; i < n / (2 * gvl); i++) { | |||
|             vx0 = VLEV_FLOAT(&x[j], gvl); | |||
|             vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|             VSEV_FLOAT(&x[j], vy0, gvl); | |||
|             VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl); | |||
|             vx1 = VLEV_FLOAT(&x[j + gvl], gvl); | |||
|             vy1 = VLSEV_FLOAT(&y[iy + inc_yv], stride_y, gvl); | |||
|             VSEV_FLOAT(&x[j + gvl], vy1, gvl); | |||
|             VSSEV_FLOAT(&y[iy + inc_yv], stride_y, vx1, gvl); | |||
|             j += gvl * 2; | |||
|             iy += inc_yv * 2; | |||
|         } | |||
|         for (; j < n; ) { | |||
|             gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|             vx0 = VLEV_FLOAT(&x[j], gvl); | |||
|             vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|             VSEV_FLOAT(&x[j], vy0, gvl); | |||
|             VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl); | |||
|             j += gvl; | |||
|             iy += inc_y * gvl; | |||
|         } | |||
|     } else { | |||
|         /* Both vectors strided. */ | |||
|         stride_x = inc_x * sizeof(FLOAT); | |||
|         stride_y = inc_y * sizeof(FLOAT); | |||
|         inc_xv = inc_x * gvl; | |||
|         inc_yv = inc_y * gvl; | |||
|         for (i = 0, j = 0; i < n / (2 * gvl); i++) { | |||
|             vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|             vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|             VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl); | |||
|             VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl); | |||
|             vx1 = VLSEV_FLOAT(&x[ix + inc_xv], stride_x, gvl); | |||
|             vy1 = VLSEV_FLOAT(&y[iy + inc_yv], stride_y, gvl); | |||
|             VSSEV_FLOAT(&x[ix + inc_xv], stride_x, vy1, gvl); | |||
|             VSSEV_FLOAT(&y[iy + inc_yv], stride_y, vx1, gvl); | |||
|             j += gvl * 2; | |||
|             ix += inc_xv * 2; | |||
|             iy += inc_yv * 2; | |||
|         } | |||
|         for (; j < n; ) { | |||
|             gvl = vsetvli(n - j, RVV_EFLOAT, RVV_M); | |||
|             vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|             vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|             VSSEV_FLOAT(&x[ix], stride_x, vy0, gvl); | |||
|             VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl); | |||
|             j += gvl; | |||
|             ix += inc_x * gvl; | |||
|             iy += inc_y * gvl; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,70 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
|     /* | |||
|      * Scalar reference symv (lower storage): for each column j in [0, offset) | |||
|      * adds alpha*x[j]*a[j*lda+i] to y[i] for the rows i in [j, m) and adds | |||
|      * alpha * sum_{i>j} a[j*lda+i]*x[i] to y[j]. x and y are addressed with | |||
|      * increments inc_x / inc_y; buffer is not read. Returns 0 always. | |||
|      */ | |||
|     BLASLONG row, col; | |||
|     BLASLONG xrow, yrow; | |||
|     BLASLONG xcol = 0, ycol = 0; | |||
|     FLOAT scaled_x; | |||
|     FLOAT dot; | |||
|     FLOAT *column; | |||
| #if 0 | |||
| if ( m != offset ) | |||
| printf("Symv_L: m=%d offset=%d\n",m,offset); | |||
| #endif | |||
|     for (col = 0; col < offset; col++) { | |||
|         column = &a[col * lda]; | |||
|         scaled_x = alpha * x[xcol]; | |||
|         dot = 0.0; | |||
|         /* Diagonal element. */ | |||
|         y[ycol] += scaled_x * column[col]; | |||
|         xrow = xcol; | |||
|         yrow = ycol; | |||
|         /* Rows below the diagonal update y[row] and feed the dot product. */ | |||
|         for (row = col + 1; row < m; row++) { | |||
|             xrow += inc_x; | |||
|             yrow += inc_y; | |||
|             y[yrow] += scaled_x * column[row]; | |||
|             dot += column[row] * x[xrow]; | |||
|         } | |||
|         y[ycol] += alpha * dot; | |||
|         xcol += inc_x; | |||
|         ycol += inc_y; | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,265 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /* Precision-generic aliases for the vector symv_L kernel: the DOUBLE build | |||
|  * maps them onto the float64xm4 intrinsics, any other build onto the | |||
|  * float32xm4 ones. */ | |||
| #if defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSEV_FLOAT vsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float64xm4 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLEV_FLOAT vlev_float32xm4 | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VSEV_FLOAT vsev_float32xm4 | |||
| #define VSSEV_FLOAT vssev_float32xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float32xm4 | |||
| #endif | |||
| int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
|     /* | |||
|      * Vector symv (lower storage). For each column j in [0, offset): | |||
|      *   - y[j] gets alpha*x[j]*A(j,j); | |||
|      *   - every y[i], i in (j, m), gets alpha*x[j]*A(i,j); | |||
|      *   - y[j] additionally gets alpha * sum_{i>j} A(i,j)*x[i], | |||
|      * where A(i,j) is column[i] and column advances by lda per j. | |||
|      * The four branches differ only in whether x / y are accessed with | |||
|      * unit-stride or strided loads and stores. buffer is not read. | |||
|      * NOTE(review): the partial dot product is read from element 0 of the | |||
|      * VFREDSUM_FLOAT result, which by its name is a sum reduction -- confirm | |||
|      * against the intrinsic documentation. | |||
|      * Returns 0 always. | |||
|      */ | |||
|     BLASLONG col, row, blk, rows_left; | |||
|     BLASLONG xrow, yrow;              /* element offsets of row `row` in x / y */ | |||
|     BLASLONG xcol = 0, ycol = 0;      /* element offsets of column `col` in x / y */ | |||
|     BLASLONG xbytes, ybytes, xstep, ystep; | |||
|     FLOAT scaled_x; | |||
|     FLOAT dot; | |||
|     FLOAT *column = a; | |||
|     unsigned int vl = 0; | |||
|     FLOAT_V_T va, vx, vy, vacc, vsum; | |||
|     if (inc_x == 1 && inc_y == 1) { | |||
|         for (col = 0; col < offset; col++, column += lda) { | |||
|             scaled_x = alpha * x[col]; | |||
|             dot = 0.0; | |||
|             y[col] += scaled_x * column[col]; | |||
|             row = col + 1; | |||
|             rows_left = m - row; | |||
|             if (rows_left > 0) { | |||
|                 vl = vsetvli(rows_left, RVV_EFLOAT, RVV_M); | |||
|                 vacc = VFMVVF_FLOAT(0, vl); | |||
|                 for (blk = rows_left / vl; blk > 0; blk--) { | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VLEV_FLOAT(&y[row], vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSEV_FLOAT(&y[row], vy, vl); | |||
|                     vx = VLEV_FLOAT(&x[row], vl); | |||
|                     vacc = VFMACCVV_FLOAT(vacc, vx, va, vl); | |||
|                     row += vl; | |||
|                 } | |||
|                 /* Collapse the per-lane partial products of the full chunks. */ | |||
|                 vsum = VFMVVF_FLOAT(0, vl); | |||
|                 vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                 dot = vsum[0]; | |||
|                 if (row < m) { | |||
|                     /* Remainder shorter than one full chunk. */ | |||
|                     vl = vsetvli(m - row, RVV_EFLOAT, RVV_M); | |||
|                     vy = VLEV_FLOAT(&y[row], vl); | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSEV_FLOAT(&y[row], vy, vl); | |||
|                     vx = VLEV_FLOAT(&x[row], vl); | |||
|                     vacc = VFMULVV_FLOAT(vx, va, vl); | |||
|                     vsum = VFMVVF_FLOAT(0, vl); | |||
|                     vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                     dot += vsum[0]; | |||
|                 } | |||
|             } | |||
|             y[col] += alpha * dot; | |||
|         } | |||
|     } else if (inc_x == 1) { | |||
|         /* x contiguous, y strided. */ | |||
|         ybytes = inc_y * sizeof(FLOAT); | |||
|         for (col = 0; col < offset; col++, column += lda) { | |||
|             scaled_x = alpha * x[col]; | |||
|             dot = 0.0; | |||
|             y[ycol] += scaled_x * column[col]; | |||
|             yrow = ycol + inc_y; | |||
|             row = col + 1; | |||
|             rows_left = m - row; | |||
|             if (rows_left > 0) { | |||
|                 vl = vsetvli(rows_left, RVV_EFLOAT, RVV_M); | |||
|                 ystep = inc_y * vl; | |||
|                 vacc = VFMVVF_FLOAT(0, vl); | |||
|                 for (blk = rows_left / vl; blk > 0; blk--) { | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VLSEV_FLOAT(&y[yrow], ybytes, vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSSEV_FLOAT(&y[yrow], ybytes, vy, vl); | |||
|                     vx = VLEV_FLOAT(&x[row], vl); | |||
|                     vacc = VFMACCVV_FLOAT(vacc, vx, va, vl); | |||
|                     row += vl; | |||
|                     yrow += ystep; | |||
|                 } | |||
|                 vsum = VFMVVF_FLOAT(0, vl); | |||
|                 vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                 dot = vsum[0]; | |||
|                 if (row < m) { | |||
|                     vl = vsetvli(m - row, RVV_EFLOAT, RVV_M); | |||
|                     vy = VLSEV_FLOAT(&y[yrow], ybytes, vl); | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSSEV_FLOAT(&y[yrow], ybytes, vy, vl); | |||
|                     vx = VLEV_FLOAT(&x[row], vl); | |||
|                     vacc = VFMULVV_FLOAT(vx, va, vl); | |||
|                     vsum = VFMVVF_FLOAT(0, vl); | |||
|                     vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                     dot += vsum[0]; | |||
|                 } | |||
|             } | |||
|             y[ycol] += alpha * dot; | |||
|             ycol += inc_y; | |||
|         } | |||
|     } else if (inc_y == 1) { | |||
|         /* x strided, y contiguous. */ | |||
|         xbytes = inc_x * sizeof(FLOAT); | |||
|         for (col = 0; col < offset; col++, column += lda) { | |||
|             scaled_x = alpha * x[xcol]; | |||
|             dot = 0.0; | |||
|             y[col] += scaled_x * column[col]; | |||
|             xrow = xcol + inc_x; | |||
|             row = col + 1; | |||
|             rows_left = m - row; | |||
|             if (rows_left > 0) { | |||
|                 vl = vsetvli(rows_left, RVV_EFLOAT, RVV_M); | |||
|                 vacc = VFMVVF_FLOAT(0, vl); | |||
|                 xstep = inc_x * vl; | |||
|                 for (blk = rows_left / vl; blk > 0; blk--) { | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VLEV_FLOAT(&y[row], vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSEV_FLOAT(&y[row], vy, vl); | |||
|                     vx = VLSEV_FLOAT(&x[xrow], xbytes, vl); | |||
|                     vacc = VFMACCVV_FLOAT(vacc, vx, va, vl); | |||
|                     row += vl; | |||
|                     xrow += xstep; | |||
|                 } | |||
|                 vsum = VFMVVF_FLOAT(0, vl); | |||
|                 vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                 dot = vsum[0]; | |||
|                 if (row < m) { | |||
|                     vl = vsetvli(m - row, RVV_EFLOAT, RVV_M); | |||
|                     vy = VLEV_FLOAT(&y[row], vl); | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSEV_FLOAT(&y[row], vy, vl); | |||
|                     vx = VLSEV_FLOAT(&x[xrow], xbytes, vl); | |||
|                     vacc = VFMULVV_FLOAT(vx, va, vl); | |||
|                     vsum = VFMVVF_FLOAT(0, vl); | |||
|                     vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                     dot += vsum[0]; | |||
|                 } | |||
|             } | |||
|             y[col] += alpha * dot; | |||
|             xcol += inc_x; | |||
|         } | |||
|     } else { | |||
|         /* Both vectors strided. */ | |||
|         xbytes = inc_x * sizeof(FLOAT); | |||
|         ybytes = inc_y * sizeof(FLOAT); | |||
|         for (col = 0; col < offset; col++, column += lda) { | |||
|             scaled_x = alpha * x[xcol]; | |||
|             dot = 0.0; | |||
|             y[ycol] += scaled_x * column[col]; | |||
|             xrow = xcol + inc_x; | |||
|             yrow = ycol + inc_y; | |||
|             row = col + 1; | |||
|             rows_left = m - row; | |||
|             if (rows_left > 0) { | |||
|                 vl = vsetvli(rows_left, RVV_EFLOAT, RVV_M); | |||
|                 xstep = inc_x * vl; | |||
|                 ystep = inc_y * vl; | |||
|                 vacc = VFMVVF_FLOAT(0, vl); | |||
|                 for (blk = rows_left / vl; blk > 0; blk--) { | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VLSEV_FLOAT(&y[yrow], ybytes, vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSSEV_FLOAT(&y[yrow], ybytes, vy, vl); | |||
|                     vx = VLSEV_FLOAT(&x[xrow], xbytes, vl); | |||
|                     vacc = VFMACCVV_FLOAT(vacc, vx, va, vl); | |||
|                     row += vl; | |||
|                     xrow += xstep; | |||
|                     yrow += ystep; | |||
|                 } | |||
|                 vsum = VFMVVF_FLOAT(0, vl); | |||
|                 vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                 dot = vsum[0]; | |||
|                 if (row < m) { | |||
|                     vl = vsetvli(m - row, RVV_EFLOAT, RVV_M); | |||
|                     vy = VLSEV_FLOAT(&y[yrow], ybytes, vl); | |||
|                     va = VLEV_FLOAT(&column[row], vl); | |||
|                     vy = VFMACCVF_FLOAT(vy, scaled_x, va, vl); | |||
|                     VSSEV_FLOAT(&y[yrow], ybytes, vy, vl); | |||
|                     vx = VLSEV_FLOAT(&x[xrow], xbytes, vl); | |||
|                     vacc = VFMULVV_FLOAT(vx, va, vl); | |||
|                     vsum = VFMVVF_FLOAT(0, vl); | |||
|                     vsum = VFREDSUM_FLOAT(vacc, vsum, vl); | |||
|                     dot += vsum[0]; | |||
|                 } | |||
|             } | |||
|             y[ycol] += alpha * dot; | |||
|             xcol += inc_x; | |||
|             ycol += inc_y; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,71 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix,iy; | |||
| BLASLONG jx,jy; | |||
| BLASLONG j; | |||
| FLOAT temp1; | |||
| FLOAT temp2; | |||
| #if 0 | |||
| if( m != offset ) | |||
| printf("Symv_U: m=%d offset=%d\n",m,offset); | |||
| #endif | |||
| BLASLONG m1 = m - offset; | |||
| jx = m1 * inc_x; | |||
| jy = m1 * inc_y; | |||
| for (j=m1; j<m; j++) | |||
| { | |||
| temp1 = alpha * x[jx]; | |||
| temp2 = 0.0; | |||
| iy = 0; | |||
| ix = 0; | |||
| for (i=0; i<j; i++) | |||
| { | |||
| y[iy] += temp1 * a[j*lda+i]; | |||
| temp2 += a[j*lda+i] * x[ix]; | |||
| ix += inc_x; | |||
| iy += inc_y; | |||
| } | |||
| y[jy] += temp1 * a[j*lda+j] + alpha * temp2; | |||
| jx += inc_x; | |||
| jy += inc_y; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,264 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Precision-dependent aliases for the vector type and intrinsics used by
 * this kernel.  DOUBLE selects the float64xm4 names, otherwise the
 * float32xm4 names are used.
 */
#define RVV_M            RVV_M4   /* same register grouping for both precisions */

#if defined(DOUBLE)
#define RVV_EFLOAT       RVV_E64
#define FLOAT_V_T        float64xm4_t
#define VLEV_FLOAT       vlev_float64xm4
#define VLSEV_FLOAT      vlsev_float64xm4
#define VSEV_FLOAT       vsev_float64xm4
#define VSSEV_FLOAT      vssev_float64xm4
#define VFREDSUM_FLOAT   vfredsumvs_float64xm4
#define VFMACCVV_FLOAT   vfmaccvv_float64xm4
#define VFMACCVF_FLOAT   vfmaccvf_float64xm4
#define VFMVVF_FLOAT     vfmvvf_float64xm4
#define VFDOTVV_FLOAT    vfdotvv_float64xm4
#define VFMULVV_FLOAT    vfmulvv_float64xm4
#else
#define RVV_EFLOAT       RVV_E32
#define FLOAT_V_T        float32xm4_t
#define VLEV_FLOAT       vlev_float32xm4
#define VLSEV_FLOAT      vlsev_float32xm4
#define VSEV_FLOAT       vsev_float32xm4
#define VSSEV_FLOAT      vssev_float32xm4
#define VFREDSUM_FLOAT   vfredsumvs_float32xm4
#define VFMACCVV_FLOAT   vfmaccvv_float32xm4
#define VFMACCVF_FLOAT   vfmaccvf_float32xm4
#define VFMVVF_FLOAT     vfmvvf_float32xm4
#define VFDOTVV_FLOAT    vfdotvv_float32xm4
#define VFMULVV_FLOAT    vfmulvv_float32xm4
#endif
| int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i, j, k; | |||
| BLASLONG ix,iy; | |||
| BLASLONG jx,jy; | |||
| FLOAT temp1; | |||
| FLOAT temp2; | |||
| FLOAT *a_ptr = a; | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T va, vx, vy, vr; | |||
| BLASLONG stride_x, stride_y, inc_xv, inc_yv; | |||
| BLASLONG m1 = m - offset; | |||
| if(inc_x == 1 && inc_y == 1){ | |||
| a_ptr += m1 * lda; | |||
| for (j=m1; j<m; j++) | |||
| { | |||
| temp1 = alpha * x[j]; | |||
| temp2 = 0.0; | |||
| if(j > 0){ | |||
| i = 0; | |||
| gvl = vsetvli(j, RVV_EFLOAT, RVV_M); | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < j / gvl; k++){ | |||
| vy = VLEV_FLOAT(&y[i], gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSEV_FLOAT(&y[i], vy, gvl); | |||
| vx = VLEV_FLOAT(&x[i], gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, va, gvl); | |||
| i += gvl; | |||
| } | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 = va[0]; | |||
| if(i < j){ | |||
| gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); | |||
| vy = VLEV_FLOAT(&y[i], gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSEV_FLOAT(&y[i], vy, gvl); | |||
| vx = VLEV_FLOAT(&x[i], gvl); | |||
| vr = VFMULVV_FLOAT(vx, va, gvl); | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 += va[0]; | |||
| } | |||
| } | |||
| y[j] += temp1 * a_ptr[j] + alpha * temp2; | |||
| a_ptr += lda; | |||
| } | |||
| }else if(inc_x == 1){ | |||
| jy = m1 * inc_y; | |||
| a_ptr += m1 * lda; | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| for (j=m1; j<m; j++) | |||
| { | |||
| temp1 = alpha * x[j]; | |||
| temp2 = 0.0; | |||
| if(j > 0){ | |||
| iy = 0; | |||
| i = 0; | |||
| gvl = vsetvli(j, RVV_EFLOAT, RVV_M); | |||
| inc_yv = inc_y * gvl; | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < j / gvl; k++){ | |||
| vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); | |||
| vx = VLEV_FLOAT(&x[i], gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, va, gvl); | |||
| i += gvl; | |||
| iy += inc_yv; | |||
| } | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 = va[0]; | |||
| if(i < j){ | |||
| gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); | |||
| vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); | |||
| vx = VLEV_FLOAT(&x[i], gvl); | |||
| vr = VFMULVV_FLOAT(vx, va, gvl); | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 += va[0]; | |||
| } | |||
| } | |||
| y[jy] += temp1 * a_ptr[j] + alpha * temp2; | |||
| a_ptr += lda; | |||
| jy += inc_y; | |||
| } | |||
| }else if(inc_y == 1){ | |||
| jx = m1 * inc_x; | |||
| a_ptr += m1 * lda; | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| for (j=m1; j<m; j++) | |||
| { | |||
| temp1 = alpha * x[jx]; | |||
| temp2 = 0.0; | |||
| if(j > 0){ | |||
| ix = 0; | |||
| i = 0; | |||
| gvl = vsetvli(j, RVV_EFLOAT, RVV_M); | |||
| inc_xv = inc_x * gvl; | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < j / gvl; k++){ | |||
| vy = VLEV_FLOAT(&y[i], gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSEV_FLOAT(&y[i], vy, gvl); | |||
| vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, va, gvl); | |||
| i += gvl; | |||
| ix += inc_xv; | |||
| } | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 = va[0]; | |||
| if(i < j){ | |||
| gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); | |||
| vy = VLEV_FLOAT(&y[i], gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSEV_FLOAT(&y[i], vy, gvl); | |||
| vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vr = VFMULVV_FLOAT(vx, va, gvl); | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 += va[0]; | |||
| } | |||
| } | |||
| y[j] += temp1 * a_ptr[j] + alpha * temp2; | |||
| a_ptr += lda; | |||
| jx += inc_x; | |||
| } | |||
| }else{ | |||
| jx = m1 * inc_x; | |||
| jy = m1 * inc_y; | |||
| a_ptr += m1 * lda; | |||
| stride_x = inc_x * sizeof(FLOAT); | |||
| stride_y = inc_y * sizeof(FLOAT); | |||
| for (j=m1; j<m; j++) | |||
| { | |||
| temp1 = alpha * x[jx]; | |||
| temp2 = 0.0; | |||
| if(j > 0){ | |||
| ix = 0; | |||
| iy = 0; | |||
| i = 0; | |||
| gvl = vsetvli(j, RVV_EFLOAT, RVV_M); | |||
| inc_xv = inc_x * gvl; | |||
| inc_yv = inc_y * gvl; | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < j / gvl; k++){ | |||
| vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); | |||
| vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, vx, va, gvl); | |||
| i += gvl; | |||
| ix += inc_xv; | |||
| iy += inc_yv; | |||
| } | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 = va[0]; | |||
| if(i < j){ | |||
| gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); | |||
| vy = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| va = VLEV_FLOAT(&a_ptr[i], gvl); | |||
| vy = VFMACCVF_FLOAT(vy, temp1, va, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy, gvl); | |||
| vx = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vr = VFMULVV_FLOAT(vx, va, gvl); | |||
| va = VFMVVF_FLOAT(0, gvl); | |||
| va = VFREDSUM_FLOAT(vr, va, gvl); | |||
| temp2 += va[0]; | |||
| } | |||
| } | |||
| y[jy] += temp1 * a_ptr[j] + alpha * temp2; | |||
| a_ptr += lda; | |||
| jx += inc_x; | |||
| jy += inc_y; | |||
| } | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,79 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
/* Absolute-value routine matching the precision selected by DOUBLE. */
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif

/*
 * |x[i]| + |x[i+1]|.
 * The arguments and the whole expansion are parenthesised so the macro can
 * be used safely inside larger expressions (e.g. 2 * CABS1(x, i)) and with
 * non-trivial index expressions.
 */
#define CABS1(x,i) (ABS((x)[(i)]) + ABS((x)[(i) + 1]))
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0; | |||
| FLOAT maxf; | |||
| BLASLONG inc_x2; | |||
| if (n <= 0 || inc_x <= 0) return(0.0); | |||
| inc_x2 = 2 * inc_x; | |||
| maxf = CABS1(x,0); | |||
| ix += inc_x2; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if( CABS1(x,ix) > maxf ) | |||
| { | |||
| maxf = CABS1(x,ix); | |||
| } | |||
| ix += inc_x2; | |||
| i++; | |||
| } | |||
| return(maxf); | |||
| } | |||
| @@ -0,0 +1,104 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
/*
 * Precision-dependent aliases for the vector type, mask type and intrinsics
 * used by this kernel.  DOUBLE selects the 64-bit (xm8) names, otherwise the
 * 32-bit (xm8) names are used.
 */
#define RVV_M                 RVV_M8   /* same register grouping for both precisions */

#if defined(DOUBLE)
#define RVV_EFLOAT            RVV_E64
#define FLOAT_V_T             float64xm8_t
#define MASK_T                e64xm8_t
#define VLSEV_FLOAT           vlsev_float64xm8
#define VFMVVF_FLOAT          vfmvvf_float64xm8
#define VMFLTVF_FLOAT         vmfltvf_e64xm8_float64xm8
#define VFRSUBVF_MASK_FLOAT   vfrsubvf_mask_float64xm8
#define VFADDVV_FLOAT         vfaddvv_float64xm8
#define VFMAXVV_FLOAT         vfmaxvv_float64xm8
#define VFREDMAXVS_FLOAT      vfredmaxvs_float64xm8
#else
#define RVV_EFLOAT            RVV_E32
#define FLOAT_V_T             float32xm8_t
#define MASK_T                e32xm8_t
#define VLSEV_FLOAT           vlsev_float32xm8
#define VFMVVF_FLOAT          vfmvvf_float32xm8
#define VMFLTVF_FLOAT         vmfltvf_e32xm8_float32xm8
#define VFRSUBVF_MASK_FLOAT   vfrsubvf_mask_float32xm8
#define VFADDVV_FLOAT         vfaddvv_float32xm8
#define VFMAXVV_FLOAT         vfmaxvv_float32xm8
#define VFREDMAXVS_FLOAT      vfredmaxvs_float32xm8
#endif
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| BLASLONG ix=0; | |||
| FLOAT maxf=0.0; | |||
| if (n <= 0 || inc_x <= 0) return(maxf); | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T v0, v1, v_max; | |||
| MASK_T mask0, mask1; | |||
| BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| v_max = VFMVVF_FLOAT(0, gvl); | |||
| BLASLONG inc_xv = inc_x * gvl * 2; | |||
| for(; i<n/gvl; i++){ | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v0 = VFADDVV_FLOAT(v0, v1, gvl); | |||
| v_max = VFMAXVV_FLOAT(v_max, v0, gvl); | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| } | |||
| v0 = VFMVVF_FLOAT(0, gvl); | |||
| v_max = VFREDMAXVS_FLOAT(v_max, v0, gvl); | |||
| maxf = v_max[0]; | |||
| if(j<n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v1 = VFADDVV_FLOAT(v0, v1, gvl); | |||
| v0 = VFMVVF_FLOAT(0, gvl); | |||
| v_max = VFREDMAXVS_FLOAT(v1, v0, gvl); | |||
| if(v_max[0] > maxf) | |||
| maxf = v_max[0]; | |||
| } | |||
| return(maxf); | |||
| } | |||
| @@ -0,0 +1,79 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : NoTest | |||
| * TEST : NoTest | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
/* Absolute-value routine matching the precision selected by DOUBLE. */
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif

/*
 * |x[i]| + |x[i+1]|.
 * The arguments and the whole expansion are parenthesised so the macro can
 * be used safely inside larger expressions (e.g. 2 * CABS1(x, i)) and with
 * non-trivial index expressions.
 */
#define CABS1(x,i) (ABS((x)[(i)]) + ABS((x)[(i) + 1]))
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0; | |||
| FLOAT minf; | |||
| BLASLONG inc_x2; | |||
| if (n <= 0 || inc_x <= 0) return(0.0); | |||
| inc_x2 = 2 * inc_x; | |||
| minf = CABS1(x,0); | |||
| ix += inc_x2; | |||
| i++; | |||
| while(i < n) | |||
| { | |||
| if( CABS1(x,ix) < minf ) | |||
| { | |||
| minf = CABS1(x,ix); | |||
| } | |||
| ix += inc_x2; | |||
| i++; | |||
| } | |||
| return(minf); | |||
| } | |||
| @@ -0,0 +1,104 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #include <float.h> | |||
/*
 * Precision-dependent aliases for the vector type, mask type and intrinsics
 * used by this kernel.  DOUBLE selects the 64-bit (xm8) names, otherwise the
 * 32-bit (xm8) names are used.
 */
#define RVV_M                 RVV_M8   /* same register grouping for both precisions */

#if defined(DOUBLE)
#define RVV_EFLOAT            RVV_E64
#define FLOAT_V_T             float64xm8_t
#define MASK_T                e64xm8_t
#define VLSEV_FLOAT           vlsev_float64xm8
#define VFMVVF_FLOAT          vfmvvf_float64xm8
#define VMFLTVF_FLOAT         vmfltvf_e64xm8_float64xm8
#define VFRSUBVF_MASK_FLOAT   vfrsubvf_mask_float64xm8
#define VFADDVV_FLOAT         vfaddvv_float64xm8
#define VFMINVV_FLOAT         vfminvv_float64xm8
#define VFREDMINVS_FLOAT      vfredminvs_float64xm8
#else
#define RVV_EFLOAT            RVV_E32
#define FLOAT_V_T             float32xm8_t
#define MASK_T                e32xm8_t
#define VLSEV_FLOAT           vlsev_float32xm8
#define VFMVVF_FLOAT          vfmvvf_float32xm8
#define VMFLTVF_FLOAT         vmfltvf_e32xm8_float32xm8
#define VFRSUBVF_MASK_FLOAT   vfrsubvf_mask_float32xm8
#define VFADDVV_FLOAT         vfaddvv_float32xm8
#define VFMINVV_FLOAT         vfminvv_float32xm8
#define VFREDMINVS_FLOAT      vfredminvs_float32xm8
#endif
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| BLASLONG ix=0; | |||
| if (n <= 0 || inc_x <= 0) return(0.0); | |||
| FLOAT minf=FLT_MAX; | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T v0, v1, v_min; | |||
| MASK_T mask0, mask1; | |||
| BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| v_min = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| BLASLONG inc_xv = inc_x * gvl * 2; | |||
| for(; i<n/gvl; i++){ | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v0 = VFADDVV_FLOAT(v0, v1, gvl); | |||
| v_min = VFMINVV_FLOAT(v_min, v0, gvl); | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| } | |||
| v0 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v_min = VFREDMINVS_FLOAT(v_min, v0, gvl); | |||
| minf = v_min[0]; | |||
| if(j<n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v1 = VFADDVV_FLOAT(v0, v1, gvl); | |||
| v0 = VFMVVF_FLOAT(FLT_MAX, gvl); | |||
| v_min = VFREDMINVS_FLOAT(v1, v0, gvl); | |||
| if(v_min[0] < minf) | |||
| minf = v_min[0]; | |||
| } | |||
| return(minf); | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
/* Absolute-value routine matching the precision selected by DOUBLE. */
#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif

/*
 * |x[i]| + |x[i+1]|.
 * The arguments and the whole expansion are parenthesised so the macro can
 * be used safely inside larger expressions (e.g. 2 * CABS1(x, i)) and with
 * non-trivial index expressions.
 */
#define CABS1(x,i) (ABS((x)[(i)]) + ABS((x)[(i) + 1]))
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG i=0; | |||
| FLOAT sumf = 0.0; | |||
| BLASLONG inc_x2; | |||
| if (n <= 0 || inc_x <= 0) return(sumf); | |||
| inc_x2 = 2 * inc_x; | |||
| n *= inc_x2; | |||
| while(i < n) | |||
| { | |||
| sumf += CABS1(x,i); | |||
| i += inc_x2; | |||
| } | |||
| return(sumf); | |||
| } | |||
| @@ -0,0 +1,136 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
/*
 * Precision-dependent aliases for the vector type, mask type and intrinsics
 * used by this kernel.  DOUBLE selects the 64-bit (xm8) names, otherwise the
 * 32-bit (xm8) names are used.
 */
#define RVV_M                 RVV_M8   /* same register grouping for both precisions */

#if defined(DOUBLE)
#define RVV_EFLOAT            RVV_E64
#define FLOAT_V_T             float64xm8_t
#define MASK_T                e64xm8_t
#define VLEV_FLOAT            vlev_float64xm8
#define VLSEV_FLOAT           vlsev_float64xm8
#define VFMVVF_FLOAT          vfmvvf_float64xm8
#define VMFLTVF_FLOAT         vmfltvf_e64xm8_float64xm8
#define VFRSUBVF_MASK_FLOAT   vfrsubvf_mask_float64xm8
#define VFADDVV_FLOAT         vfaddvv_float64xm8
#define VFREDSUMVS_FLOAT      vfredsumvs_float64xm8
#else
#define RVV_EFLOAT            RVV_E32
#define FLOAT_V_T             float32xm8_t
#define MASK_T                e32xm8_t
#define VLEV_FLOAT            vlev_float32xm8
#define VLSEV_FLOAT           vlsev_float32xm8
#define VFMVVF_FLOAT          vfmvvf_float32xm8
#define VMFLTVF_FLOAT         vmfltvf_e32xm8_float32xm8
#define VFRSUBVF_MASK_FLOAT   vfrsubvf_mask_float32xm8
#define VFADDVV_FLOAT         vfaddvv_float32xm8
#define VFREDSUMVS_FLOAT      vfredsumvs_float32xm8
#endif
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) /* Vectorized sum over the real and imaginary parts of n interleaved (re, im) pairs in x, with lanes that compare below zero replaced before accumulation (presumably by their negation, i.e. an absolute-value sum; confirm the masked vfrsub semantics). Returns the total; returns 0 when n <= 0 or inc_x <= 0. */ | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| BLASLONG ix=0; | |||
| FLOAT asumf=0.0; | |||
| if (n <= 0 || inc_x <= 0) return(asumf); /* nothing to sum for empty input or non-positive stride */ | |||
| unsigned int gvl = 0; /* element count returned by vsetvli for the current chunk */ | |||
| FLOAT_V_T v0, v1, v_zero,v_sum; | |||
| MASK_T mask0, mask1; | |||
| if(inc_x == 1){ /* contiguous case: x is walked as 2*n consecutive FLOAT values */ | |||
| BLASLONG n2 = n * 2; | |||
| gvl = vsetvli(n2, RVV_EFLOAT, RVV_M); | |||
| v_zero = VFMVVF_FLOAT(0, gvl); /* zero vector, passed as second operand to every reduction below */ | |||
| if(gvl <= n2/2){ /* unrolled path only when at least 2*gvl values are available */ | |||
| v_sum = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n2/(gvl*2); i++){ /* each iteration consumes two chunks of gvl values */ | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); /* mask from comparing v0 against 0 (less-than, per the intrinsic name) */ | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); /* presumably 0 - v0 on masked lanes only; TODO confirm operand order */ | |||
| v_sum = VFADDVV_FLOAT(v_sum, v0, gvl); | |||
| v1 = VLEV_FLOAT(&x[j+gvl], gvl); | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v_sum = VFADDVV_FLOAT(v_sum, v1, gvl); | |||
| j += gvl * 2; | |||
| } | |||
| v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl); /* reduce the lane-wise accumulator; the scalar result is read from element 0 */ | |||
| asumf += v0[0]; | |||
| } | |||
| for(;j<n2;){ /* remaining values: re-query vsetvli each pass and reduce every chunk separately */ | |||
| gvl = vsetvli(n2-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLEV_FLOAT(&x[j], gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| v0 = VFREDSUMVS_FLOAT(v0, v_zero, gvl); | |||
| asumf += v0[0]; | |||
| j += gvl; | |||
| } | |||
| }else{ /* strided case: real and imaginary parts are loaded as two separate strided vectors */ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| unsigned int stride_x = inc_x * sizeof(FLOAT) * 2; /* byte distance between consecutive (re, im) pairs */ | |||
| v_zero = VFMVVF_FLOAT(0, gvl); | |||
| BLASLONG inc_xv = inc_x * 2 * gvl; /* index advance in FLOAT units for one chunk of gvl pairs */ | |||
| v_sum = VFMVVF_FLOAT(0, gvl); | |||
| for(i=0,j=0; i<n/gvl; i++){ /* full chunks of gvl pairs */ | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); /* real parts */ | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| v_sum = VFADDVV_FLOAT(v_sum, v0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); /* imaginary parts */ | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v_sum = VFADDVV_FLOAT(v_sum, v1, gvl); | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| } | |||
| v0 = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl); | |||
| asumf += v0[0]; | |||
| if(j<n){ /* tail: fewer than gvl pairs remain, handled in a single shorter chunk */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| mask0 = VMFLTVF_FLOAT(v0, 0, gvl); | |||
| v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask0, gvl); | |||
| v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| mask1 = VMFLTVF_FLOAT(v1, 0, gvl); | |||
| v1 = VFRSUBVF_MASK_FLOAT(v1, v1, 0, mask1, gvl); | |||
| v_sum = VFADDVV_FLOAT(v0, v1, gvl); | |||
| v_sum = VFREDSUMVS_FLOAT(v_sum, v_zero, gvl); | |||
| asumf += v_sum[0]; | |||
| } | |||
| } | |||
| return(asumf); | |||
| } | |||
| @@ -0,0 +1,118 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /*************************************************************************** | |||
| * 2014/06/07 Saar | |||
| * | |||
| ***************************************************************************/ | |||
| #include "common.h" | |||
| /* Scalar complex AXPBY: y := alpha * x + beta * y over n (re, im) pairs. */ | |||
| /* x and y hold interleaved real/imaginary FLOATs; inc_x and inc_y are strides */ | |||
| /* counted in complex elements. Zero alpha and/or zero beta take shortcuts that */ | |||
| /* skip reading x and/or y. Always returns 0. */ | |||
| int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y) | |||
| { | |||
|     if (n <= 0) return 0; | |||
|     const BLASLONG step_x = 2 * inc_x; | |||
|     const BLASLONG step_y = 2 * inc_y; | |||
|     const int alpha_zero = (alpha_r == 0.0 && alpha_i == 0.0); | |||
|     const int beta_zero = (beta_r == 0.0 && beta_i == 0.0); | |||
|     BLASLONG px = 0, py = 0; | |||
|     BLASLONG k; | |||
|     FLOAT new_re; | |||
|     if (beta_zero && alpha_zero) { | |||
|         /* y := 0 */ | |||
|         for (k = 0; k < n; k++) { | |||
|             y[py] = 0.0; | |||
|             y[py + 1] = 0.0; | |||
|             py += step_y; | |||
|         } | |||
|     } else if (beta_zero) { | |||
|         /* y := alpha * x; y is written without being read */ | |||
|         for (k = 0; k < n; k++) { | |||
|             y[py] = (alpha_r * x[px] - alpha_i * x[px + 1]); | |||
|             y[py + 1] = (alpha_r * x[px + 1] + alpha_i * x[px]); | |||
|             px += step_x; | |||
|             py += step_y; | |||
|         } | |||
|     } else if (alpha_zero) { | |||
|         /* y := beta * y; the new real part is held back until the */ | |||
|         /* imaginary part has been computed from the old real part */ | |||
|         for (k = 0; k < n; k++) { | |||
|             new_re = (beta_r * y[py] - beta_i * y[py + 1]); | |||
|             y[py + 1] = (beta_r * y[py + 1] + beta_i * y[py]); | |||
|             y[py] = new_re; | |||
|             py += step_y; | |||
|         } | |||
|     } else { | |||
|         /* general case: y := alpha * x + beta * y */ | |||
|         for (k = 0; k < n; k++) { | |||
|             new_re = (alpha_r * x[px] - alpha_i * x[px + 1]) + (beta_r * y[py] - beta_i * y[py + 1]); | |||
|             y[py + 1] = (alpha_r * x[px + 1] + alpha_i * x[px]) + (beta_r * y[py + 1] + beta_i * y[py]); | |||
|             y[py] = new_re; | |||
|             px += step_x; | |||
|             py += step_y; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,197 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /* Precision-neutral aliases for the vector type and intrinsics used below. */ | |||
| /* Both precisions use the m4 register-group variants. */ | |||
| #define RVV_M RVV_M4 | |||
| #if defined(DOUBLE) | |||
| /* DOUBLE defined: 64-bit element variants. */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFMULVF_FLOAT vfmulvf_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #define VFMSACVF_FLOAT vfmsacvf_float64xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float64xm4 | |||
| #else | |||
| /* DOUBLE not defined: 32-bit element variants. */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VSSEV_FLOAT vssev_float32xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 | |||
| #define VFMULVF_FLOAT vfmulvf_float32xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 | |||
| #define VFMSACVF_FLOAT vfmsacvf_float32xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float32xm4 | |||
| #endif | |||
| int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i, FLOAT *y, BLASLONG inc_y) /* Vectorized complex AXPBY: y := alpha * x + beta * y over n interleaved (re, im) pairs; inc_x / inc_y are strides in complex elements. Returns 0 in every case, including the n <= 0 early exit. */ | |||
| { | |||
| if (n <= 0) return(0); | |||
| BLASLONG i=0, j=0; | |||
| unsigned int gvl = 0; /* element count returned by vsetvli for the current chunk */ | |||
| FLOAT_V_T vx0, vx1; /* real / imaginary parts loaded from x */ | |||
| FLOAT_V_T vy0, vy1; /* real / imaginary parts loaded from or stored to y */ | |||
| BLASLONG stride_x, stride_y, ix = 0, iy = 0; | |||
| stride_x = inc_x * 2 * sizeof(FLOAT); /* byte distance between consecutive complex elements of x */ | |||
| stride_y = inc_y * 2 * sizeof(FLOAT); /* byte distance between consecutive complex elements of y */ | |||
| if(beta_r == 0.0 && beta_i == 0.0){ /* beta == 0: y is overwritten without being read */ | |||
| if(alpha_r == 0.0 && alpha_i == 0.0){ /* alpha == 0 as well: y is zero-filled */ | |||
| if(inc_y == 1){ | |||
| memset(&y[0], 0, 2 * n * sizeof(FLOAT)); /* contiguous y: clear all 2*n FLOATs in one call */ | |||
| }else{ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| if(gvl <= n/2){ /* unrolled path only when at least 2*gvl elements exist */ | |||
| vy0 = VFMVVF_FLOAT(0.0, gvl); | |||
| BLASLONG inc_yv = inc_y * gvl * 2; /* index advance in FLOAT units for one chunk */ | |||
| for(i=0,j=0;i<n/(gvl*2);i++){ /* two chunks of gvl elements per iteration */ | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); /* real parts, first chunk */ | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy0, gvl); /* imaginary parts, first chunk */ | |||
| VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vy0, gvl); /* real parts, second chunk */ | |||
| VSSEV_FLOAT(&y[iy+1+inc_yv], stride_y, vy0, gvl); /* imaginary parts, second chunk */ | |||
| j += gvl * 2; | |||
| iy += inc_yv * 2; | |||
| } | |||
| } | |||
| for(;j<n;){ /* remaining elements, one vsetvli-sized chunk per pass */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vy0 = VFMVVF_FLOAT(0.0, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy0, gvl); | |||
| j += gvl; | |||
| iy += inc_y * gvl * 2; | |||
| } | |||
| } | |||
| }else{ /* beta == 0, alpha != 0: y := alpha * x */ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| BLASLONG inc_xv = inc_x * gvl * 2; | |||
| BLASLONG inc_yv = inc_y * gvl * 2; | |||
| for(i=0,j=0; i<n/gvl; i++){ /* full chunks of gvl elements */ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| vy0 = VFMULVF_FLOAT(vx1, alpha_i, gvl); | |||
| vy0 = VFMSACVF_FLOAT(vy0, alpha_r, vx0, gvl); /* real part; presumably alpha_r*x_re - alpha_i*x_im, assuming vfmsac computes scalar*vector - accumulator */ | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vy1 = VFMULVF_FLOAT(vx1, alpha_r, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, alpha_i, vx0, gvl); /* imaginary part; presumably alpha_r*x_im + alpha_i*x_re, assuming vfmacc adds scalar*vector to the accumulator */ | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| iy += inc_yv; | |||
| } | |||
| if(j<n){ /* tail: same computation on the final, shorter chunk */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| vy0 = VFMULVF_FLOAT(vx1, alpha_i, gvl); | |||
| vy0 = VFMSACVF_FLOAT(vy0, alpha_r, vx0, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| vy1 = VFMULVF_FLOAT(vx1, alpha_r, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, alpha_i, vx0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| } | |||
| } | |||
| }else{ /* beta != 0: the old contents of y contribute to the result */ | |||
| FLOAT_V_T v0, v1; /* result real / imaginary parts, kept separate from the loaded y values */ | |||
| if(alpha_r == 0.0 && alpha_i == 0.0){ /* alpha == 0: y := beta * y, x is never read */ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| BLASLONG inc_yv = inc_y * gvl * 2; | |||
| for(i=0,j=0;i<n/gvl;i++){ | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| v0 = VFMULVF_FLOAT(vy1, beta_i, gvl); | |||
| v0 = VFMSACVF_FLOAT(v0, beta_r, vy0, gvl); /* real part; presumably beta_r*y_re - beta_i*y_im */ | |||
| VSSEV_FLOAT(&y[iy], stride_y, v0, gvl); | |||
| v1 = VFMULVF_FLOAT(vy1, beta_r, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl); /* imaginary part; presumably beta_r*y_im + beta_i*y_re, using the y_re loaded before the store above */ | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl); | |||
| j += gvl; | |||
| iy += inc_yv; | |||
| } | |||
| if(j<n){ /* tail chunk */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| v0 = VFMULVF_FLOAT(vy1, beta_i, gvl); | |||
| v0 = VFMSACVF_FLOAT(v0, beta_r, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, v0, gvl); | |||
| v1 = VFMULVF_FLOAT(vy1, beta_r, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl); | |||
| } | |||
| }else{ /* general case: y := alpha * x + beta * y */ | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| BLASLONG inc_xv = inc_x * gvl * 2; | |||
| BLASLONG inc_yv = inc_y * gvl * 2; | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| v0 = VFMULVF_FLOAT(vx0, alpha_r, gvl); /* real part is built up over the next four operations */ | |||
| v0 = VFNMSACVF_FLOAT(v0, alpha_i, vx1, gvl); /* presumably subtracts alpha_i*x_im (vfnmsac: accumulator - scalar*vector) */ | |||
| v0 = VFMACCVF_FLOAT(v0, beta_r, vy0, gvl); | |||
| v0 = VFNMSACVF_FLOAT(v0, beta_i, vy1, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, v0, gvl); | |||
| v1 = VFMULVF_FLOAT(vx1, alpha_r, gvl); /* imaginary part is built up over the next four operations */ | |||
| v1 = VFMACCVF_FLOAT(v1, alpha_i, vx0, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, beta_r, vy1, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl); | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| iy += inc_yv; | |||
| } | |||
| if(j<n){ /* tail chunk */ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| v0 = VFMULVF_FLOAT(vx0, alpha_r, gvl); | |||
| v0 = VFNMSACVF_FLOAT(v0, alpha_i, vx1, gvl); | |||
| v0 = VFMACCVF_FLOAT(v0, beta_r, vy0, gvl); | |||
| v0 = VFNMSACVF_FLOAT(v0, beta_i, vy1, gvl); | |||
| VSSEV_FLOAT(&y[iy], stride_y, v0, gvl); | |||
| v1 = VFMULVF_FLOAT(vx1, alpha_r, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, alpha_i, vx0, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, beta_r, vy1, gvl); | |||
| v1 = VFMACCVF_FLOAT(v1, beta_i, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, v1, gvl); | |||
| } | |||
| } | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,74 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/15 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| /* Scalar complex AXPY: y := y + da * x over n interleaved (re, im) pairs. */ | |||
| /* When CONJ is defined the update uses conj(x) instead of x. */ | |||
| /* inc_x and inc_y are strides in complex elements; dummy0, dummy1, dummy and */ | |||
| /* dummy2 are not used. Returns 0; nothing is done for n < 0 or da == 0. */ | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG px = 0, py = 0; | |||
|     if (n < 0 || (da_r == 0.0 && da_i == 0.0)) return 0; | |||
|     const BLASLONG step_x = 2 * inc_x; | |||
|     const BLASLONG step_y = 2 * inc_y; | |||
|     for (k = 0; k < n; k++) { | |||
| #if defined(CONJ) | |||
|         y[py] += (da_r * x[px] + da_i * x[px + 1]); | |||
|         y[py + 1] -= (da_r * x[px + 1] - da_i * x[px]); | |||
| #else | |||
|         y[py] += (da_r * x[px] - da_i * x[px + 1]); | |||
|         y[py + 1] += (da_r * x[px + 1] + da_i * x[px]); | |||
| #endif | |||
|         px += step_x; | |||
|         py += step_y; | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,107 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /* Precision-neutral aliases for the vector type and intrinsics used below. */ | |||
| /* Both precisions use the m4 register-group variants. */ | |||
| #define RVV_M RVV_M4 | |||
| #if defined(DOUBLE) | |||
| /* DOUBLE defined: 64-bit element variants. */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float64xm4 | |||
| #else | |||
| /* DOUBLE not defined: 32-bit element variants. */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VSSEV_FLOAT vssev_float32xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float32xm4 | |||
| #endif | |||
| /* Vectorized complex AXPY: y := y + da * x over n interleaved (re, im) pairs. */ | |||
| /* When CONJ is defined the update uses conj(x) instead of x. */ | |||
| /* inc_x and inc_y are strides in complex elements; dummy0, dummy1, dummy and */ | |||
| /* dummy2 are not used. Always returns 0. */ | |||
| /* Nothing is done when n <= 0 or when da is exactly zero. The n == 0 case */ | |||
| /* must exit early: vsetvli would report a vector length of 0 and the chunk */ | |||
| /* count n/gvl below would then divide by zero. */ | |||
| int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) | |||
| { | |||
|     BLASLONG i = 0, j = 0; | |||
|     BLASLONG ix = 0,iy = 0; | |||
|     if(n <= 0) return(0); | |||
|     if(da_r == 0.0 && da_i == 0.0) return(0); | |||
|     unsigned int gvl = 0; | |||
|     /* byte distance between consecutive complex elements */ | |||
|     BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); | |||
|     BLASLONG stride_y = inc_y * 2 * sizeof(FLOAT); | |||
|     FLOAT_V_T vx0, vx1, vy0, vy1; | |||
|     gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|     /* index advance, in FLOAT units, for one chunk of gvl complex elements */ | |||
|     BLASLONG inc_xv = inc_x * 2 * gvl; | |||
|     BLASLONG inc_yv = inc_y * 2 * gvl; | |||
|     /* full chunks: real and imaginary parts are handled as separate strided vectors */ | |||
|     for(i=0,j=0; i < n/gvl; i++){ | |||
|         vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|         vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
|         vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|         vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #if !defined(CONJ) | |||
|         /* presumably y_re += da_r*x_re - da_i*x_im and y_im += da_r*x_im + da_i*x_re */ | |||
|         /* (assumes vfmacc adds and vfnmsac subtracts scalar*vector) */ | |||
|         vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl); | |||
|         vy0 = VFNMSACVF_FLOAT(vy0, da_i, vx1, gvl); | |||
|         vy1 = VFMACCVF_FLOAT(vy1, da_r, vx1, gvl); | |||
|         vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl); | |||
| #else | |||
|         /* presumably y_re += da_r*x_re + da_i*x_im and y_im += da_i*x_re - da_r*x_im */ | |||
|         vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl); | |||
|         vy0 = VFMACCVF_FLOAT(vy0, da_i, vx1, gvl); | |||
|         vy1 = VFNMSACVF_FLOAT(vy1, da_r, vx1, gvl); | |||
|         vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl); | |||
| #endif | |||
|         VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
|         VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
|         j += gvl; | |||
|         ix += inc_xv; | |||
|         iy += inc_yv; | |||
|     } | |||
|     /* tail: fewer than gvl elements remain, processed as one shorter chunk */ | |||
|     if(j < n){ | |||
|         gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|         vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|         vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
|         vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
|         vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #if !defined(CONJ) | |||
|         vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl); | |||
|         vy0 = VFNMSACVF_FLOAT(vy0, da_i, vx1, gvl); | |||
|         vy1 = VFMACCVF_FLOAT(vy1, da_r, vx1, gvl); | |||
|         vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl); | |||
| #else | |||
|         vy0 = VFMACCVF_FLOAT(vy0, da_r, vx0, gvl); | |||
|         vy0 = VFMACCVF_FLOAT(vy0, da_i, vx1, gvl); | |||
|         vy1 = VFNMSACVF_FLOAT(vy1, da_r, vx1, gvl); | |||
|         vy1 = VFMACCVF_FLOAT(vy1, da_i, vx0, gvl); | |||
| #endif | |||
|         VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
|         VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,65 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| /* Scalar complex copy: y := x over n interleaved (re, im) pairs. */ | |||
| /* inc_x and inc_y are strides in complex elements. Returns 0; nothing is */ | |||
| /* copied when n <= 0. */ | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
|     BLASLONG k; | |||
|     BLASLONG src = 0, dst = 0; | |||
|     if (n < 0) return 0; | |||
|     const BLASLONG step_x = 2 * inc_x; | |||
|     const BLASLONG step_y = 2 * inc_y; | |||
|     for (k = 0; k < n; k++) { | |||
|         y[dst] = x[src]; | |||
|         y[dst + 1] = x[src + 1]; | |||
|         src += step_x; | |||
|         dst += step_y; | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,92 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /* Precision-neutral aliases for the vector type and strided load/store intrinsics. */ | |||
| /* Both precisions use the m4 register-group variants. */ | |||
| #define RVV_M RVV_M4 | |||
| #if defined(DOUBLE) | |||
| /* DOUBLE defined: 64-bit element variants. */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #else | |||
| /* DOUBLE not defined: 32-bit element variants. */ | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VSSEV_FLOAT vssev_float32xm4 | |||
| #endif | |||
| /* Vectorized complex copy: y := x over n interleaved (re, im) pairs. */ | |||
| /* inc_x and inc_y are strides in complex elements. Always returns 0. */ | |||
| /* Nothing is done when n <= 0. The n == 0 case must exit early: on the */ | |||
| /* strided path vsetvli would report a vector length of 0 and the chunk */ | |||
| /* count n/(2*gvl) below would then divide by zero. */ | |||
| int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
|     BLASLONG i = 0, j = 0; | |||
|     BLASLONG ix = 0,iy = 0; | |||
|     if(n <= 0) return(0); | |||
|     unsigned int gvl = 0; | |||
|     if(inc_x == 1 && inc_y == 1){ | |||
|         /* both vectors contiguous: copy all 2*n FLOATs in one call */ | |||
|         memcpy(&y[0], &x[0], n * 2 * sizeof(FLOAT)); | |||
|     }else{ | |||
|         FLOAT_V_T vx0, vx1, vx2, vx3; | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         /* byte distance between consecutive complex elements */ | |||
|         BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); | |||
|         BLASLONG stride_y = inc_y * 2 * sizeof(FLOAT); | |||
|         /* unrolled path only when at least 2*gvl elements exist */ | |||
|         if(gvl <= n/2){ | |||
|             /* index advance, in FLOAT units, for one chunk of gvl elements */ | |||
|             BLASLONG inc_xv = inc_x * gvl * 2; | |||
|             BLASLONG inc_yv = inc_y * gvl * 2; | |||
|             for(i=0,j=0; i < n/(2*gvl); i++){ | |||
|                 /* first chunk: real parts, then imaginary parts */ | |||
|                 vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|                 vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
|                 VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl); | |||
|                 VSSEV_FLOAT(&y[iy+1], stride_y, vx1, gvl); | |||
|                 /* second chunk, one chunk further along both vectors */ | |||
|                 vx2 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl); | |||
|                 vx3 = VLSEV_FLOAT(&x[ix+1+inc_xv], stride_x, gvl); | |||
|                 VSSEV_FLOAT(&y[iy+inc_yv], stride_y, vx2, gvl); | |||
|                 VSSEV_FLOAT(&y[iy+1+inc_yv], stride_y, vx3, gvl); | |||
|                 j += gvl * 2; | |||
|                 ix += inc_xv * 2; | |||
|                 iy += inc_yv * 2; | |||
|             } | |||
|         } | |||
|         /* remaining elements: re-query vsetvli for every pass */ | |||
|         for(;j<n;){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
|             vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
|             VSSEV_FLOAT(&y[iy], stride_y, vx0, gvl); | |||
|             VSSEV_FLOAT(&y[iy+1], stride_y, vx1, gvl); | |||
|             j += gvl; | |||
|             ix += inc_x * 2 * gvl; | |||
|             iy += inc_y * 2 * gvl; | |||
|         } | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,80 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/14 Saar | |||
| * BLASTEST float : FAIL | |||
| * BLASTEST double : FAIL | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| /* Scalar complex dot product over n interleaved (re, im) pairs of x and y. */ | |||
| /* Without CONJ the terms are x * y; with CONJ they are conj(x) * y. */ | |||
| /* inc_x and inc_y are strides in complex elements. Returns the accumulated */ | |||
| /* sum, which is (0, 0) when n < 1. */ | |||
| OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
|     OPENBLAS_COMPLEX_FLOAT result; | |||
|     FLOAT acc_re = 0.0; | |||
|     FLOAT acc_im = 0.0; | |||
|     if (n >= 1) { | |||
|         const BLASLONG step_x = 2 * inc_x; | |||
|         const BLASLONG step_y = 2 * inc_y; | |||
|         BLASLONG px = 0, py = 0; | |||
|         BLASLONG k; | |||
|         for (k = 0; k < n; k++) { | |||
| #if defined(CONJ) | |||
|             acc_re += (x[px] * y[py] + x[px + 1] * y[py + 1]); | |||
|             acc_im -= (x[px + 1] * y[py] - x[px] * y[py + 1]); | |||
| #else | |||
|             acc_re += (x[px] * y[py] - x[px + 1] * y[py + 1]); | |||
|             acc_im += (x[px + 1] * y[py] + x[px] * y[py + 1]); | |||
| #endif | |||
|             px += step_x; | |||
|             py += step_y; | |||
|         } | |||
|     } | |||
|     CREAL(result) = acc_re; | |||
|     CIMAG(result) = acc_im; | |||
|     return result; | |||
| } | |||
| @@ -0,0 +1,135 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLEV_FLOAT vlev_float32xm4 | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 | |||
| #define VFDOTVV_FLOAT vfdotvv_float32xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float32xm4 | |||
| #define VFMSACVV_FLOAT vfmsacvv_float32xm4 | |||
| #define VFNMSACVV_FLOAT vfnmsacvv_float32xm4 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFDOTVV_FLOAT vfdotvv_float64xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float64xm4 | |||
| #define VFMSACVV_FLOAT vfmsacvv_float64xm4 | |||
| #define VFNMSACVV_FLOAT vfnmsacvv_float64xm4 | |||
| #endif | |||
| OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i=0, j=0; | |||
| BLASLONG ix=0,iy=0; | |||
| FLOAT dot[2]; | |||
| OPENBLAS_COMPLEX_FLOAT result; | |||
| dot[0]=0.0; | |||
| dot[1]=0.0; | |||
| CREAL(result) = 0.0; | |||
| CIMAG(result) = 0.0; | |||
| if ( n < 1 ) return(result); | |||
| unsigned int gvl = 0; | |||
| FLOAT_V_T vr0, vr1, vx0, vx1, vy0, vy1; | |||
| gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
| vr0 = VFMVVF_FLOAT(0, gvl); | |||
| vr1 = VFMVVF_FLOAT(0, gvl); | |||
| BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT); | |||
| BLASLONG stride_y = inc_y * 2 * sizeof(FLOAT); | |||
| BLASLONG inc_xv = inc_x * 2 * gvl; | |||
| BLASLONG inc_yv = inc_y * 2 * gvl; | |||
| for(i=0,j=0; i<n/gvl; i++){ | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, gvl); | |||
| #if !defined(CONJ) | |||
| vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, gvl); | |||
| #else | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, gvl); | |||
| vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, gvl); | |||
| #endif | |||
| j += gvl; | |||
| ix += inc_xv; | |||
| iy += inc_yv; | |||
| } | |||
| vx0 = VFMVVF_FLOAT(0, gvl); | |||
| vr0 = VFREDSUM_FLOAT(vr0, vx0, gvl); | |||
| dot[0] += vr0[0]; | |||
| vr1 = VFREDSUM_FLOAT(vr1, vx0, gvl); | |||
| dot[1] += vr1[0]; | |||
| //tail | |||
| if(j < n){ | |||
| gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #if !defined(CONJ) | |||
| vr0 = VFMULVV_FLOAT(vx1, vy1, gvl); | |||
| vr0 = VFMSACVV_FLOAT(vr0, vx0, vy0, gvl); | |||
| vr1 = VFMULVV_FLOAT(vx0, vy1, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, gvl); | |||
| #else | |||
| vr0 = VFMULVV_FLOAT(vx0, vy0, gvl); | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, gvl); | |||
| vr1 = VFMULVV_FLOAT(vx1, vy0, gvl); | |||
| vr1 = VFMSACVV_FLOAT(vr1, vx0, vy1, gvl); | |||
| #endif | |||
| vx0 = VFMVVF_FLOAT(0, gvl); | |||
| vr0 = VFREDSUM_FLOAT(vr0, vx0, gvl); | |||
| dot[0] += vr0[0]; | |||
| vr1 = VFREDSUM_FLOAT(vr1, vx0, gvl); | |||
| dot[1] += vr1[0]; | |||
| } | |||
| CREAL(result) = dot[0]; | |||
| CIMAG(result) = dot[1]; | |||
| return(result); | |||
| } | |||
| @@ -0,0 +1,157 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * * 2013/11/23 Saar | |||
| * * BLASTEST float : OK | |||
| * * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * * | |||
| * **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix,iy; | |||
| BLASLONG j; | |||
| FLOAT *a_ptr; | |||
| FLOAT temp_r,temp_i; | |||
| BLASLONG inc_x2,inc_y2; | |||
| BLASLONG lda2; | |||
| BLASLONG i2; | |||
| lda2 = 2*lda; | |||
| ix = 0; | |||
| a_ptr = a; | |||
| if ( inc_x == 1 && inc_y == 1 ) | |||
| { | |||
| for (j=0; j<n; j++) | |||
| { | |||
| #if !defined(XCONJ) | |||
| temp_r = alpha_r * x[ix] - alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] + alpha_i * x[ix]; | |||
| #else | |||
| temp_r = alpha_r * x[ix] + alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] - alpha_i * x[ix]; | |||
| #endif | |||
| iy = 0; | |||
| i2=0; | |||
| for (i=0; i<m; i++) | |||
| { | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1]; | |||
| y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2]; | |||
| #else | |||
| y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1]; | |||
| y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2]; | |||
| #endif | |||
| #else | |||
| #if !defined(XCONJ) | |||
| y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1]; | |||
| y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2]; | |||
| #else | |||
| y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1]; | |||
| y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2]; | |||
| #endif | |||
| #endif | |||
| i2 += 2; | |||
| iy += 2; | |||
| } | |||
| a_ptr += lda2; | |||
| ix += 2; | |||
| } | |||
| return(0); | |||
| } | |||
| inc_x2 = 2 * inc_x; | |||
| inc_y2 = 2 * inc_y; | |||
| for (j=0; j<n; j++) | |||
| { | |||
| #if !defined(XCONJ) | |||
| temp_r = alpha_r * x[ix] - alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] + alpha_i * x[ix]; | |||
| #else | |||
| temp_r = alpha_r * x[ix] + alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] - alpha_i * x[ix]; | |||
| #endif | |||
| iy = 0; | |||
| i2=0; | |||
| for (i=0; i<m; i++) | |||
| { | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1]; | |||
| y[iy+1] += temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2]; | |||
| #else | |||
| y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1]; | |||
| y[iy+1] += temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2]; | |||
| #endif | |||
| #else | |||
| #if !defined(XCONJ) | |||
| y[iy] += temp_r * a_ptr[i2] + temp_i * a_ptr[i2+1]; | |||
| y[iy+1] -= temp_r * a_ptr[i2+1] - temp_i * a_ptr[i2]; | |||
| #else | |||
| y[iy] += temp_r * a_ptr[i2] - temp_i * a_ptr[i2+1]; | |||
| y[iy+1] -= temp_r * a_ptr[i2+1] + temp_i * a_ptr[i2]; | |||
| #endif | |||
| #endif | |||
| i2 += 2; | |||
| iy += inc_y2; | |||
| } | |||
| a_ptr += lda2; | |||
| ix += inc_x2; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,175 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLEV_FLOAT vlev_float32xm4 | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VSEV_FLOAT vsev_float32xm4 | |||
| #define VSSEV_FLOAT vssev_float32xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float32xm4 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSEV_FLOAT vsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float64xm4 | |||
| #endif | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i = 0, j = 0, k = 0; | |||
| BLASLONG ix = 0, iy = 0; | |||
| FLOAT *a_ptr = a; | |||
| FLOAT temp_r = 0.0, temp_i = 0.0; | |||
| FLOAT_V_T va0, va1, vy0, vy1; | |||
| unsigned int gvl = 0; | |||
| BLASLONG stride_a = sizeof(FLOAT) * 2; | |||
| BLASLONG stride_y = inc_y * sizeof(FLOAT) * 2; | |||
| gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
| BLASLONG inc_yv = inc_y * gvl * 2; | |||
| BLASLONG inc_x2 = inc_x * 2; | |||
| BLASLONG lda2 = lda * 2; | |||
| for(k=0,j=0; k<m/gvl; k++){ | |||
| a_ptr = a; | |||
| ix = 0; | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| for(i = 0; i < n; i++){ | |||
| #if !defined(XCONJ) | |||
| temp_r = alpha_r * x[ix] - alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] + alpha_i * x[ix]; | |||
| #else | |||
| temp_r = alpha_r * x[ix] + alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] - alpha_i * x[ix]; | |||
| #endif | |||
| va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl); | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #else | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #endif | |||
| #else | |||
| #if !defined(XCONJ) | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #else | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #endif | |||
| #endif | |||
| a_ptr += lda2; | |||
| ix += inc_x2; | |||
| } | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| j += gvl * 2; | |||
| iy += inc_yv; | |||
| } | |||
| //tail | |||
| if(j/2 < m){ | |||
| gvl = vsetvli(m-j/2, RVV_EFLOAT, RVV_M); | |||
| a_ptr = a; | |||
| ix = 0; | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| for(i = 0; i < n; i++){ | |||
| #if !defined(XCONJ) | |||
| temp_r = alpha_r * x[ix] - alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] + alpha_i * x[ix]; | |||
| #else | |||
| temp_r = alpha_r * x[ix] + alpha_i * x[ix+1]; | |||
| temp_i = alpha_r * x[ix+1] - alpha_i * x[ix]; | |||
| #endif | |||
| va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl); | |||
| #if !defined(CONJ) | |||
| #if !defined(XCONJ) | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #else | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #endif | |||
| #else | |||
| #if !defined(XCONJ) | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #else | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_i, va0, gvl); | |||
| #endif | |||
| #endif | |||
| a_ptr += lda2; | |||
| ix += inc_x2; | |||
| } | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,140 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * * 2013/11/23 Saar | |||
| * * BLASTEST float : OK | |||
| * * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * * | |||
| * **************************************************************************************/ | |||
| #include "common.h" | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix,iy; | |||
| BLASLONG j; | |||
| FLOAT *a_ptr; | |||
| FLOAT temp_r,temp_i; | |||
| BLASLONG inc_x2,inc_y2; | |||
| BLASLONG lda2; | |||
| BLASLONG i2; | |||
| lda2 = 2*lda; | |||
| iy = 0; | |||
| a_ptr = a; | |||
| if ( inc_x == 1 && inc_y == 1 ) | |||
| { | |||
| for (j=0; j<n; j++) | |||
| { | |||
| temp_r = 0.0; | |||
| temp_i = 0.0; | |||
| ix = 0; | |||
| i2=0; | |||
| for (i=0; i<m; i++) | |||
| { | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1]; | |||
| temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix]; | |||
| #else | |||
| temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1]; | |||
| temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix]; | |||
| #endif | |||
| i2 += 2; | |||
| ix += 2; | |||
| } | |||
| #if !defined(XCONJ) | |||
| y[iy] += alpha_r * temp_r - alpha_i * temp_i; | |||
| y[iy+1] += alpha_r * temp_i + alpha_i * temp_r; | |||
| #else | |||
| y[iy] += alpha_r * temp_r + alpha_i * temp_i; | |||
| y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r; | |||
| #endif | |||
| a_ptr += lda2; | |||
| iy += 2; | |||
| } | |||
| return(0); | |||
| } | |||
| inc_x2 = 2 * inc_x; | |||
| inc_y2 = 2 * inc_y; | |||
| for (j=0; j<n; j++) | |||
| { | |||
| temp_r = 0.0; | |||
| temp_i = 0.0; | |||
| ix = 0; | |||
| i2=0; | |||
| for (i=0; i<m; i++) | |||
| { | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| temp_r += a_ptr[i2] * x[ix] - a_ptr[i2+1] * x[ix+1]; | |||
| temp_i += a_ptr[i2] * x[ix+1] + a_ptr[i2+1] * x[ix]; | |||
| #else | |||
| temp_r += a_ptr[i2] * x[ix] + a_ptr[i2+1] * x[ix+1]; | |||
| temp_i += a_ptr[i2] * x[ix+1] - a_ptr[i2+1] * x[ix]; | |||
| #endif | |||
| i2 += 2; | |||
| ix += inc_x2; | |||
| } | |||
| #if !defined(XCONJ) | |||
| y[iy] += alpha_r * temp_r - alpha_i * temp_i; | |||
| y[iy+1] += alpha_r * temp_i + alpha_i * temp_r; | |||
| #else | |||
| y[iy] += alpha_r * temp_r + alpha_i * temp_i; | |||
| y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r; | |||
| #endif | |||
| a_ptr += lda2; | |||
| iy += inc_y2; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,134 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) | |||
| #define RVV_EFLOAT RVV_E32 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float32xm4_t | |||
| #define VLSEV_FLOAT vlsev_float32xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 | |||
| #define VFNMSACVV_FLOAT vfnmsacvv_float32xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float32xm4 | |||
| #else | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFNMSACVV_FLOAT vfnmsacvv_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float64xm4 | |||
| #endif | |||
| int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) | |||
| { | |||
| BLASLONG i = 0, j = 0, k = 0; | |||
| BLASLONG ix = 0, iy = 0; | |||
| FLOAT *a_ptr = a; | |||
| FLOAT temp_r, temp_i; | |||
| FLOAT_V_T va0, va1, vx0, vx1, vr, vi; | |||
| unsigned int gvl = 0; | |||
| BLASLONG stride_x = inc_x * sizeof(FLOAT) * 2; | |||
| BLASLONG stride_a = sizeof(FLOAT) * 2; | |||
| gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
| BLASLONG inc_xv = inc_x * gvl * 2; | |||
| BLASLONG inc_av = gvl * 2; | |||
| BLASLONG inc_y2 = inc_y * 2; | |||
| BLASLONG lda2 = lda * 2; | |||
| for(i = 0; i < n; i++){ | |||
| gvl = vsetvli(m, RVV_EFLOAT, RVV_M); | |||
| j = 0; | |||
| ix = 0; | |||
| vr = VFMVVF_FLOAT(0, gvl); | |||
| vi = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < m/gvl; k++){ | |||
| va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| vr = VFMACCVV_FLOAT(vr, va0, vx0, gvl); | |||
| vr = VFNMSACVV_FLOAT(vr, va1, vx1, gvl); | |||
| vi = VFMACCVV_FLOAT(vi, va0, vx1, gvl); | |||
| vi = VFMACCVV_FLOAT(vi, va1, vx0, gvl); | |||
| #else | |||
| vr = VFMACCVV_FLOAT(vr, va0, vx0, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl); | |||
| vi = VFMACCVV_FLOAT(vi, va0, vx1, gvl); | |||
| vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl); | |||
| #endif | |||
| j += inc_av; | |||
| ix += inc_xv; | |||
| } | |||
| va0 = VFMVVF_FLOAT(0, gvl); | |||
| vx0 = VFREDSUM_FLOAT(vr, va0, gvl); | |||
| temp_r = vx0[0]; | |||
| vx1 = VFREDSUM_FLOAT(vi, va0, gvl); | |||
| temp_i = vx1[0]; | |||
| if(j/2 < m){ | |||
| gvl = vsetvli(m-j/2, RVV_EFLOAT, RVV_M); | |||
| va0 = VLSEV_FLOAT(&a_ptr[j], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[j+1], stride_a, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) | |||
| vr = VFMULVV_FLOAT(va0, vx0, gvl); | |||
| vr = VFNMSACVV_FLOAT(vr, va1, vx1, gvl); | |||
| vi = VFMULVV_FLOAT(va0, vx1, gvl); | |||
| vi = VFMACCVV_FLOAT(vi, va1, vx0, gvl); | |||
| #else | |||
| vr = VFMULVV_FLOAT(va0, vx0, gvl); | |||
| vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl); | |||
| vi = VFMULVV_FLOAT(va0, vx1, gvl); | |||
| vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl); | |||
| #endif | |||
| va0 = VFMVVF_FLOAT(0, gvl); | |||
| vx0 = VFREDSUM_FLOAT(vr, va0, gvl); | |||
| temp_r += vx0[0]; | |||
| vx1 = VFREDSUM_FLOAT(vi, va0, gvl); | |||
| temp_i += vx1[0]; | |||
| } | |||
| #if !defined(XCONJ) | |||
| y[iy] += alpha_r * temp_r - alpha_i * temp_i; | |||
| y[iy+1] += alpha_r * temp_i + alpha_i * temp_r; | |||
| #else | |||
| y[iy] += alpha_r * temp_r + alpha_i * temp_i; | |||
| y[iy+1] -= alpha_r * temp_i - alpha_i * temp_r; | |||
| #endif | |||
| iy += inc_y2; | |||
| a_ptr += lda2; | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,191 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
/*
 * Bind the precision-neutral names used by the kernel below to the e64 or
 * e32 flavour of the m4 vector intrinsics, depending on DOUBLE.
 */
#if defined(DOUBLE)
#define RVV_EFLOAT       RVV_E64
#define RVV_M            RVV_M4
#define FLOAT_V_T        float64xm4_t
#define VLSEV_FLOAT      vlsev_float64xm4
#define VSSEV_FLOAT      vssev_float64xm4
#define VFREDSUM_FLOAT   vfredsumvs_float64xm4
#define VFMACCVV_FLOAT   vfmaccvv_float64xm4
#define VFMACCVF_FLOAT   vfmaccvf_float64xm4
#define VFMVVF_FLOAT     vfmvvf_float64xm4
#define VFMULVV_FLOAT    vfmulvv_float64xm4
#define VFNMSACVF_FLOAT  vfnmsacvf_float64xm4
#define VFNMSACVV_FLOAT  vfnmsacvv_float64xm4
#else
#define RVV_EFLOAT       RVV_E32
#define RVV_M            RVV_M4
#define FLOAT_V_T        float32xm4_t
#define VLSEV_FLOAT      vlsev_float32xm4
#define VSSEV_FLOAT      vssev_float32xm4
#define VFREDSUM_FLOAT   vfredsumvs_float32xm4
#define VFMACCVV_FLOAT   vfmaccvv_float32xm4
#define VFMACCVF_FLOAT   vfmaccvf_float32xm4
#define VFMVVF_FLOAT     vfmvvf_float32xm4
#define VFMULVV_FLOAT    vfmulvv_float32xm4
#define VFNMSACVF_FLOAT  vfnmsacvf_float32xm4
#define VFNMSACVV_FLOAT  vfnmsacvv_float32xm4
#endif
| /* Complex matrix-vector update kernel working on the part of each column below the diagonal | |||
| * (presumably the lower-triangle Hermitian HEMV variant -- TODO confirm against the kernel build files). | |||
| * Complex numbers are interleaved (re, im) FLOAT pairs; incx/incy/lda are counted in complex elements. | |||
| * For each of the first `offset` columns j: | |||
| *   temp1 = alpha * x[j] | |||
| *   y[j] += temp1 * a[j][j]            (only the real part of the diagonal entry is read) | |||
| *   y[i] += temp1 * a[i][j]            for every row i with j < i < m | |||
| *   y[j] += alpha * temp2, temp2 = sum over those rows of conj(a[i][j]) * x[i] | |||
| * When HEMVREV is defined the conjugation moves to the other product: | |||
| *   y[i] += temp1 * conj(a[i][j]) and temp2 = sum of a[i][j] * x[i]. | |||
| * `buffer` is not referenced. Always returns 0. | |||
| */ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){ | |||
| BLASLONG i, j, k; | |||
| BLASLONG ix, iy, ia; /* FLOAT offsets of the current row in x, y and the current column of a */ | |||
| BLASLONG jx, jy, ja; /* FLOAT offsets of the diagonal position j in x, y and the current column of a */ | |||
| FLOAT temp_r1, temp_i1; /* temp1 = alpha * x[j] */ | |||
| FLOAT temp_r2, temp_i2; /* temp2 = dot product of the column part with x */ | |||
| FLOAT *a_ptr = a; /* start of column j; advanced by lda2 per column */ | |||
| unsigned int gvl = 0; /* element count returned by vsetvli(); rows handled per vector operation */ | |||
| FLOAT_V_T va0, va1, vx0, vx1, vy0, vy1, vr0, vr1; /* *0 = real parts, *1 = imaginary parts */ | |||
| BLASLONG stride_x, stride_y, stride_a, inc_xv, inc_yv, inc_av, len, lda2; | |||
| BLASLONG inc_x2 = incx * 2; /* FLOATs between consecutive complex elements of x */ | |||
| BLASLONG inc_y2 = incy * 2; /* FLOATs between consecutive complex elements of y */ | |||
| stride_x = inc_x2 * sizeof(FLOAT); /* strides handed to the load/store intrinsics are in bytes */ | |||
| stride_y = inc_y2 * sizeof(FLOAT); | |||
| stride_a = 2 * sizeof(FLOAT); /* rows of one column are adjacent complex values */ | |||
| lda2 = lda * 2; /* FLOATs between consecutive columns of a */ | |||
| jx = 0; | |||
| jy = 0; | |||
| ja = 0; | |||
| for(j = 0; j < offset; j++){ | |||
| temp_r1 = alpha_r * x[jx] - alpha_i * x[jx+1];; | |||
| temp_i1 = alpha_r * x[jx+1] + alpha_i * x[jx]; | |||
| temp_r2 = 0; | |||
| temp_i2 = 0; | |||
| y[jy] += temp_r1 * a_ptr[ja]; /* diagonal term: a_ptr[ja+1] (imaginary part) is never read */ | |||
| y[jy+1] += temp_i1 * a_ptr[ja]; | |||
| ix = jx + inc_x2; /* first row below the diagonal */ | |||
| iy = jy + inc_y2; | |||
| ia = ja + 2; | |||
| i = j + 1; | |||
| len = m - i; /* number of rows below the diagonal in this column */ | |||
| if(len > 0){ | |||
| gvl = vsetvli(len, RVV_EFLOAT, RVV_M); | |||
| inc_xv = incx * gvl * 2; /* FLOAT advance per full vector step */ | |||
| inc_yv = incy * gvl * 2; | |||
| inc_av = gvl * 2; | |||
| vr0 = VFMVVF_FLOAT(0, gvl); /* per-lane accumulators for temp2 (real / imaginary) */ | |||
| vr1 = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < len / gvl; k++){ /* full steps of gvl rows; the remainder is handled after the loop */ | |||
| va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #ifndef HEMVREV /* y[i] += temp1 * a[i][j] */ | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #else /* y[i] += temp1 * conj(a[i][j]) */ | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #endif | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| #ifndef HEMVREV /* temp2 lanes += conj(a[i][j]) * x[i] */ | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); | |||
| vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #else /* temp2 lanes += a[i][j] * x[i] */ | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); | |||
| vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #endif | |||
| i += gvl; | |||
| ix += inc_xv; | |||
| iy += inc_yv; | |||
| ia += inc_av; | |||
| } | |||
| va0 = VFMVVF_FLOAT(0, gvl); /* zero vector passed as second operand of the reduction */ | |||
| vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); /* presumably sums the lanes of vr0 into element 0 -- confirm in the intrinsic header */ | |||
| temp_r2 = vx0[0]; | |||
| vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); | |||
| temp_i2 = vx1[0]; | |||
| if(i < m){ /* remaining m-i rows: same update with a freshly requested gvl */ | |||
| gvl = vsetvli(m-i, RVV_EFLOAT, RVV_M); | |||
| va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #ifndef HEMVREV | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #else | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #endif | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| #ifndef HEMVREV /* tail products start from a plain multiply instead of the old accumulators */ | |||
| vr0 = VFMULVV_FLOAT(vx0, va0, gvl); | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMULVV_FLOAT(vx1, va0, gvl); | |||
| vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #else | |||
| vr0 = VFMULVV_FLOAT(vx0, va0, gvl); | |||
| vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMULVV_FLOAT(vx1, va0, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #endif | |||
| va0 = VFMVVF_FLOAT(0, gvl); | |||
| vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); | |||
| temp_r2 += vx0[0]; /* tail contribution is added to the sum from the full steps */ | |||
| vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); | |||
| temp_i2 += vx1[0]; | |||
| } | |||
| } | |||
| y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2; /* y[j] += alpha * temp2 */ | |||
| y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2; | |||
| jx += inc_x2; | |||
| jy += inc_y2; | |||
| ja += 2; | |||
| a_ptr += lda2; /* next column */ | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,192 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) /* DOUBLE not defined: bind the generic names below to the float32 "xm4" intrinsics */ | |||
| #define RVV_EFLOAT RVV_E32 /* element-width argument handed to vsetvli() */ | |||
| #define RVV_M RVV_M4 /* third vsetvli() argument (M4 setting) */ | |||
| #define FLOAT_V_T float32xm4_t /* type of every vector local in the kernel below */ | |||
| #define VLSEV_FLOAT vlsev_float32xm4 /* load; called as (pointer, stride in bytes, gvl) */ | |||
| #define VSSEV_FLOAT vssev_float32xm4 /* store; called as (pointer, stride in bytes, vector, gvl) */ | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 /* reduction; the kernel reads element [0] of the result */ | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 /* called as (acc, vec, vec, gvl); presumably acc + vec*vec -- confirm in the intrinsic header */ | |||
| #define VFMACCVF_FLOAT vfmaccvf_float32xm4 /* called as (acc, scalar, vec, gvl); presumably acc + scalar*vec */ | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 /* called as (scalar, gvl); used to build all-zero vectors */ | |||
| #define VFMULVV_FLOAT vfmulvv_float32xm4 /* called as (vec, vec, gvl); element-wise product per the mnemonic */ | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float32xm4 /* called as (acc, scalar, vec, gvl); presumably acc - scalar*vec */ | |||
| #define VFNMSACVV_FLOAT vfnmsacvv_float32xm4 /* called as (acc, vec, vec, gvl); presumably acc - vec*vec */ | |||
| #else /* DOUBLE defined: same names bound to the float64 "xm4" intrinsics */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VSSEV_FLOAT vssev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFMACCVF_FLOAT vfmaccvf_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFMULVV_FLOAT vfmulvv_float64xm4 | |||
| #define VFNMSACVF_FLOAT vfnmsacvf_float64xm4 | |||
| #define VFNMSACVV_FLOAT vfnmsacvv_float64xm4 | |||
| #endif | |||
| /* Complex matrix-vector update kernel working on the part of each column above the diagonal | |||
| * (presumably the upper-triangle Hermitian HEMV variant -- TODO confirm against the kernel build files). | |||
| * Complex numbers are interleaved (re, im) FLOAT pairs; incx/incy/lda are counted in complex elements. | |||
| * For each of the last `offset` columns j (m - offset <= j < m): | |||
| *   temp1 = alpha * x[j] | |||
| *   y[i] += temp1 * a[i][j]            for every row i with 0 <= i < j | |||
| *   y[j] += temp1 * a[j][j]            (only the real part of the diagonal entry is read) | |||
| *   y[j] += alpha * temp2, temp2 = sum over those rows of conj(a[i][j]) * x[i] | |||
| * When HEMVREV is defined the conjugation moves to the other product: | |||
| *   y[i] += temp1 * conj(a[i][j]) and temp2 = sum of a[i][j] * x[i]. | |||
| * `buffer` is not referenced. Always returns 0. | |||
| */ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){ | |||
| BLASLONG i, j, k; | |||
| BLASLONG ix, iy, ia; /* FLOAT offsets of the current row in x, y and the current column of a */ | |||
| BLASLONG jx, jy, ja; /* FLOAT offsets of the diagonal position j in x, y and the current column of a */ | |||
| FLOAT temp_r1, temp_i1; /* temp1 = alpha * x[j] */ | |||
| FLOAT temp_r2, temp_i2; /* temp2 = dot product of the column part with x */ | |||
| FLOAT *a_ptr = a; | |||
| unsigned int gvl = 0; /* element count returned by vsetvli(); rows handled per vector operation */ | |||
| FLOAT_V_T va0, va1, vx0, vx1, vy0, vy1, vr0, vr1; /* *0 = real parts, *1 = imaginary parts */ | |||
| BLASLONG stride_x, stride_y, stride_a, inc_xv, inc_yv, inc_av, lda2; | |||
| BLASLONG inc_x2 = incx * 2; /* FLOATs between consecutive complex elements of x */ | |||
| BLASLONG inc_y2 = incy * 2; /* FLOATs between consecutive complex elements of y */ | |||
| stride_x = inc_x2 * sizeof(FLOAT); /* strides handed to the load/store intrinsics are in bytes */ | |||
| stride_y = inc_y2 * sizeof(FLOAT); | |||
| stride_a = 2 * sizeof(FLOAT); /* rows of one column are adjacent complex values */ | |||
| lda2 = lda * 2; /* FLOATs between consecutive columns of a */ | |||
| BLASLONG m1 = m - offset; /* index of the first column processed */ | |||
| a_ptr = a + m1 * lda2; /* start of column m1 */ | |||
| jx = m1 * inc_x2; | |||
| jy = m1 * inc_y2; | |||
| ja = m1 * 2; | |||
| for(j = m1; j < m; j++){ | |||
| temp_r1 = alpha_r * x[jx] - alpha_i * x[jx+1];; | |||
| temp_i1 = alpha_r * x[jx+1] + alpha_i * x[jx]; | |||
| temp_r2 = 0; | |||
| temp_i2 = 0; | |||
| ix = 0; /* rows 0 .. j-1 are walked from the top of the column */ | |||
| iy = 0; | |||
| ia = 0; | |||
| i = 0; | |||
| if(j > 0){ | |||
| gvl = vsetvli(j, RVV_EFLOAT, RVV_M); | |||
| inc_xv = incx * gvl * 2; /* FLOAT advance per full vector step */ | |||
| inc_yv = incy * gvl * 2; | |||
| inc_av = gvl * 2; | |||
| vr0 = VFMVVF_FLOAT(0, gvl); /* per-lane accumulators for temp2 (real / imaginary) */ | |||
| vr1 = VFMVVF_FLOAT(0, gvl); | |||
| for(k = 0; k < j / gvl; k++){ /* full steps of gvl rows; the remainder is handled after the loop */ | |||
| va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #ifndef HEMVREV /* y[i] += temp1 * a[i][j] */ | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #else /* y[i] += temp1 * conj(a[i][j]) */ | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #endif | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| #ifndef HEMVREV /* temp2 lanes += conj(a[i][j]) * x[i] */ | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); | |||
| vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #else /* temp2 lanes += a[i][j] * x[i] */ | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx0, va0, gvl); | |||
| vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx1, va0, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #endif | |||
| i += gvl; | |||
| ix += inc_xv; | |||
| iy += inc_yv; | |||
| ia += inc_av; | |||
| } | |||
| va0 = VFMVVF_FLOAT(0, gvl); /* zero vector passed as second operand of the reduction */ | |||
| vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); /* presumably sums the lanes of vr0 into element 0 -- confirm in the intrinsic header */ | |||
| temp_r2 = vx0[0]; | |||
| vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); | |||
| temp_i2 = vx1[0]; | |||
| if(i < j){ /* remaining j-i rows: same update with a freshly requested gvl */ | |||
| gvl = vsetvli(j-i, RVV_EFLOAT, RVV_M); | |||
| va0 = VLSEV_FLOAT(&a_ptr[ia], stride_a, gvl); | |||
| va1 = VLSEV_FLOAT(&a_ptr[ia+1], stride_a, gvl); | |||
| vy0 = VLSEV_FLOAT(&y[iy], stride_y, gvl); | |||
| vy1 = VLSEV_FLOAT(&y[iy+1], stride_y, gvl); | |||
| #ifndef HEMVREV | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFNMSACVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #else | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_r1, va0, gvl); | |||
| vy0 = VFMACCVF_FLOAT(vy0, temp_i1, va1, gvl); | |||
| vy1 = VFNMSACVF_FLOAT(vy1, temp_r1, va1, gvl); | |||
| vy1 = VFMACCVF_FLOAT(vy1, temp_i1, va0, gvl); | |||
| #endif | |||
| VSSEV_FLOAT(&y[iy], stride_y, vy0, gvl); | |||
| VSSEV_FLOAT(&y[iy+1], stride_y, vy1, gvl); | |||
| vx0 = VLSEV_FLOAT(&x[ix], stride_x, gvl); | |||
| vx1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl); | |||
| #ifndef HEMVREV /* tail products start from a plain multiply instead of the old accumulators */ | |||
| vr0 = VFMULVV_FLOAT(vx0, va0, gvl); | |||
| vr0 = VFMACCVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMULVV_FLOAT(vx1, va0, gvl); | |||
| vr1 = VFNMSACVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #else | |||
| vr0 = VFMULVV_FLOAT(vx0, va0, gvl); | |||
| vr0 = VFNMSACVV_FLOAT(vr0, vx1, va1, gvl); | |||
| vr1 = VFMULVV_FLOAT(vx1, va0, gvl); | |||
| vr1 = VFMACCVV_FLOAT(vr1, vx0, va1, gvl); | |||
| #endif | |||
| va0 = VFMVVF_FLOAT(0, gvl); | |||
| vx0 = VFREDSUM_FLOAT(vr0, va0, gvl); | |||
| temp_r2 += vx0[0]; /* tail contribution is added to the sum from the full steps */ | |||
| vx1 = VFREDSUM_FLOAT(vr1, va0, gvl); | |||
| temp_i2 += vx1[0]; | |||
| } | |||
| } | |||
| y[jy] += temp_r1 * a_ptr[ja]; /* diagonal term: a_ptr[ja+1] (imaginary part) is never read */ | |||
| y[jy+1] += temp_i1 * a_ptr[ja]; | |||
| y[jy] += alpha_r * temp_r2 - alpha_i * temp_i2; /* y[j] += alpha * temp2 */ | |||
| y[jy+1] += alpha_r * temp_i2 + alpha_i * temp_r2; | |||
| jx += inc_x2; | |||
| jy += inc_y2; | |||
| ja += 2; | |||
| a_ptr += lda2; /* next column */ | |||
| } | |||
| return(0); | |||
| } | |||
| @@ -0,0 +1,106 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2013/09/13 Saar | |||
| * BLASTEST float : OK | |||
| * BLASTEST double : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <math.h> | |||
| #if defined(DOUBLE) /* pick the absolute-value routine for the build's FLOAT precision */ | |||
| #define ABS fabs /* DOUBLE build */ | |||
| #else | |||
| #define ABS fabsf /* non-DOUBLE build */ | |||
| #endif | |||
| /* | |||
|  * Euclidean norm of a complex vector stored as interleaved (re, im) FLOAT pairs. | |||
|  * | |||
|  * n      number of complex elements | |||
|  * x      first element; consecutive elements are 2 * inc_x FLOATs apart | |||
|  * inc_x  stride in complex elements | |||
|  * | |||
|  * Returns 0 when n <= 0 or inc_x <= 0. Otherwise keeps the largest magnitude | |||
|  * seen so far (`biggest`) together with the sum of (value / biggest)^2 | |||
|  * (`sumsq`) and returns biggest * sqrt(sumsq). Exact zeros are skipped. | |||
|  */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     FLOAT biggest = 0.0; | |||
|     FLOAT sumsq = 1.0; | |||
|     FLOAT mag; | |||
|     BLASLONG step, limit, pos, part; | |||
|  | |||
|     if (!(n > 0 && inc_x > 0)) return(0.0); | |||
|  | |||
|     step = 2 * inc_x; | |||
|     limit = n * step; | |||
|  | |||
|     for (pos = 0; pos < limit; pos += step) | |||
|     { | |||
|         /* part 0 is the real component, part 1 the imaginary one */ | |||
|         for (part = 0; part < 2; part++) | |||
|         { | |||
|             if ( x[pos + part] == 0.0 ) continue; | |||
|  | |||
|             mag = ABS( x[pos + part] ); | |||
|             if ( biggest < mag ) | |||
|             { | |||
|                 /* new maximum: rescale what has been summed so far */ | |||
|                 sumsq = 1 + sumsq * ( biggest / mag ) * ( biggest / mag ); | |||
|                 biggest = mag ; | |||
|             } | |||
|             else | |||
|             { | |||
|                 sumsq += ( mag / biggest ) * ( mag / biggest ); | |||
|             } | |||
|         } | |||
|     } | |||
|  | |||
|     biggest = biggest * sqrt( sumsq ); | |||
|     return(biggest); | |||
| } | |||
| @@ -0,0 +1,278 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2020, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| #if !defined(DOUBLE) /* DOUBLE not defined: bind the generic names below to the float32 "xm4" intrinsics */ | |||
| #define RVV_EFLOAT RVV_E32 /* element-width argument handed to vsetvli() */ | |||
| #define RVV_M RVV_M4 /* third vsetvli() argument (M4 setting) */ | |||
| #define FLOAT_V_T float32xm4_t /* type of the vector locals in the kernel below */ | |||
| #define VLEV_FLOAT vlev_float32xm4 /* load; called as (pointer, gvl) on the inc_x == 1 path */ | |||
| #define VLSEV_FLOAT vlsev_float32xm4 /* load; called as (pointer, stride in bytes, gvl) on the strided path */ | |||
| #define VFREDSUM_FLOAT vfredsumvs_float32xm4 /* reduction; the kernel reads element [0] of the result as the sum */ | |||
| #define VFMACCVV_FLOAT vfmaccvv_float32xm4 /* called as (acc, vec, vec, gvl); presumably acc + vec*vec -- confirm in the intrinsic header */ | |||
| #define VFMVVF_FLOAT vfmvvf_float32xm4 /* called as (scalar, gvl); used to build all-zero vectors */ | |||
| #define VFDOTVV_FLOAT vfdotvv_float32xm4 /* not referenced by the function below */ | |||
| #define ABS fabsf /* only appears in a commented-out line of the function below */ | |||
| #define MASK_T e32xm4_t /* type of the comparison results */ | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float32xm4 /* called as (vec, vec, 0, mask, gvl) to negate the lanes flagged as negative */ | |||
| #define VMFGTVF_FLOAT vmfgtvf_e32xm4_float32xm4 /* called as (vec, scalar, gvl); "greater than" per the mnemonic */ | |||
| #define VMFIRSTM vmfirstm_e32xm4 /* called as (mask, gvl); the kernel treats -1 as "no lane set" */ | |||
| #define VFDIVVF_FLOAT vfdivvf_float32xm4 /* called as (vec, scalar, gvl); divide per the mnemonic */ | |||
| #define VMFLTVF_FLOAT vmfltvf_e32xm4_float32xm4 /* called as (vec, 0, gvl); "less than" per the mnemonic */ | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float32xm4 /* reduction; the kernel reads element [0] of the result as the maximum */ | |||
| #else /* DOUBLE defined: same names bound to the float64 "xm4" intrinsics */ | |||
| #define RVV_EFLOAT RVV_E64 | |||
| #define RVV_M RVV_M4 | |||
| #define FLOAT_V_T float64xm4_t | |||
| #define VLEV_FLOAT vlev_float64xm4 | |||
| #define VLSEV_FLOAT vlsev_float64xm4 | |||
| #define VFREDSUM_FLOAT vfredsumvs_float64xm4 | |||
| #define VFMACCVV_FLOAT vfmaccvv_float64xm4 | |||
| #define VFMVVF_FLOAT vfmvvf_float64xm4 | |||
| #define VFDOTVV_FLOAT vfdotvv_float64xm4 | |||
| #define ABS fabs | |||
| #define MASK_T e64xm4_t | |||
| #define VFRSUBVF_MASK_FLOAT vfrsubvf_mask_float64xm4 | |||
| #define VMFGTVF_FLOAT vmfgtvf_e64xm4_float64xm4 | |||
| #define VMFIRSTM vmfirstm_e64xm4 | |||
| #define VFDIVVF_FLOAT vfdivvf_float64xm4 | |||
| #define VMFLTVF_FLOAT vmfltvf_e64xm4_float64xm4 | |||
| #define VFREDMAXVS_FLOAT vfredmaxvs_float64xm4 | |||
| #endif | |||
| /* | |||
|  * Euclidean norm of a complex vector stored as interleaved (re, im) FLOAT pairs, | |||
|  * computed with vector intrinsics. | |||
|  * | |||
|  * n      number of complex elements; n <= 0 returns 0 | |||
|  * x      first element | |||
|  * inc_x  stride in complex elements | |||
|  * | |||
|  * The routine keeps `scale` (largest magnitude seen so far) and `ssq` (sum of | |||
|  * (value / scale)^2 for everything already folded in) and returns | |||
|  * scale * sqrt(ssq). Within a run of vector steps that do not raise `scale`, | |||
|  * the squared ratios are accumulated per lane in `vr`; when a step contains a | |||
|  * larger magnitude, `vr` is summed into `ssq`, `ssq` is rescaled to the new | |||
|  * maximum and `vr` restarts from the current step. | |||
|  * | |||
|  * inc_x == 1: the 2*n FLOATs are processed as one contiguous run. | |||
|  * otherwise:  real and imaginary parts are loaded separately with a byte stride. | |||
|  */ | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
|     BLASLONG i=0, j=0; | |||
|     /* n == 0 must be rejected here as well: requesting a vector length for | |||
|        zero elements yields gvl == 0 (RVV vsetvli semantics), and gvl is the | |||
|        divisor in the loop bounds below. */ | |||
|     if ( n <= 0 ) return(0.0); | |||
|     FLOAT_V_T vr, v0, v_zero; | |||
|     unsigned int gvl = 0; | |||
|     FLOAT scale = 0.0, ssq = 0.0; | |||
|     MASK_T mask; | |||
|     BLASLONG index = 0; | |||
|     if(inc_x == 1){ | |||
|         BLASLONG n2 = n * 2; | |||
|         gvl = vsetvli(n2, RVV_EFLOAT, RVV_M); | |||
|         vr = VFMVVF_FLOAT(0, gvl); | |||
|         v_zero = VFMVVF_FLOAT(0, gvl); | |||
|         for(i=0,j=0; i<n2/gvl; i++){ | |||
|             v0 = VLEV_FLOAT(&x[j], gvl); | |||
|             /* fabs(vector): negate the lanes that compare below zero */ | |||
|             mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
|             /* does any lane exceed the current scale? */ | |||
|             mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
|             index = VMFIRSTM(mask, gvl); | |||
|             if(index == -1){ /* no element greater than scale */ | |||
|                 if(scale != 0.0){ | |||
|                     v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                     vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
|                 } | |||
|             }else{ /* found a greater element */ | |||
|                 /* fold the per-lane sums gathered so far into ssq */ | |||
|                 vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|                 ssq += vr[0]; | |||
|                 /* new maximum of this step */ | |||
|                 vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
|                 /* rescale ssq from the old scale to the new one */ | |||
|                 ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
|                 scale = vr[0]; | |||
|                 /* restart the per-lane sums with the current step */ | |||
|                 v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                 vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|             } | |||
|             j += gvl; | |||
|         } | |||
|         /* fold the remaining per-lane sums into ssq */ | |||
|         vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|         ssq += vr[0]; | |||
|         /* tail: fewer than gvl FLOATs left */ | |||
|         if(j < n2){ | |||
|             gvl = vsetvli(n2-j, RVV_EFLOAT, RVV_M); | |||
|             v0 = VLEV_FLOAT(&x[j], gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
|             /* does any lane exceed the current scale? */ | |||
|             mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
|             index = VMFIRSTM(mask, gvl); | |||
|             if(index == -1){ /* no element greater than scale */ | |||
|                 if(scale != 0.0) | |||
|                     v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|             }else{ /* found a greater element */ | |||
|                 vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
|                 ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
|                 scale = vr[0]; | |||
|                 v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|             } | |||
|             vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|             vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|             ssq += vr[0]; | |||
|         } | |||
|     }else{ | |||
|         gvl = vsetvli(n, RVV_EFLOAT, RVV_M); | |||
|         vr = VFMVVF_FLOAT(0, gvl); | |||
|         v_zero = VFMVVF_FLOAT(0, gvl); | |||
|         /* Stride and index are kept in BLASLONG so that large or negative | |||
|            strides are not truncated to 32 bits. */ | |||
|         BLASLONG stride_x = (BLASLONG)sizeof(FLOAT) * 2 * inc_x; | |||
|         BLASLONG idx = 0, inc_v = inc_x * gvl * 2; | |||
|         for(i=0,j=0; i<n/gvl; i++){ | |||
|             /* real parts of the next gvl elements */ | |||
|             v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
|             /* does any lane exceed the current scale? */ | |||
|             mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
|             index = VMFIRSTM(mask, gvl); | |||
|             if(index == -1){ /* no element greater than scale */ | |||
|                 if(scale != 0.0){ | |||
|                     v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                     vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
|                 } | |||
|             }else{ /* found a greater element */ | |||
|                 vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|                 ssq += vr[0]; | |||
|                 vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
|                 ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
|                 scale = vr[0]; | |||
|                 v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                 vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|             } | |||
|             /* imaginary parts of the same gvl elements */ | |||
|             v0 = VLSEV_FLOAT(&x[idx+1], stride_x, gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
|             /* does any lane exceed the current scale? */ | |||
|             mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
|             index = VMFIRSTM(mask, gvl); | |||
|             if(index == -1){ /* no element greater than scale */ | |||
|                 if(scale != 0.0){ | |||
|                     v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                     vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
|                 } | |||
|             }else{ /* found a greater element */ | |||
|                 vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|                 ssq += vr[0]; | |||
|                 vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
|                 ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
|                 scale = vr[0]; | |||
|                 v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                 vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|             } | |||
|             j += gvl; | |||
|             idx += inc_v; | |||
|         } | |||
|         /* fold the remaining per-lane sums into ssq */ | |||
|         vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|         ssq += vr[0]; | |||
|         /* tail: fewer than gvl complex elements left */ | |||
|         if(j < n){ | |||
|             gvl = vsetvli(n-j, RVV_EFLOAT, RVV_M); | |||
|             /* real parts; ssq already contains everything before the tail, | |||
|                so vr restarts from v_zero here */ | |||
|             v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
|             /* does any lane exceed the current scale? */ | |||
|             mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
|             index = VMFIRSTM(mask, gvl); | |||
|             if(index == -1){ /* no element greater than scale */ | |||
|                 /* NOTE(review): when scale == 0.0 here, vr keeps the result of | |||
|                    the reduction above and is reduced again below -- confirm | |||
|                    that this is harmless for the intrinsic in use. */ | |||
|                 if(scale != 0.0){ | |||
|                     v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                     vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|                 } | |||
|             }else{ /* found a greater element */ | |||
|                 vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
|                 ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
|                 scale = vr[0]; | |||
|                 v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                 vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|             } | |||
|             /* imaginary parts */ | |||
|             v0 = VLSEV_FLOAT(&x[idx+1], stride_x, gvl); | |||
|             /* fabs(vector) */ | |||
|             mask = VMFLTVF_FLOAT(v0, 0, gvl); | |||
|             v0 = VFRSUBVF_MASK_FLOAT(v0, v0, 0, mask, gvl); | |||
|             /* does any lane exceed the current scale? */ | |||
|             mask = VMFGTVF_FLOAT(v0, scale, gvl); | |||
|             index = VMFIRSTM(mask, gvl); | |||
|             if(index == -1){ /* no element greater than scale */ | |||
|                 if(scale != 0.0){ | |||
|                     v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                     vr = VFMACCVV_FLOAT(vr, v0, v0, gvl); | |||
|                 } | |||
|             }else{ /* found a greater element */ | |||
|                 vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|                 ssq += vr[0]; | |||
|                 vr = VFREDMAXVS_FLOAT(v0, v_zero, gvl); | |||
|                 ssq = ssq * (scale/vr[0])*(scale/vr[0]); | |||
|                 scale = vr[0]; | |||
|                 v0 = VFDIVVF_FLOAT(v0, scale, gvl); | |||
|                 vr = VFMACCVV_FLOAT(v_zero, v0, v0, gvl); | |||
|             } | |||
|             /* fold the tail sums into ssq */ | |||
|             vr = VFREDSUM_FLOAT(vr, v_zero, gvl); | |||
|             ssq += vr[0]; | |||
|         } | |||
|     } | |||
|     return(scale * sqrt(ssq)); | |||
| } | |||
| @@ -0,0 +1,70 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order ColMajor | |||
| * No Trans | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Scaled out-of-place copy of a column-major complex matrix: b = alpha * a. | |||
|  * | |||
|  * rows, cols        matrix dimensions; nothing is done unless both are positive | |||
|  * alpha_r, alpha_i  real and imaginary part of the scale factor | |||
|  * a, lda            source and its leading dimension in complex elements | |||
|  * b, ldb            destination and its leading dimension in complex elements | |||
|  * | |||
|  * Values are interleaved (re, im) FLOAT pairs. Always returns 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG col, row; | |||
|     BLASLONG src_step, dst_step; | |||
|     FLOAT *src_col, *dst_col; | |||
|     FLOAT *src, *dst; | |||
|  | |||
|     if ( rows <= 0 || cols <= 0 ) return(0); | |||
|  | |||
|     /* column strides in FLOATs */ | |||
|     src_step = lda * 2; | |||
|     dst_step = ldb * 2; | |||
|  | |||
|     src_col = a; | |||
|     dst_col = b; | |||
|     for ( col = 0; col < cols; col++ ) | |||
|     { | |||
|         src = src_col; | |||
|         dst = dst_col; | |||
|         for ( row = 0; row < rows; row++ ) | |||
|         { | |||
|             /* the source is re-read after the first store, as in a plain indexed loop */ | |||
|             dst[0] = alpha_r * src[0] - alpha_i * src[1]; | |||
|             dst[1] = alpha_r * src[1] + alpha_i * src[0]; | |||
|             src += 2; | |||
|             dst += 2; | |||
|         } | |||
|         src_col += src_step; | |||
|         dst_col += dst_step; | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,69 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order ColMajor | |||
| * No Trans, conjugate | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Scaled out-of-place copy of a column-major complex matrix with conjugation: | |||
|  * b = alpha * conj(a). | |||
|  * | |||
|  * rows, cols        matrix dimensions; nothing is done unless both are positive | |||
|  * alpha_r, alpha_i  real and imaginary part of the scale factor | |||
|  * a, lda            source and its leading dimension in complex elements | |||
|  * b, ldb            destination and its leading dimension in complex elements | |||
|  * | |||
|  * Values are interleaved (re, im) FLOAT pairs. Always returns 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG col, row, pos; | |||
|     BLASLONG src_step, dst_step; | |||
|     FLOAT *src, *dst; | |||
|  | |||
|     if ( rows <= 0 || cols <= 0 ) return(0); | |||
|  | |||
|     /* column strides in FLOATs */ | |||
|     src_step = lda * 2; | |||
|     dst_step = ldb * 2; | |||
|  | |||
|     src = a; | |||
|     dst = b; | |||
|     col = 0; | |||
|     while ( col < cols ) | |||
|     { | |||
|         for ( row = 0, pos = 0; row < rows; row++, pos += 2 ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re - i*im) */ | |||
|             dst[pos] = alpha_r * src[pos] + alpha_i * src[pos+1]; | |||
|             dst[pos+1] = - alpha_r * src[pos+1] + alpha_i * src[pos]; | |||
|         } | |||
|         src += src_step; | |||
|         dst += dst_step; | |||
|         col++; | |||
|     } | |||
|     return(0); | |||
| } | |||
| @@ -0,0 +1,71 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order ColMajor | |||
| * Trans | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Transposing scaled copy: for every complex x read from a, stores | |||
|  * alpha * x in b, where alpha = alpha_r + i*alpha_i. | |||
|  * | |||
|  * Complex values are adjacent (real, imaginary) FLOAT pairs. The outer loop | |||
|  * runs `cols` times, advancing a by 2*lda FLOATs per pass; the inner loop | |||
|  * reads `rows` consecutive pairs. Pair j of pass i is written at FLOAT | |||
|  * offset 2*i + j*(2*ldb) in b, which is what performs the transpose. | |||
|  * | |||
|  * Returns 0 always; nothing is written when rows <= 0 or cols <= 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG src_stride, dst_stride; | |||
|     BLASLONG col, row; | |||
|     FLOAT *src_col; | |||
|     if ( rows <= 0 || cols <= 0 ) | |||
|         return 0; | |||
|     /* Leading dimensions count complex elements; each one is two FLOATs. */ | |||
|     src_stride = 2 * lda; | |||
|     dst_stride = 2 * ldb; | |||
|     src_col = a; | |||
|     for ( col = 0; col < cols; col++, src_col += src_stride ) | |||
|     { | |||
|         FLOAT *src = src_col; | |||
|         FLOAT *dst = b + 2 * col; | |||
|         /* Source advances contiguously, destination jumps by one ldb line. */ | |||
|         for ( row = 0; row < rows; row++, src += 2, dst += dst_stride ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re + i*im) */ | |||
|             dst[0] = alpha_r * src[0] - alpha_i * src[1]; | |||
|             dst[1] = alpha_r * src[1] + alpha_i * src[0]; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,71 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order ColMajor | |||
| * Trans, conjugate | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Conjugate-transposing scaled copy: for every complex x read from a, | |||
|  * stores alpha * conj(x) in b, where alpha = alpha_r + i*alpha_i. | |||
|  * | |||
|  * Complex values are adjacent (real, imaginary) FLOAT pairs. The outer loop | |||
|  * runs `cols` times, advancing a by 2*lda FLOATs per pass; the inner loop | |||
|  * reads `rows` consecutive pairs. Pair j of pass i is written at FLOAT | |||
|  * offset 2*i + j*(2*ldb) in b, which is what performs the transpose. | |||
|  * | |||
|  * Returns 0 always; nothing is written when rows <= 0 or cols <= 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG src_stride, dst_stride; | |||
|     BLASLONG col, row; | |||
|     FLOAT *src_col; | |||
|     if ( rows <= 0 || cols <= 0 ) | |||
|         return 0; | |||
|     /* Leading dimensions count complex elements; each one is two FLOATs. */ | |||
|     src_stride = 2 * lda; | |||
|     dst_stride = 2 * ldb; | |||
|     src_col = a; | |||
|     for ( col = 0; col < cols; col++, src_col += src_stride ) | |||
|     { | |||
|         FLOAT *src = src_col; | |||
|         FLOAT *dst = b + 2 * col; | |||
|         /* Source advances contiguously, destination jumps by one ldb line. */ | |||
|         for ( row = 0; row < rows; row++, src += 2, dst += dst_stride ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re - i*im) */ | |||
|             dst[0] = alpha_r * src[0] + alpha_i * src[1]; | |||
|             dst[1] = - alpha_r * src[1] + alpha_i * src[0]; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,70 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order RowMajor | |||
| * No Trans | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Scaled copy: for every complex x read from a, stores alpha * x in b, | |||
|  * where alpha = alpha_r + i*alpha_i. | |||
|  * | |||
|  * Complex values are adjacent (real, imaginary) FLOAT pairs. The outer loop | |||
|  * runs `rows` times and advances a by 2*lda and b by 2*ldb FLOATs per pass; | |||
|  * the inner loop handles `cols` consecutive pairs at matching offsets. | |||
|  * | |||
|  * Returns 0 always; nothing is written when rows <= 0 or cols <= 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG src_stride, dst_stride; | |||
|     BLASLONG row, col; | |||
|     FLOAT *src_row, *dst_row; | |||
|     if ( rows <= 0 || cols <= 0 ) | |||
|         return 0; | |||
|     /* Leading dimensions count complex elements; each one is two FLOATs. */ | |||
|     src_stride = 2 * lda; | |||
|     dst_stride = 2 * ldb; | |||
|     src_row = a; | |||
|     dst_row = b; | |||
|     for ( row = 0; row < rows; row++, src_row += src_stride, dst_row += dst_stride ) | |||
|     { | |||
|         FLOAT *src = src_row; | |||
|         FLOAT *dst = dst_row; | |||
|         for ( col = 0; col < cols; col++, src += 2, dst += 2 ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re + i*im) */ | |||
|             dst[0] = alpha_r * src[0] - alpha_i * src[1]; | |||
|             dst[1] = alpha_r * src[1] + alpha_i * src[0]; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,69 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order RowMajor | |||
| * No Trans, conjugate | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Conjugating scaled copy: for every complex x read from a, stores | |||
|  * alpha * conj(x) in b, where alpha = alpha_r + i*alpha_i. | |||
|  * | |||
|  * Complex values are adjacent (real, imaginary) FLOAT pairs. The outer loop | |||
|  * runs `rows` times and advances a by 2*lda and b by 2*ldb FLOATs per pass; | |||
|  * the inner loop handles `cols` consecutive pairs at matching offsets. | |||
|  * | |||
|  * Returns 0 always; nothing is written when rows <= 0 or cols <= 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG src_stride, dst_stride; | |||
|     BLASLONG row, col; | |||
|     FLOAT *src_row, *dst_row; | |||
|     if ( rows <= 0 || cols <= 0 ) | |||
|         return 0; | |||
|     /* Leading dimensions count complex elements; each one is two FLOATs. */ | |||
|     src_stride = 2 * lda; | |||
|     dst_stride = 2 * ldb; | |||
|     src_row = a; | |||
|     dst_row = b; | |||
|     for ( row = 0; row < rows; row++, src_row += src_stride, dst_row += dst_stride ) | |||
|     { | |||
|         FLOAT *src = src_row; | |||
|         FLOAT *dst = dst_row; | |||
|         for ( col = 0; col < cols; col++, src += 2, dst += 2 ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re - i*im) */ | |||
|             dst[0] = alpha_r * src[0] + alpha_i * src[1]; | |||
|             dst[1] = - alpha_r * src[1] + alpha_i * src[0]; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order RowMajor | |||
| * Trans | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Transposing scaled copy: for every complex x read from a, stores | |||
|  * alpha * x in b, where alpha = alpha_r + i*alpha_i. | |||
|  * | |||
|  * Complex values are adjacent (real, imaginary) FLOAT pairs. The outer loop | |||
|  * runs `rows` times, advancing a by 2*lda FLOATs per pass; the inner loop | |||
|  * reads `cols` consecutive pairs. Pair j of pass i is written at FLOAT | |||
|  * offset 2*i + j*(2*ldb) in b, which is what performs the transpose. | |||
|  * | |||
|  * Returns 0 always; nothing is written when rows <= 0 or cols <= 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG src_stride, dst_stride; | |||
|     BLASLONG row, col; | |||
|     FLOAT *src_row; | |||
|     if ( rows <= 0 || cols <= 0 ) | |||
|         return 0; | |||
|     /* Leading dimensions count complex elements; each one is two FLOATs. */ | |||
|     src_stride = 2 * lda; | |||
|     dst_stride = 2 * ldb; | |||
|     src_row = a; | |||
|     for ( row = 0; row < rows; row++, src_row += src_stride ) | |||
|     { | |||
|         FLOAT *src = src_row; | |||
|         FLOAT *dst = b + 2 * row; | |||
|         /* Source advances contiguously, destination jumps by one ldb line. */ | |||
|         for ( col = 0; col < cols; col++, src += 2, dst += dst_stride ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re + i*im) */ | |||
|             dst[0] = alpha_r * src[0] - alpha_i * src[1]; | |||
|             dst[1] = alpha_r * src[1] + alpha_i * src[0]; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "common.h" | |||
| /***************************************************** | |||
| * 2014/06/09 Saar | |||
| * | |||
| * Order RowMajor | |||
| * Trans, conjugate | |||
| * | |||
| ******************************************************/ | |||
| /* | |||
|  * Conjugate-transposing scaled copy: for every complex x read from a, | |||
|  * stores alpha * conj(x) in b, where alpha = alpha_r + i*alpha_i. | |||
|  * | |||
|  * Complex values are adjacent (real, imaginary) FLOAT pairs. The outer loop | |||
|  * runs `rows` times, advancing a by 2*lda FLOATs per pass; the inner loop | |||
|  * reads `cols` consecutive pairs. Pair j of pass i is written at FLOAT | |||
|  * offset 2*i + j*(2*ldb) in b, which is what performs the transpose. | |||
|  * | |||
|  * Returns 0 always; nothing is written when rows <= 0 or cols <= 0. | |||
|  */ | |||
| int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) | |||
| { | |||
|     BLASLONG src_stride, dst_stride; | |||
|     BLASLONG row, col; | |||
|     FLOAT *src_row; | |||
|     if ( rows <= 0 || cols <= 0 ) | |||
|         return 0; | |||
|     /* Leading dimensions count complex elements; each one is two FLOATs. */ | |||
|     src_stride = 2 * lda; | |||
|     dst_stride = 2 * ldb; | |||
|     src_row = a; | |||
|     for ( row = 0; row < rows; row++, src_row += src_stride ) | |||
|     { | |||
|         FLOAT *src = src_row; | |||
|         FLOAT *dst = b + 2 * row; | |||
|         /* Source advances contiguously, destination jumps by one ldb line. */ | |||
|         for ( col = 0; col < cols; col++, src += 2, dst += dst_stride ) | |||
|         { | |||
|             /* (alpha_r + i*alpha_i) * (re - i*im) */ | |||
|             dst[0] = alpha_r * src[0] + alpha_i * src[1]; | |||
|             dst[1] = - alpha_r * src[1] + alpha_i * src[0]; | |||
|         } | |||
|     } | |||
|     return 0; | |||
| } | |||