| @@ -218,4 +218,7 @@ In chronological order: | |||
| * [2022-08] Fix building from sources for QNX | |||
| * Mark Seminatore <https://github.com/mseminatore> | |||
| * [2023-11-09] Improve Windows threading performance scaling | |||
| * [2023-11-09] Improve Windows threading performance scaling | |||
| * Dirreke <https://github.com/Dirreke> | |||
| * [2024-01-16] Add basic support for the CSKY architecture | |||
| @@ -0,0 +1,4 @@ | |||
| ifeq ($(CORE), CK860FV) | |||
| CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float | |||
| FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static | |||
| endif | |||
| @@ -55,6 +55,10 @@ ifeq ($(TARGET), C910V) | |||
| TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d | |||
| endif | |||
| ifeq ($(TARGET), CK860FV) | |||
| TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float | |||
| endif | |||
| all: getarch_2nd | |||
| ./getarch_2nd 0 >> $(TARGET_MAKE) | |||
| ./getarch_2nd 1 >> $(TARGET_CONF) | |||
| @@ -873,6 +873,11 @@ endif | |||
| endif | |||
| endif | |||
| ifeq ($(ARCH), csky) | |||
| NO_BINARY_MODE = 1 | |||
| BINARY_DEFINED = 1 | |||
| endif | |||
| # | |||
| # C Compiler dependent settings | |||
| # | |||
| @@ -133,3 +133,7 @@ E2K | |||
| EV4 | |||
| EV5 | |||
| EV6 | |||
| 14.CSKY | |||
| CSKY | |||
| CK860FV | |||
| @@ -91,6 +91,7 @@ case "$data" in | |||
| *ARCH_ZARCH*) architecture=zarch ;; | |||
| *ARCH_RISCV64*) architecture=riscv64 ;; | |||
| *ARCH_LOONGARCH64*) architecture=loongarch64 ;; | |||
| *ARCH_CSKY*) architecture=csky ;; | |||
| esac | |||
| defined=0 | |||
| @@ -236,6 +237,7 @@ case "$data" in | |||
| *ARCH_ARM*) architecture=arm ;; | |||
| *ARCH_ZARCH*) architecture=zarch ;; | |||
| *ARCH_LOONGARCH64*) architecture=loongarch64 ;; | |||
| *ARCH_CSKY*) architecture=csky ;; | |||
| esac | |||
| binformat='bin32' | |||
| @@ -97,6 +97,7 @@ $architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
| $architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
| $architecture = riscv64 if ($data =~ /ARCH_RISCV64/); | |||
| $architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
| $architecture = csky if ($data =~ /ARCH_CSKY/); | |||
| $defined = 0; | |||
| @@ -156,6 +157,11 @@ if ($architecture eq "loongarch64") { | |||
| $binary = 64; | |||
| } | |||
| if ($architecture eq "csky") { | |||
| $defined = 1; | |||
| $binary = 32; | |||
| } | |||
| if ($compiler eq "PGI") { | |||
| $compiler_name .= " -tp p7" if ($binary eq "32"); | |||
| $compiler_name .= " -tp p7-64" if ($binary eq "64"); | |||
| @@ -284,6 +290,7 @@ $architecture = arm if ($data =~ /ARCH_ARM/); | |||
| $architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
| $architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
| $architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
| $architecture = csky if ($data =~ /ARCH_CSKY/); | |||
| $binformat = bin32; | |||
| $binformat = bin64 if ($data =~ /BINARY_64/); | |||
| @@ -482,6 +482,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||
| #include "common_e2k.h" | |||
| #endif | |||
| #ifdef ARCH_CSKY | |||
| #include "common_csky.h" | |||
| #endif | |||
| #ifndef ASSEMBLER | |||
| #ifdef OS_WINDOWSSTORE | |||
| typedef char env_var_t[MAX_PATH]; | |||
| @@ -0,0 +1,56 @@ | |||
| /***************************************************************************** | |||
| Copyright (c) 2011-2015, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written | |||
| permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #ifndef COMMON_CSKY | |||
| #define COMMON_CSKY | |||
| #define MB __sync_synchronize() /* MB, WMB and RMB below all expand to the same compiler barrier builtin */ | |||
| #define WMB __sync_synchronize() | |||
| #define RMB __sync_synchronize() | |||
| #define INLINE inline | |||
| #ifndef ASSEMBLER /* C-only helpers; hidden when this header is pulled into assembly sources */ | |||
| static inline int blas_quickdivide(blasint x, blasint y){ /* plain integer quotient x / y; y is not checked for zero */ | |||
| return x / y; | |||
| } | |||
| #endif /* !ASSEMBLER */ | |||
| #define BUFFER_SIZE ( 32 << 20) /* 32 * 2^20 */ | |||
| #define SEEK_ADDRESS | |||
| #endif /* COMMON_CSKY */ | |||
| @@ -173,6 +173,10 @@ HAVE_C11 | |||
| ARCH_E2K | |||
| #endif | |||
| #if defined(__csky__) | |||
| ARCH_CSKY | |||
| #endif | |||
| #if defined(__EMSCRIPTEN__) | |||
| ARCH_RISCV64 | |||
| OS_WINDOWS | |||
| @@ -150,6 +150,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /* #define FORCE_EV4 */ | |||
| /* #define FORCE_EV5 */ | |||
| /* #define FORCE_EV6 */ | |||
| /* #define FORCE_CSKY */ | |||
| /* #define FORCE_CK860FV */ | |||
| /* #define FORCE_GENERIC */ | |||
| #ifdef FORCE_P2 | |||
| @@ -1692,6 +1694,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CORENAME "generic" | |||
| #endif | |||
| #ifdef FORCE_CSKY /* forced generic C-SKY target; every ARCHCONFIG fragment must end in a space because the literals are concatenated */ | |||
| #define FORCE | |||
| #define ARCHITECTURE "CSKY" | |||
| #define SUBARCHITECTURE "CSKY" | |||
| #define SUBDIRNAME "csky" | |||
| #define ARCHCONFIG "-DCSKY " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " | |||
| #define LIBNAME "csky" | |||
| #define CORENAME "CSKY" | |||
| #endif /* FORCE_CSKY */ | |||
| #ifdef FORCE_CK860FV /* forced CK860FV core of the CSKY architecture */ | |||
| #define FORCE | |||
| #define ARCHITECTURE "CSKY" | |||
| #define SUBARCHITECTURE "CK860V" /* NOTE(review): CORENAME and ARCHCONFIG in this block say CK860FV; confirm "CK860V" is intended */ | |||
| #define SUBDIRNAME "csky" | |||
| #define ARCHCONFIG "-DCK860FV " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " | |||
| #define LIBNAME "ck860fv" | |||
| #define CORENAME "CK860FV" | |||
| #endif /* FORCE_CK860FV */ | |||
| #ifndef FORCE | |||
| #ifdef USER_TARGET | |||
| @@ -1766,7 +1795,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifndef OPENBLAS_SUPPORTED | |||
| #error "This arch/CPU is not supported by OpenBLAS." | |||
| #endif | |||
| @@ -1831,7 +1859,7 @@ int main(int argc, char *argv[]){ | |||
| #ifdef FORCE | |||
| printf("CORE=%s\n", CORENAME); | |||
| #else | |||
| #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) | |||
| #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) || defined(__csky__) | |||
| printf("CORE=%s\n", get_corename()); | |||
| #endif | |||
| #endif | |||
| @@ -1979,7 +2007,7 @@ printf("ELF_VERSION=2\n"); | |||
| #ifdef FORCE | |||
| printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | |||
| #else | |||
| #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) | |||
| #if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__csky__) | |||
| printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,149 @@ | |||
| SAMAXKERNEL = ../arm/amax.c | |||
| DAMAXKERNEL = ../arm/amax.c | |||
| CAMAXKERNEL = ../arm/zamax.c | |||
| ZAMAXKERNEL = ../arm/zamax.c | |||
| SAMINKERNEL = ../arm/amin.c | |||
| DAMINKERNEL = ../arm/amin.c | |||
| CAMINKERNEL = ../arm/zamin.c | |||
| ZAMINKERNEL = ../arm/zamin.c | |||
| SMAXKERNEL = ../arm/max.c | |||
| DMAXKERNEL = ../arm/max.c | |||
| SMINKERNEL = ../arm/min.c | |||
| DMINKERNEL = ../arm/min.c | |||
| ISAMAXKERNEL = ../arm/iamax.c | |||
| IDAMAXKERNEL = ../arm/iamax.c | |||
| ICAMAXKERNEL = ../arm/izamax.c | |||
| IZAMAXKERNEL = ../arm/izamax.c | |||
| ISAMINKERNEL = ../arm/iamin.c | |||
| IDAMINKERNEL = ../arm/iamin.c | |||
| ICAMINKERNEL = ../arm/izamin.c | |||
| IZAMINKERNEL = ../arm/izamin.c | |||
| ISMAXKERNEL = ../arm/imax.c | |||
| IDMAXKERNEL = ../arm/imax.c | |||
| ISMINKERNEL = ../arm/imin.c | |||
| IDMINKERNEL = ../arm/imin.c | |||
| SASUMKERNEL = ../arm/asum.c | |||
| DASUMKERNEL = ../arm/asum.c | |||
| CASUMKERNEL = ../arm/zasum.c | |||
| ZASUMKERNEL = ../arm/zasum.c | |||
| SSUMKERNEL = ../arm/sum.c | |||
| DSUMKERNEL = ../arm/sum.c | |||
| CSUMKERNEL = ../arm/zsum.c | |||
| ZSUMKERNEL = ../arm/zsum.c | |||
| SAXPYKERNEL = ../arm/axpy.c | |||
| DAXPYKERNEL = ../arm/axpy.c | |||
| CAXPYKERNEL = ../arm/zaxpy.c | |||
| ZAXPYKERNEL = ../arm/zaxpy.c | |||
| SCOPYKERNEL = ../arm/copy.c | |||
| DCOPYKERNEL = ../arm/copy.c | |||
| CCOPYKERNEL = ../arm/zcopy.c | |||
| ZCOPYKERNEL = ../arm/zcopy.c | |||
| SDOTKERNEL = ../arm/dot.c | |||
| DDOTKERNEL = ../arm/dot.c | |||
| CDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = ../arm/zdot.c | |||
| DSDOTKERNEL = ../generic/dot.c | |||
| SNRM2KERNEL = ../arm/nrm2.c | |||
| DNRM2KERNEL = ../arm/nrm2.c | |||
| CNRM2KERNEL = ../arm/znrm2.c | |||
| ZNRM2KERNEL = ../arm/znrm2.c | |||
| SROTKERNEL = ../arm/rot.c | |||
| DROTKERNEL = ../arm/rot.c | |||
| CROTKERNEL = ../arm/zrot.c | |||
| ZROTKERNEL = ../arm/zrot.c | |||
| SSCALKERNEL = ../arm/scal.c | |||
| DSCALKERNEL = ../arm/scal.c | |||
| CSCALKERNEL = ../arm/zscal.c | |||
| ZSCALKERNEL = ../arm/zscal.c | |||
| SSWAPKERNEL = ../arm/swap.c | |||
| DSWAPKERNEL = ../arm/swap.c | |||
| CSWAPKERNEL = ../arm/zswap.c | |||
| ZSWAPKERNEL = ../arm/zswap.c | |||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||
| DGEMVNKERNEL = ../arm/gemv_n.c | |||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||
| ZGEMVNKERNEL = ../arm/zgemv_n.c | |||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||
| DGEMVTKERNEL = ../arm/gemv_t.c | |||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||
| ZGEMVTKERNEL = ../arm/zgemv_t.c | |||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| SCABS_KERNEL = ../generic/cabs.c | |||
| DCABS_KERNEL = ../generic/cabs.c | |||
| QCABS_KERNEL = ../generic/cabs.c | |||
| LSAME_KERNEL = ../generic/lsame.c | |||
| SGEMM_BETA = ../generic/gemm_beta.c | |||
| DGEMM_BETA = ../generic/gemm_beta.c | |||
| CGEMM_BETA = ../generic/zgemm_beta.c | |||
| ZGEMM_BETA = ../generic/zgemm_beta.c | |||
| @@ -0,0 +1 @@ | |||
| clean :: | |||
| @@ -0,0 +1,13 @@ | |||
| TOPDIR = ../../.. | |||
| include ../../../Makefile.system | |||
| ifndef LASWP | |||
| LASWP = ../generic/laswp_k.c | |||
| endif | |||
| ifndef ZLASWP | |||
| ZLASWP = ../generic/zlaswp_k.c | |||
| endif | |||
| include ../generic/Makefile | |||
| @@ -3807,7 +3807,44 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||
| #define SYMV_P 16 | |||
| #endif | |||
| #if defined(CSKY) || defined(CK860FV) /* GEMM tuning defaults shared by the generic CSKY and CK860FV targets */ | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||
| #define SGEMM_DEFAULT_P 128 | |||
| #define DGEMM_DEFAULT_P 128 | |||
| #define CGEMM_DEFAULT_P 96 | |||
| #define ZGEMM_DEFAULT_P 64 | |||
| #define SGEMM_DEFAULT_Q 240 | |||
| #define DGEMM_DEFAULT_Q 120 | |||
| #define CGEMM_DEFAULT_Q 120 | |||
| #define ZGEMM_DEFAULT_Q 120 | |||
| #define SGEMM_DEFAULT_R 12288 | |||
| #define DGEMM_DEFAULT_R 8192 | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #define SYMV_P 16 | |||
| #endif /* CSKY || CK860FV */ | |||
| #ifdef GENERIC | |||