@@ -218,4 +218,7 @@ In chronological order: | |||
* [2022-08] Fix building from sources for QNX | |||
* Mark Seminatore <https://github.com/mseminatore> | |||
* [2023-11-09] Improve Windows threading performance scaling | |||
* [2023-11-09] Improve Windows threading performance scaling | |||
* Dirreke <https://github.com/Dirreke> | |||
* [2024-01-16] Add basic support for the CSKY architecture |
@@ -0,0 +1,4 @@ | |||
ifeq ($(CORE), CK860FV) | |||
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float | |||
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static | |||
endif |
@@ -55,6 +55,10 @@ ifeq ($(TARGET), C910V) | |||
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d | |||
endif | |||
ifeq ($(TARGET), CK860FV) | |||
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float | |||
endif | |||
all: getarch_2nd | |||
./getarch_2nd 0 >> $(TARGET_MAKE) | |||
./getarch_2nd 1 >> $(TARGET_CONF) | |||
@@ -873,6 +873,11 @@ endif | |||
endif | |||
endif | |||
ifeq ($(ARCH), csky) | |||
NO_BINARY_MODE = 1 | |||
BINARY_DEFINED = 1 | |||
endif | |||
# | |||
# C Compiler dependent settings | |||
# | |||
@@ -133,3 +133,7 @@ E2K | |||
EV4 | |||
EV5 | |||
EV6 | |||
14.CSKY | |||
CSKY | |||
CK860FV |
@@ -91,6 +91,7 @@ case "$data" in | |||
*ARCH_ZARCH*) architecture=zarch ;; | |||
*ARCH_RISCV64*) architecture=riscv64 ;; | |||
*ARCH_LOONGARCH64*) architecture=loongarch64 ;; | |||
*ARCH_CSKY*) architecture=csky ;; | |||
esac | |||
defined=0 | |||
@@ -236,6 +237,7 @@ case "$data" in | |||
*ARCH_ARM*) architecture=arm ;; | |||
*ARCH_ZARCH*) architecture=zarch ;; | |||
*ARCH_LOONGARCH64*) architecture=loongarch64 ;; | |||
*ARCH_CSKY*) architecture=csky ;; | |||
esac | |||
binformat='bin32' | |||
@@ -97,6 +97,7 @@ $architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/); | |||
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
$architecture = csky if ($data =~ /ARCH_CSKY/); | |||
$defined = 0; | |||
@@ -156,6 +157,11 @@ if ($architecture eq "loongarch64") { | |||
$binary = 64; | |||
} | |||
if ($architecture eq "csky") { | |||
$defined = 1; | |||
$binary = 32; | |||
} | |||
if ($compiler eq "PGI") { | |||
$compiler_name .= " -tp p7" if ($binary eq "32"); | |||
$compiler_name .= " -tp p7-64" if ($binary eq "64"); | |||
@@ -284,6 +290,7 @@ $architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
$architecture = csky if ($data =~ /ARCH_CSKY/); | |||
$binformat = bin32; | |||
$binformat = bin64 if ($data =~ /BINARY_64/); | |||
@@ -482,6 +482,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||
#include "common_e2k.h" | |||
#endif | |||
#ifdef ARCH_CSKY | |||
#include "common_csky.h" | |||
#endif | |||
#ifndef ASSEMBLER | |||
#ifdef OS_WINDOWSSTORE | |||
typedef char env_var_t[MAX_PATH]; | |||
@@ -0,0 +1,56 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011-2015, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written | |||
permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
#ifndef COMMON_CSKY
#define COMMON_CSKY

/* Memory barriers: the generic, write and read variants all expand to the
   compiler's __sync_synchronize() builtin. */
#define MB   __sync_synchronize()
#define WMB  __sync_synchronize()
#define RMB  __sync_synchronize()

#define INLINE inline

#ifndef ASSEMBLER

/* Return the integer quotient x / y using the plain C division operator.
   y must be non-zero; no check is performed here. */
static inline int blas_quickdivide(blasint x, blasint y)
{
  const blasint quotient = x / y;

  return quotient;
}

#endif

/* 32 << 20 bytes, i.e. 32 MiB. */
#define BUFFER_SIZE  (32 << 20)
#define SEEK_ADDRESS

#endif
@@ -173,6 +173,10 @@ HAVE_C11 | |||
ARCH_E2K | |||
#endif | |||
#if defined(__csky__) | |||
ARCH_CSKY | |||
#endif | |||
#if defined(__EMSCRIPTEN__) | |||
ARCH_RISCV64 | |||
OS_WINDOWS | |||
@@ -150,6 +150,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* #define FORCE_EV4 */ | |||
/* #define FORCE_EV5 */ | |||
/* #define FORCE_EV6 */ | |||
/* #define FORCE_CSKY */ | |||
/* #define FORCE_CK860FV */ | |||
/* #define FORCE_GENERIC */ | |||
#ifdef FORCE_P2 | |||
@@ -1692,6 +1694,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define CORENAME "generic" | |||
#endif | |||
/* Forced target: generic CSKY core.
   ARCHCONFIG is assembled by adjacent string-literal concatenation, so every
   fragment has to end with a space; otherwise two -D options fuse into a
   single token (previously "-DCSKY-DL1_DATA_SIZE=65536"). */
#ifdef FORCE_CSKY
#define FORCE
#define ARCHITECTURE    "CSKY"
#define SUBARCHITECTURE "CSKY"
#define SUBDIRNAME      "csky"
#define ARCHCONFIG   "-DCSKY " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "csky"
#define CORENAME  "CSKY"
#endif
/* Forced target: CK860FV core of the CSKY architecture.
   SUBARCHITECTURE now uses the same spelling as CORENAME, LIBNAME and the
   -DCK860FV option below (it previously read "CK860V").
   Each ARCHCONFIG fragment ends with a space so the concatenated -D options
   stay separated. */
#ifdef FORCE_CK860FV
#define FORCE
#define ARCHITECTURE    "CSKY"
#define SUBARCHITECTURE "CK860FV"
#define SUBDIRNAME      "csky"
#define ARCHCONFIG   "-DCK860FV " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
#define LIBNAME   "ck860fv"
#define CORENAME  "CK860FV"
#endif
#ifndef FORCE | |||
#ifdef USER_TARGET | |||
@@ -1766,7 +1795,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifndef OPENBLAS_SUPPORTED | |||
#error "This arch/CPU is not supported by OpenBLAS." | |||
#endif | |||
@@ -1831,7 +1859,7 @@ int main(int argc, char *argv[]){ | |||
#ifdef FORCE | |||
printf("CORE=%s\n", CORENAME); | |||
#else | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__alpha__) || defined(__csky__) | |||
printf("CORE=%s\n", get_corename()); | |||
#endif | |||
#endif | |||
@@ -1979,7 +2007,7 @@ printf("ELF_VERSION=2\n"); | |||
#ifdef FORCE | |||
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | |||
#else | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) || defined(__csky__) | |||
printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | |||
#endif | |||
#endif | |||
@@ -0,0 +1,149 @@ | |||
SAMAXKERNEL = ../arm/amax.c | |||
DAMAXKERNEL = ../arm/amax.c | |||
CAMAXKERNEL = ../arm/zamax.c | |||
ZAMAXKERNEL = ../arm/zamax.c | |||
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMAXKERNEL = ../arm/iamax.c | |||
IDAMAXKERNEL = ../arm/iamax.c | |||
ICAMAXKERNEL = ../arm/izamax.c | |||
IZAMAXKERNEL = ../arm/izamax.c | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
SASUMKERNEL = ../arm/asum.c | |||
DASUMKERNEL = ../arm/asum.c | |||
CASUMKERNEL = ../arm/zasum.c | |||
ZASUMKERNEL = ../arm/zasum.c | |||
SSUMKERNEL = ../arm/sum.c | |||
DSUMKERNEL = ../arm/sum.c | |||
CSUMKERNEL = ../arm/zsum.c | |||
ZSUMKERNEL = ../arm/zsum.c | |||
SAXPYKERNEL = ../arm/axpy.c | |||
DAXPYKERNEL = ../arm/axpy.c | |||
CAXPYKERNEL = ../arm/zaxpy.c | |||
ZAXPYKERNEL = ../arm/zaxpy.c | |||
SCOPYKERNEL = ../arm/copy.c | |||
DCOPYKERNEL = ../arm/copy.c | |||
CCOPYKERNEL = ../arm/zcopy.c | |||
ZCOPYKERNEL = ../arm/zcopy.c | |||
SDOTKERNEL = ../arm/dot.c | |||
DDOTKERNEL = ../arm/dot.c | |||
CDOTKERNEL = ../arm/zdot.c | |||
ZDOTKERNEL = ../arm/zdot.c | |||
DSDOTKERNEL = ../generic/dot.c | |||
SNRM2KERNEL = ../arm/nrm2.c | |||
DNRM2KERNEL = ../arm/nrm2.c | |||
CNRM2KERNEL = ../arm/znrm2.c | |||
ZNRM2KERNEL = ../arm/znrm2.c | |||
SROTKERNEL = ../arm/rot.c | |||
DROTKERNEL = ../arm/rot.c | |||
CROTKERNEL = ../arm/zrot.c | |||
ZROTKERNEL = ../arm/zrot.c | |||
SSCALKERNEL = ../arm/scal.c | |||
DSCALKERNEL = ../arm/scal.c | |||
CSCALKERNEL = ../arm/zscal.c | |||
ZSCALKERNEL = ../arm/zscal.c | |||
SSWAPKERNEL = ../arm/swap.c | |||
DSWAPKERNEL = ../arm/swap.c | |||
CSWAPKERNEL = ../arm/zswap.c | |||
ZSWAPKERNEL = ../arm/zswap.c | |||
SGEMVNKERNEL = ../arm/gemv_n.c | |||
DGEMVNKERNEL = ../arm/gemv_n.c | |||
CGEMVNKERNEL = ../arm/zgemv_n.c | |||
ZGEMVNKERNEL = ../arm/zgemv_n.c | |||
SGEMVTKERNEL = ../arm/gemv_t.c | |||
DGEMVTKERNEL = ../arm/gemv_t.c | |||
CGEMVTKERNEL = ../arm/zgemv_t.c | |||
ZGEMVTKERNEL = ../arm/zgemv_t.c | |||
STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
SCABS_KERNEL = ../generic/cabs.c | |||
DCABS_KERNEL = ../generic/cabs.c | |||
QCABS_KERNEL = ../generic/cabs.c | |||
LSAME_KERNEL = ../generic/lsame.c | |||
SGEMM_BETA = ../generic/gemm_beta.c | |||
DGEMM_BETA = ../generic/gemm_beta.c | |||
CGEMM_BETA = ../generic/zgemm_beta.c | |||
ZGEMM_BETA = ../generic/zgemm_beta.c | |||
@@ -0,0 +1 @@ | |||
clean :: |
@@ -0,0 +1,13 @@ | |||
TOPDIR = ../../.. | |||
include ../../../Makefile.system | |||
ifndef LASWP | |||
LASWP = ../generic/laswp_k.c | |||
endif | |||
ifndef ZLASWP | |||
ZLASWP = ../generic/zlaswp_k.c | |||
endif | |||
include ../generic/Makefile | |||
@@ -3807,7 +3807,44 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||
#define SYMV_P 16 | |||
#endif | |||
/* GEMM blocking parameters for the CSKY targets (generic CSKY and CK860FV).
   The unroll factors are 2 for both M and N in all four precisions. */
#if defined(CSKY) || defined(CK860FV)

/* Each of these is defined exactly once; the block previously repeated the
   two OFFSET definitions after SYMV_P. */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL

#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2

#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2

#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2

#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2

#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 96
#define ZGEMM_DEFAULT_P 64

#define SGEMM_DEFAULT_Q 240
#define DGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_Q 120

#define SGEMM_DEFAULT_R 12288
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#define SYMV_P 16

#endif
#ifdef GENERIC | |||