| @@ -891,11 +891,9 @@ BINARY_DEFINED = 1 | |||||
| endif | endif | ||||
# Compiler flags for loongarch64: -march=loongarch64 -mabi=lp64 for both the C
# and Fortran option lists. The $(CORE) == LOONGSON3R5 guard and one closing
# endif appear in only one column of this diff rendering; without them the
# flags apply to every loongarch64 core.
# NOTE(review): presumably the guard is the removed side - confirm against the patch.
| ifeq ($(ARCH), loongarch64) | ifeq ($(ARCH), loongarch64) | ||||
| ifeq ($(CORE), LOONGSON3R5) | |||||
| CCOMMON_OPT += -march=loongarch64 -mabi=lp64 | CCOMMON_OPT += -march=loongarch64 -mabi=lp64 | ||||
| FCOMMON_OPT += -march=loongarch64 -mabi=lp64 | FCOMMON_OPT += -march=loongarch64 -mabi=lp64 | ||||
| endif | endif | ||||
| endif | |||||
| endif | endif | ||||
| @@ -121,7 +121,9 @@ RISCV64_GENERIC | |||||
| C910V | C910V | ||||
| 11.LOONGARCH64: | 11.LOONGARCH64: | ||||
| LOONGSONGENERIC | |||||
| LOONGSON3R5 | LOONGSON3R5 | ||||
| LOONGSON2K1000 | |||||
| 12. Elbrus E2000: | 12. Elbrus E2000: | ||||
| E2K | E2K | ||||
| @@ -33,30 +33,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
/* Core ids returned by detect(). Rows with a single code column in this diff
 * rendering are changed lines. CPU_UNKNOWN and CPU_GENERIC are both defined
 * as 0, and CPU_LOONGSON3R5 is 1 in both sets. */
| #define CPU_UNKNOWN 0 | |||||
| #define CPU_LOONGSON3R5 1 | |||||
| /* If LASX extension instructions are supported, | |||||
| * core LOONGSON3R5 is used. | |||||
| * If only LSX extension instructions are supported, | |||||
| * core LOONGSON2K1000 is used. | |||||
| * If neither LASX nor LSX extension instructions are supported, | |||||
| * core LOONGSONGENERIC is used (as far as the author knows, no such | |||||
| * CPU exists yet). | |||||
| */ | |||||
| #define CPU_GENERIC 0 | |||||
| #define CPU_LOONGSON3R5 1 | |||||
| #define CPU_LOONGSON2K1000 2 | |||||
/* LOONGARCH_CFG2 is the input operand handed to the cpucfg instruction in
 * detect(); LOONGARCH_LASX and LOONGARCH_LSX are the bits tested in the value
 * it returns.
 * NOTE(review): the two bit macros expand to unparenthesized shift
 * expressions. The tests in detect() have the form reg & 1<<7, which still
 * groups as reg & (1<<7) because << binds tighter than &, but an expansion
 * next to a higher-precedence operator would not group as intended. */
| #define LOONGARCH_CFG2 0x02 | #define LOONGARCH_CFG2 0x02 | ||||
| #define LOONGARCH_LASX 1<<7 | #define LOONGARCH_LASX 1<<7 | ||||
| #define LOONGARCH_LSX 1<<6 | |||||
/* Name tables indexed by the core id that detect() returns. Two sets of
 * entries for cpuname are interleaved in this diff rendering: a two-entry set
 * ("UNKNOWN", "LOONGSON3R5") and a three-entry set whose order matches
 * CPU_GENERIC = 0, CPU_LOONGSON3R5 = 1, CPU_LOONGSON2K1000 = 2.
 * NOTE(review): presumably the two-entry set is the removed side of the
 * change - confirm against the patch. */
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | |||||
| "LOONGSON3R5" | |||||
| "LOONGSONGENERIC", | |||||
| "LOONGSON3R5", | |||||
| "LOONGSON2K1000" | |||||
| }; | |||||
/* Lower-case spellings in the same index order; get_libname() prints from
 * this table. */
| static char *cpuname_lower[] = { | |||||
| "loongsongeneric", | |||||
| "loongson3r5", | |||||
| "loongson2k1000" | |||||
| }; | }; | ||||
/* detect: execute cpucfg with LOONGARCH_CFG2 as the input operand and map the
 * feature bits of the result to a core id.
 *
 * Two bodies are interleaved in this diff rendering.
 * NOTE(review): presumably the first is the removed side and the second the
 * added side - confirm against the patch. */
| int detect(void) { | int detect(void) { | ||||
/* First body: only the LASX bit is tested; returns CPU_LOONGSON3R5 when it is
 * set and CPU_UNKNOWN otherwise. */
| uint32_t reg = 0; | |||||
| __asm__ volatile ( | |||||
| "cpucfg %0, %1 \n\t" | |||||
| : "+&r"(reg) | |||||
| : "r"(LOONGARCH_CFG2) | |||||
| ); | |||||
| if (reg & LOONGARCH_LASX) | |||||
| return CPU_LOONGSON3R5; | |||||
| else | |||||
| return CPU_UNKNOWN; | |||||
/* Second body: the probe is compiled only when __linux is defined. LASX is
 * tested before LSX, so a CPU reporting both bits yields CPU_LOONGSON3R5;
 * LSX alone yields CPU_LOONGSON2K1000; neither yields CPU_GENERIC. */
| #ifdef __linux | |||||
| uint32_t reg = 0; | |||||
| __asm__ volatile ( | |||||
| "cpucfg %0, %1 \n\t" | |||||
| : "+&r"(reg) | |||||
| : "r"(LOONGARCH_CFG2) | |||||
| ); | |||||
| if (reg & LOONGARCH_LASX) | |||||
| return CPU_LOONGSON3R5; | |||||
| else if (reg & LOONGARCH_LSX) | |||||
| return CPU_LOONGSON2K1000; | |||||
| else | |||||
| return CPU_GENERIC; | |||||
| #endif | |||||
/* Every branch of the chain above returns, so this return supplies the
 * result only when __linux is not defined. */
| return CPU_GENERIC; | |||||
| } | } | ||||
| char *get_corename(void) { | char *get_corename(void) { | ||||
| @@ -68,11 +91,8 @@ void get_architecture(void) { | |||||
| } | } | ||||
/* get_subarchitecture: print the upper-case core name for the detected CPU to
 * stdout, without a trailing newline.
 *
 * Two bodies are interleaved in this diff rendering: one compares detect()
 * against CPU_LOONGSON3R5 and prints "LOONGSON3R5" or "UNKNOWN"; the other
 * prints cpuname[detect()].
 * NOTE(review): the table lookup is not bounds-checked; it relies on detect()
 * returning only ids that index cpuname. */
| void get_subarchitecture(void) { | void get_subarchitecture(void) { | ||||
| if (detect() == CPU_LOONGSON3R5) { | |||||
| printf("LOONGSON3R5"); | |||||
| } else { | |||||
| printf("UNKNOWN"); | |||||
| } | |||||
| int d = detect(); | |||||
| printf("%s", cpuname[d]); | |||||
| } | } | ||||
| void get_subdirname(void) { | void get_subdirname(void) { | ||||
| @@ -80,31 +100,44 @@ void get_subdirname(void) { | |||||
| } | } | ||||
/* get_cpuconfig: print a list of #define lines (core name, L1/L2 cache
 * sizes and line sizes, DTB settings, L2 associativity) for the detected
 * core to stdout.
 *
 * Two bodies are interleaved in this diff rendering.
 * NOTE(review): presumably the if/else is the removed side and the switch the
 * added side - confirm against the patch. */
| void get_cpuconfig(void) { | void get_cpuconfig(void) { | ||||
/* First body: both branches of the if/else print the same eight lines,
 * including "#define LOONGSON3R5". */
| if (detect() == CPU_LOONGSON3R5) { | |||||
| printf("#define LOONGSON3R5\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 1048576\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| } else { | |||||
| printf("#define LOONGSON3R5\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 1048576\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
/* Second body: one case per core id. The three cases differ only in the core
 * name and in L2_SIZE (1048576 for LOONGSON3R5, 262144 for the other two). */
| int d = detect(); | |||||
| switch (d) { | |||||
| case CPU_LOONGSON3R5: | |||||
| printf("#define LOONGSON3R5\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 1048576\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| break; | |||||
| case CPU_LOONGSON2K1000: | |||||
| printf("#define LOONGSON2K1000\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 262144\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| break; | |||||
/* Any id other than the two above, CPU_GENERIC included, is reported as
 * LOONGSONGENERIC. */
| default: | |||||
| printf("#define LOONGSONGENERIC\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 262144\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
/* get_libname: print the lower-case library name for the detected core to
 * stdout.
 *
 * Two bodies are interleaved in this diff rendering: one prints
 * "loongson3r5\n" or "loongarch64\n"; the other prints
 * cpuname_lower[detect()].
 * NOTE(review): the if/else variant ends its output with a newline and the
 * table variant does not, and the fallback name differs ("loongarch64" versus
 * "loongsongeneric") - confirm that consumers of this output expect that. */
| void get_libname(void){ | void get_libname(void){ | ||||
| if (detect() == CPU_LOONGSON3R5) { | |||||
| printf("loongson3r5\n"); | |||||
| } else { | |||||
| printf("loongarch64\n"); | |||||
| } | |||||
| int d = detect(); | |||||
| printf("%s", cpuname_lower[d]); | |||||
| } | } | ||||
| @@ -132,9 +132,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /* #define FORCE_PPC440FP2 */ | /* #define FORCE_PPC440FP2 */ | ||||
| /* #define FORCE_CELL */ | /* #define FORCE_CELL */ | ||||
| /* #define FORCE_SICORTEX */ | /* #define FORCE_SICORTEX */ | ||||
| /* #define FORCE_LOONGSON3R3 */ | |||||
| /* #define FORCE_LOONGSON3R4 */ | |||||
| /* #define FORCE_LOONGSON3R5 */ | |||||
| /* #define FORCE_LOONGSON3R3 */ | |||||
| /* #define FORCE_LOONGSON3R4 */ | |||||
| /* #define FORCE_LOONGSON3R5 */ | |||||
| /* #define FORCE_LOONGSON2K1000 */ | |||||
| /* #define FORCE_LOONGSONGENERIC */ | |||||
| /* #define FORCE_I6400 */ | /* #define FORCE_I6400 */ | ||||
| /* #define FORCE_P6600 */ | /* #define FORCE_P6600 */ | ||||
| /* #define FORCE_P5600 */ | /* #define FORCE_P5600 */ | ||||
| @@ -969,6 +971,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #else | #else | ||||
| #endif | #endif | ||||
/* Fixed configuration selected when FORCE_LOONGSON2K1000 is defined: defines
 * FORCE plus the architecture, sub-architecture, subdirectory, library and
 * core name strings, and an ARCHCONFIG string of -D options. The cache and
 * DTB numbers are the same ones get_cpuconfig() prints for
 * CPU_LOONGSON2K1000 elsewhere in this diff. The #else branch is empty. */
| #ifdef FORCE_LOONGSON2K1000 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "LOONGARCH" | |||||
| #define SUBARCHITECTURE "LOONGSON2K1000" | |||||
| #define SUBDIRNAME "loongarch64" | |||||
| #define ARCHCONFIG "-DLOONGSON2K1000 " \ | |||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " | |||||
| #define LIBNAME "loongson2k1000" | |||||
| #define CORENAME "LOONGSON2K1000" | |||||
| #else | |||||
| #endif | |||||
/* Fixed configuration selected when FORCE_LOONGSONGENERIC is defined: defines
 * FORCE plus the architecture, sub-architecture, subdirectory, library and
 * core name strings, and an ARCHCONFIG string of -D options. The cache and
 * DTB numbers are the same ones get_cpuconfig() prints in its default case
 * elsewhere in this diff. The #else branch is empty. */
| #ifdef FORCE_LOONGSONGENERIC | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "LOONGARCH" | |||||
| #define SUBARCHITECTURE "LOONGSONGENERIC" | |||||
| #define SUBDIRNAME "loongarch64" | |||||
| #define ARCHCONFIG "-DLOONGSONGENERIC " \ | |||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 " | |||||
| #define LIBNAME "loongsongeneric" | |||||
| #define CORENAME "LOONGSONGENERIC" | |||||
| #else | |||||
| #endif | |||||
| #ifdef FORCE_I6400 | #ifdef FORCE_I6400 | ||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "MIPS" | #define ARCHITECTURE "MIPS" | ||||
| @@ -2881,6 +2881,76 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||
/* GEMM default parameters used when LOONGSON2K1000 is defined: buffer offsets
 * and alignment mask, per-precision unroll factors (S/D/C/Z), the P, Q and R
 * defaults, and SYMV_P. Every value here is identical to the LOONGSONGENERIC
 * block that follows it in this diff. */
| #ifdef LOONGSON2K1000 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL | |||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define SGEMM_DEFAULT_P 128 | |||||
| #define DGEMM_DEFAULT_P 128 | |||||
| #define CGEMM_DEFAULT_P 96 | |||||
| #define ZGEMM_DEFAULT_P 64 | |||||
| #define SGEMM_DEFAULT_Q 240 | |||||
| #define DGEMM_DEFAULT_Q 120 | |||||
| #define CGEMM_DEFAULT_Q 120 | |||||
| #define ZGEMM_DEFAULT_Q 120 | |||||
| #define SGEMM_DEFAULT_R 12288 | |||||
| #define DGEMM_DEFAULT_R 8192 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 4096 | |||||
| #define SYMV_P 16 | |||||
| #endif | |||||
/* GEMM default parameters used when LOONGSONGENERIC is defined: buffer
 * offsets and alignment mask, per-precision unroll factors (S/D/C/Z), the P,
 * Q and R defaults, and SYMV_P. Every value here is identical to the
 * LOONGSON2K1000 block that precedes it in this diff. */
| #ifdef LOONGSONGENERIC | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL | |||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | |||||
| #define SGEMM_DEFAULT_P 128 | |||||
| #define DGEMM_DEFAULT_P 128 | |||||
| #define CGEMM_DEFAULT_P 96 | |||||
| #define ZGEMM_DEFAULT_P 64 | |||||
| #define SGEMM_DEFAULT_Q 240 | |||||
| #define DGEMM_DEFAULT_Q 120 | |||||
| #define CGEMM_DEFAULT_Q 120 | |||||
| #define ZGEMM_DEFAULT_Q 120 | |||||
| #define SGEMM_DEFAULT_R 12288 | |||||
| #define DGEMM_DEFAULT_R 8192 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 4096 | |||||
| #define SYMV_P 16 | |||||
| #endif | |||||
| #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) | #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) | ||||
| #define SNUMOPT 2 | #define SNUMOPT 2 | ||||
| #define DNUMOPT 2 | #define DNUMOPT 2 | ||||