| @@ -891,11 +891,9 @@ BINARY_DEFINED = 1 | |||
| endif | |||
| ifeq ($(ARCH), loongarch64) | |||
| ifeq ($(CORE), LOONGSON3R5) | |||
| CCOMMON_OPT += -march=loongarch64 -mabi=lp64 | |||
| FCOMMON_OPT += -march=loongarch64 -mabi=lp64 | |||
| endif | |||
| endif | |||
| endif | |||
| @@ -121,7 +121,9 @@ RISCV64_GENERIC | |||
| C910V | |||
| 11.LOONGARCH64: | |||
| LOONGSONGENERIC | |||
| LOONGSON3R5 | |||
| LOONGSON2K1000 | |||
| 12. Elbrus E2000: | |||
| E2K | |||
| @@ -33,30 +33,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <stdint.h> | |||
/* Core identifiers. They double as indices into the core-name tables.
 *
 * If LASX extension instructions are supported, core LOONGSON3R5 is used.
 * If only LSX extension instructions are supported, core LOONGSON2K1000
 * is used.
 * If neither LASX nor LSX extension instructions are supported, core
 * LOONGSONGENERIC is used (no such CPU is known to exist yet).
 */
#define CPU_GENERIC        0
#define CPU_UNKNOWN        0  /* older name with the same value as CPU_GENERIC;
                                 kept so existing references still compile */
#define CPU_LOONGSON3R5    1
#define CPU_LOONGSON2K1000 2

/* CPUCFG word queried by detect() and the feature bits tested in it.
 * The bit masks are parenthesized so they expand as a single operand next
 * to any operator (e.g. `x / LOONGARCH_LASX`, `~LOONGARCH_LSX`).
 */
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_LASX (1 << 7)
#define LOONGARCH_LSX  (1 << 6)
/* Upper-case core names, indexed by the CPU_* identifiers:
 * 0 = CPU_GENERIC, 1 = CPU_LOONGSON3R5, 2 = CPU_LOONGSON2K1000.
 * Every entry must be followed by a comma (except the last): adjacent
 * string literals are silently concatenated and would shift the table.
 */
static char *cpuname[] = {
  "LOONGSONGENERIC",
  "LOONGSON3R5",
  "LOONGSON2K1000"
};
/* Lower-case core names, one slot per CPU_* identifier. */
static char *cpuname_lower[] = {
  [0] = "loongsongeneric",  /* CPU_GENERIC */
  [1] = "loongson3r5",      /* CPU_LOONGSON3R5 */
  [2] = "loongson2k1000"    /* CPU_LOONGSON2K1000 */
};
| int detect(void) { | |||
| uint32_t reg = 0; | |||
| __asm__ volatile ( | |||
| "cpucfg %0, %1 \n\t" | |||
| : "+&r"(reg) | |||
| : "r"(LOONGARCH_CFG2) | |||
| ); | |||
| if (reg & LOONGARCH_LASX) | |||
| return CPU_LOONGSON3R5; | |||
| else | |||
| return CPU_UNKNOWN; | |||
| #ifdef __linux | |||
| uint32_t reg = 0; | |||
| __asm__ volatile ( | |||
| "cpucfg %0, %1 \n\t" | |||
| : "+&r"(reg) | |||
| : "r"(LOONGARCH_CFG2) | |||
| ); | |||
| if (reg & LOONGARCH_LASX) | |||
| return CPU_LOONGSON3R5; | |||
| else if (reg & LOONGARCH_LSX) | |||
| return CPU_LOONGSON2K1000; | |||
| else | |||
| return CPU_GENERIC; | |||
| #endif | |||
| return CPU_GENERIC; | |||
| } | |||
| char *get_corename(void) { | |||
| @@ -68,11 +91,8 @@ void get_architecture(void) { | |||
| } | |||
| void get_subarchitecture(void) { | |||
| if (detect() == CPU_LOONGSON3R5) { | |||
| printf("LOONGSON3R5"); | |||
| } else { | |||
| printf("UNKNOWN"); | |||
| } | |||
| int d = detect(); | |||
| printf("%s", cpuname[d]); | |||
| } | |||
| void get_subdirname(void) { | |||
| @@ -80,31 +100,44 @@ void get_subdirname(void) { | |||
| } | |||
| void get_cpuconfig(void) { | |||
| if (detect() == CPU_LOONGSON3R5) { | |||
| printf("#define LOONGSON3R5\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 1048576\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| } else { | |||
| printf("#define LOONGSON3R5\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 1048576\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| int d = detect(); | |||
| switch (d) { | |||
| case CPU_LOONGSON3R5: | |||
| printf("#define LOONGSON3R5\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 1048576\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| break; | |||
| case CPU_LOONGSON2K1000: | |||
| printf("#define LOONGSON2K1000\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 262144\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| break; | |||
| default: | |||
| printf("#define LOONGSONGENERIC\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 262144\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||
| break; | |||
| } | |||
| } | |||
| void get_libname(void){ | |||
| if (detect() == CPU_LOONGSON3R5) { | |||
| printf("loongson3r5\n"); | |||
| } else { | |||
| printf("loongarch64\n"); | |||
| } | |||
| int d = detect(); | |||
| printf("%s", cpuname_lower[d]); | |||
| } | |||
| @@ -132,9 +132,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| /* #define FORCE_PPC440FP2 */ | |||
| /* #define FORCE_CELL */ | |||
| /* #define FORCE_SICORTEX */ | |||
| /* #define FORCE_LOONGSON3R3 */ | |||
| /* #define FORCE_LOONGSON3R4 */ | |||
| /* #define FORCE_LOONGSON3R5 */ | |||
| /* #define FORCE_LOONGSON3R3 */ | |||
| /* #define FORCE_LOONGSON3R4 */ | |||
| /* #define FORCE_LOONGSON3R5 */ | |||
| /* #define FORCE_LOONGSON2K1000 */ | |||
| /* #define FORCE_LOONGSONGENERIC */ | |||
| /* #define FORCE_I6400 */ | |||
| /* #define FORCE_P6600 */ | |||
| /* #define FORCE_P5600 */ | |||
| @@ -969,6 +971,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| #endif | |||
/* Forced target LOONGSON2K1000: when FORCE_LOONGSON2K1000 is defined, set
 * FORCE and supply the fixed architecture strings and config flags. */
#if defined(FORCE_LOONGSON2K1000)
#define FORCE
#define ARCHITECTURE    "LOONGARCH"
#define SUBARCHITECTURE "LOONGSON2K1000"
#define SUBDIRNAME      "loongarch64"
#define LIBNAME         "loongson2k1000"
#define CORENAME        "LOONGSON2K1000"
/* Adjacent literals form one flag string; each piece ends with a space. */
#define ARCHCONFIG      "-DLOONGSON2K1000 " \
                        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
                        "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
                        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
#endif
/* Forced target LOONGSONGENERIC: when FORCE_LOONGSONGENERIC is defined, set
 * FORCE and supply the fixed architecture strings and config flags. */
#if defined(FORCE_LOONGSONGENERIC)
#define FORCE
#define ARCHITECTURE    "LOONGARCH"
#define SUBARCHITECTURE "LOONGSONGENERIC"
#define SUBDIRNAME      "loongarch64"
#define LIBNAME         "loongsongeneric"
#define CORENAME        "LOONGSONGENERIC"
/* Adjacent literals form one flag string; each piece ends with a space. */
#define ARCHCONFIG      "-DLOONGSONGENERIC " \
                        "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
                        "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
                        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=16 "
#endif
| #ifdef FORCE_I6400 | |||
| #define FORCE | |||
| #define ARCHITECTURE "MIPS" | |||
| @@ -2881,6 +2881,76 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 16 | |||
| #endif | |||
/* Default kernel parameters used when LOONGSON2K1000 is defined. */
#if defined(LOONGSON2K1000)

/* Settings shared by all precisions. */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
#define SYMV_P 16

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 240
#define SGEMM_DEFAULT_R 12288

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_Q 120
#define DGEMM_DEFAULT_R 8192

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_P 96
#define CGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_R 4096

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_P 64
#define ZGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_R 4096

#endif
/* Default kernel parameters used when LOONGSONGENERIC is defined. */
#if defined(LOONGSONGENERIC)

/* Settings shared by all precisions. */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
#define SYMV_P 16

/* SGEMM defaults. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 8
#define SGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 240
#define SGEMM_DEFAULT_R 12288

/* DGEMM defaults. */
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_Q 120
#define DGEMM_DEFAULT_R 8192

/* CGEMM defaults. */
#define CGEMM_DEFAULT_UNROLL_M 1
#define CGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_P 96
#define CGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_R 4096

/* ZGEMM defaults. */
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_P 64
#define ZGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_R 4096

#endif
| #if defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500) | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||