@@ -101,10 +101,13 @@ static void INLINE blas_lock(volatile unsigned long *address){ | |||
static inline unsigned int rpcc(void){ | |||
unsigned long ret; | |||
#if defined(LOONGSON3A) | |||
#if defined(LOONGSON3A) | |||
unsigned long long tmp; | |||
__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); | |||
ret=tmp; | |||
#elif defined(LOONGSON3B) | |||
//Temp Implementation. | |||
return 1; | |||
#else | |||
__asm__ __volatile__(".set push \n" | |||
".set mips32r2\n" | |||
@@ -234,6 +237,11 @@ REALNAME: ;\ | |||
#define FIXED_PAGESIZE (16UL << 10) | |||
#endif | |||
/* Loongson-3B: both PAGESIZE and FIXED_PAGESIZE are set to 16 KiB
 * (16UL << 10 == 16384). */
#if defined(LOONGSON3B)
#define PAGESIZE	(16UL << 10)
#define FIXED_PAGESIZE	(16UL << 10)
#endif

/* Fallback used only when nothing above has defined PAGESIZE:
 * 64 KiB (64UL << 10 == 65536). */
#ifndef PAGESIZE
#define PAGESIZE	(64UL << 10)
#endif
@@ -245,7 +253,7 @@ REALNAME: ;\ | |||
#define MAP_ANONYMOUS MAP_ANON | |||
#endif | |||
#if defined(LOONGSON3A) | |||
#if defined(LOONGSON3A) || defined(LOONGSON3B) | |||
#define PREFETCHD_(x) ld $0, x | |||
#define PREFETCHD(x) PREFETCHD_(x) | |||
#else | |||
@@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* CPU identifiers. detect() returns these values, and each one is also
 * the index of the corresponding entry in cpuname[] below, so the two
 * lists must be kept in the same order. */
#define CPU_UNKNOWN     0
#define CPU_SICORTEX    1
#define CPU_LOONGSON3A  2
#define CPU_LOONGSON3B  3

/* Printable name for each CPU_* identifier, indexed by that identifier.
 * Every entry needs its own trailing comma: two adjacent string literals
 * without a comma are concatenated into a single element, which would
 * corrupt the name and shift every later index.
 * NOTE(review): "UNKOWN" is left spelled as found because the string may
 * be emitted at runtime -- confirm nothing depends on it before fixing. */
static char *cpuname[] = {
  "UNKOWN",
  "SICORTEX",
  "LOONGSON3A",
  "LOONGSON3B"
};
int detect(void){ | |||
@@ -101,6 +103,8 @@ int detect(void){ | |||
if (strstr(p, "Loongson-3A")){ | |||
return CPU_LOONGSON3A; | |||
}else if(strstr(p, "Loongson-3B")){ | |||
return CPU_LOONGSON3B; | |||
}else if (strstr(p, "Loongson-3")){ | |||
infile = fopen("/proc/cpuinfo", "r"); | |||
while (fgets(buffer, sizeof(buffer), infile)){ | |||
@@ -130,6 +134,8 @@ void get_architecture(void){ | |||
void get_subarchitecture(void){ | |||
if(detect()==CPU_LOONGSON3A) { | |||
printf("LOONGSON3A"); | |||
}else if(detect()==CPU_LOONGSON3B){ | |||
printf("LOONGSON3B"); | |||
}else{ | |||
printf("SICORTEX"); | |||
} | |||
@@ -149,6 +155,15 @@ void get_cpuconfig(void){ | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
printf("#define L2_ASSOCIATIVE 4\n"); | |||
}else if(detect()==CPU_LOONGSON3B){ | |||
printf("#define LOONGSON3B\n"); | |||
printf("#define L1_DATA_SIZE 65536\n"); | |||
printf("#define L1_DATA_LINESIZE 32\n"); | |||
printf("#define L2_SIZE 512488\n"); | |||
printf("#define L2_LINESIZE 32\n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
printf("#define L2_ASSOCIATIVE 4\n"); | |||
}else{ | |||
printf("#define SICORTEX\n"); | |||
printf("#define L1_DATA_SIZE 32768\n"); | |||
@@ -164,6 +179,8 @@ void get_cpuconfig(void){ | |||
void get_libname(void){ | |||
if(detect()==CPU_LOONGSON3A) { | |||
printf("loongson3a\n"); | |||
}else if(detect()==CPU_LOONGSON3B) { | |||
printf("loongson3b\n"); | |||
}else{ | |||
#ifdef __mips64 | |||
printf("mips64\n"); | |||
@@ -683,7 +683,7 @@ void blas_set_parameter(void){ | |||
#if defined(ARCH_MIPS64) | |||
void blas_set_parameter(void){ | |||
#if defined(LOONGSON3A) | |||
#if defined(LOONGSON3A) || defined(LOONGSON3B) | |||
#ifdef SMP | |||
if(blas_num_threads == 1){ | |||
#endif | |||
@@ -117,6 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* #define FORCE_CELL */ | |||
/* #define FORCE_SICORTEX */ | |||
/* #define FORCE_LOONGSON3A */ | |||
/* #define FORCE_LOONGSON3B */ | |||
/* #define FORCE_ITANIUM2 */ | |||
/* #define FORCE_GENERIC */ | |||
/* #define FORCE_SPARC */ | |||
@@ -548,6 +549,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#else | |||
#endif | |||
/* Forced target: Loongson-3B.
 * When FORCE_LOONGSON3B is defined this section defines FORCE together
 * with the architecture / sub-architecture names, the sub-directory and
 * library names, and ARCHCONFIG (a single string of -D flags describing
 * the L1/L2 cache and DTB configuration).
 *
 * Each backslash in ARCHCONFIG must be the very last character on its
 * line; anything after it breaks the continuation and truncates the
 * macro to its first fragment.
 *
 * NOTE(review): L2_SIZE=512488 is not a multiple of L2_LINESIZE (32);
 * 524288 (512 KiB) may have been intended. Left unchanged here --
 * confirm against the hardware documentation before altering it. */
#ifdef FORCE_LOONGSON3B
#define FORCE
#define ARCHITECTURE    "MIPS"
#define SUBARCHITECTURE "LOONGSON3B"
#define SUBDIRNAME      "mips64"
#define ARCHCONFIG   "-DLOONGSON3B " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "loongson3b"
#define CORENAME  "LOONGSON3B"
#else
#endif
#ifdef FORCE_ITANIUM2 | |||
#define FORCE | |||
#define ARCHITECTURE "IA64" | |||
@@ -0,0 +1,68 @@ | |||
# Kernel selection list: each variable names the source file that
# implements one routine. Entries under ../generic/ are the shared C
# implementations; the others are assembly (.S) or C sources named
# relative to this directory.

# AXPY kernels (single / double precision).
SAXPYKERNEL = axpy_loongson3a.S
DAXPYKERNEL = daxpy_loongson3a_simd.S

# GEMV kernels, N and T variants, for S/D/C/Z.
SGEMVNKERNEL = gemv_n_loongson3a.c
SGEMVTKERNEL = gemv_t_loongson3a.c
DGEMVNKERNEL = gemv_n_loongson3a.c
DGEMVTKERNEL = gemv_t_loongson3a.c
CGEMVNKERNEL = zgemv_n_loongson3a.c
CGEMVTKERNEL = zgemv_t_loongson3a.c
ZGEMVNKERNEL = zgemv_n_loongson3a.c
ZGEMVTKERNEL = zgemv_t_loongson3a.c

# SGEMM kernel with its copy routines and their object names.
SGEMMKERNEL = sgemm_kernel_8x4_ps.S
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o

# DGEMM kernel; only the ON/OT copy routines are listed.
DGEMMKERNEL = dgemm_kernel_loongson3a_4x4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o

# CGEMM kernel with its copy routines and their object names.
CGEMMKERNEL = cgemm_kernel_loongson3a_4x2_ps.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ = cgemm_incopy.o
CGEMMITCOPYOBJ = cgemm_itcopy.o
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o

# ZGEMM kernel; only the ON/OT copy routines are listed.
ZGEMMKERNEL = zgemm_kernel_loongson3a_2x2.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o

# TRSM kernels: all four precisions name the same generic C sources
# (the complex entries point at the same files as the real ones).
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
@@ -1513,6 +1513,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define SYMV_P 16 | |||
#endif | |||
/* Default tuning parameters selected when LOONGSON3B is defined. */
#ifdef LOONGSON3B
#define SNUMOPT		2
#define DNUMOPT		2

#define GEMM_DEFAULT_OFFSET_A	0
#define GEMM_DEFAULT_OFFSET_B	0
/* 0x03fff == 16384 - 1, i.e. a 16 KiB alignment mask value. */
#define GEMM_DEFAULT_ALIGN	0x03fffUL

/* Unroll factors (M x N) for each precision. */
#define SGEMM_DEFAULT_UNROLL_M	8
#define SGEMM_DEFAULT_UNROLL_N	4

#define DGEMM_DEFAULT_UNROLL_M	4
#define DGEMM_DEFAULT_UNROLL_N	4

#define CGEMM_DEFAULT_UNROLL_M	4
#define CGEMM_DEFAULT_UNROLL_N	2

#define ZGEMM_DEFAULT_UNROLL_M	2
#define ZGEMM_DEFAULT_UNROLL_N	2

/* Default P / Q / R values for each precision. */
#define SGEMM_DEFAULT_P	64
#define DGEMM_DEFAULT_P	44
#define CGEMM_DEFAULT_P	64
#define ZGEMM_DEFAULT_P	32

#define SGEMM_DEFAULT_Q	192
#define DGEMM_DEFAULT_Q	92
#define CGEMM_DEFAULT_Q	128
#define ZGEMM_DEFAULT_Q	80

/* DGEMM_DEFAULT_R is the only R value that is not a literal: it expands
 * to the identifier dgemm_r, which is not defined in this section. */
#define SGEMM_DEFAULT_R	1024
#define DGEMM_DEFAULT_R	dgemm_r
#define CGEMM_DEFAULT_R	1024
#define ZGEMM_DEFAULT_R	1024

#define GEMM_OFFSET_A1	0x10000
#define GEMM_OFFSET_B1	0x100000

#define SYMV_P	16
#endif
#ifdef GENERIC | |||
#define SNUMOPT 2 | |||