LoongArch64: Add DYNAMIC_ARCH support (tags/v0.3.21)
@@ -680,6 +680,10 @@ ifeq ($(ARCH), mips64) | |||
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 | |||
endif | |||
ifeq ($(ARCH), loongarch64) | |||
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC | |||
endif | |||
ifeq ($(ARCH), zarch) | |||
DYNAMIC_CORE = ZARCH_GENERIC | |||
@@ -27,11 +27,15 @@ else | |||
ifeq ($(ARCH),mips64) | |||
COMMONOBJS += dynamic_mips64.$(SUFFIX) | |||
else | |||
ifeq ($(ARCH),loongarch64) | |||
COMMONOBJS += dynamic_loongarch64.$(SUFFIX) | |||
else | |||
COMMONOBJS += dynamic.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif | |||
else | |||
COMMONOBJS += parameter.$(SUFFIX) | |||
endif | |||
@@ -99,11 +103,15 @@ else | |||
ifeq ($(ARCH),mips64) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX) | |||
else | |||
ifeq ($(ARCH),loongarch64) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_loongarch64.$(SUFFIX) | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
endif | |||
@@ -0,0 +1,128 @@ | |||
/******************************************************************************* | |||
Copyright (c) 2022, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*******************************************************************************/ | |||
#include "common.h" | |||
extern gotoblas_t gotoblas_LOONGSON3R5; | |||
extern gotoblas_t gotoblas_LOONGSON2K1000; | |||
extern gotoblas_t gotoblas_LOONGSONGENERIC; | |||
extern void openblas_warning(int verbose, const char * msg); | |||
#define NUM_CORETYPES 3 | |||
static char *corename[] = { | |||
"loongson3r5", | |||
"loongson2k1000", | |||
"loongsongeneric", | |||
"unknown" | |||
}; | |||
char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_LOONGSON3R5) return corename[0]; | |||
if (gotoblas == &gotoblas_LOONGSON2K1000) return corename[1]; | |||
if (gotoblas == &gotoblas_LOONGSONGENERIC) return corename[2]; | |||
return corename[NUM_CORETYPES]; | |||
} | |||
static gotoblas_t *force_coretype(char *coretype) { | |||
int i; | |||
int found = -1; | |||
char message[128]; | |||
for ( i=0 ; i < NUM_CORETYPES; i++) | |||
{ | |||
if (!strncasecmp(coretype, corename[i], 20)) | |||
{ | |||
found = i; | |||
break; | |||
} | |||
} | |||
switch (found) | |||
{ | |||
case 0: return (&gotoblas_LOONGSON3R5); | |||
case 1: return (&gotoblas_LOONGSON2K1000); | |||
case 2: return (&gotoblas_LOONGSONGENERIC); | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
return NULL; | |||
} | |||
#define LASX_MASK 1<<7 | |||
#define LSX_MASK 1<<6 | |||
#define LOONGARCH_CFG2 0x02 | |||
static gotoblas_t *get_coretype(void) { | |||
int ret = 0; | |||
__asm__ volatile ( | |||
"cpucfg %0, %1 \n\t" | |||
: "+&r"(ret) | |||
: "r"(LOONGARCH_CFG2) | |||
); | |||
if (ret & LASX_MASK) | |||
return &gotoblas_LOONGSON3R5; | |||
else if (ret & LSX_MASK) | |||
return &gotoblas_LOONGSON2K1000; | |||
else | |||
return &gotoblas_LOONGSONGENERIC; | |||
} | |||
void gotoblas_dynamic_init(void) { | |||
char coremsg[128]; | |||
char coren[22]; | |||
char *p; | |||
if (gotoblas) return; | |||
p = getenv("OPENBLAS_CORETYPE"); | |||
if ( p ) | |||
{ | |||
gotoblas = force_coretype(p); | |||
} | |||
else | |||
{ | |||
gotoblas = get_coretype(); | |||
} | |||
if (gotoblas && gotoblas->init) { | |||
strncpy(coren, gotoblas_corename(), 20); | |||
sprintf(coremsg, "Core: %s\n", coren); | |||
openblas_warning(2, coremsg); | |||
gotoblas -> init(); | |||
} else { | |||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
exit(1); | |||
} | |||
} | |||
/* Drop the active dispatch table by resetting `gotoblas` to NULL, so that a
   later gotoblas_dynamic_init() call performs selection again. */
void gotoblas_dynamic_quit(void) { | |||
gotoblas = NULL; | |||
} |
@@ -108,10 +108,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifndef DGEMMKERNEL | |||
@@ -120,10 +120,10 @@ DGEMMINCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifndef CGEMMKERNEL | |||
@@ -132,10 +132,10 @@ CGEMMINCOPY = ../generic/zgemm_ncopy_1.c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_1.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifndef ZGEMMKERNEL | |||
@@ -144,10 +144,10 @@ ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c | |||
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifndef SGEMM_BETA | |||
@@ -3,10 +3,10 @@ DGEMMINCOPY = dgemm_ncopy_16.S | |||
DGEMMITCOPY = dgemm_tcopy_16.S | |||
DGEMMONCOPY = dgemm_ncopy_4.S | |||
DGEMMOTCOPY = dgemm_tcopy_4.S | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
@@ -11,26 +11,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
@@ -1046,6 +1046,34 @@ static void init_parameter(void) { | |||
#endif | |||
} | |||
#else // (ARCH_MIPS64) | |||
#if (ARCH_LOONGARCH64) | |||
static void init_parameter(void) { | |||
#ifdef BUILD_BFLOAT16 | |||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
#endif | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#ifdef BUILD_BFLOAT16 | |||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
#endif | |||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
#ifdef BUILD_BFLOAT16 | |||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
#endif | |||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
} | |||
#else // (ARCH_LOONGARCH64) | |||
#if (ARCH_POWER) | |||
static void init_parameter(void) { | |||
@@ -1899,5 +1927,6 @@ static void init_parameter(void) { | |||
} | |||
#endif //POWER | |||
#endif //ZARCH | |||
#endif //(ARCH_LOONGARCH64) | |||
#endif //(ARCH_MIPS64) | |||
#endif //(ARCH_ARM64) |
@@ -2857,26 +2857,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ZGEMM_DEFAULT_UNROLL_M 1 | |||
#define XGEMM_DEFAULT_UNROLL_M 1 | |||
#define SGEMM_DEFAULT_P sgemm_p | |||
#define SGEMM_DEFAULT_P 512 | |||
#define DGEMM_DEFAULT_P 32 | |||
#define QGEMM_DEFAULT_P qgemm_p | |||
#define CGEMM_DEFAULT_P cgemm_p | |||
#define ZGEMM_DEFAULT_P zgemm_p | |||
#define XGEMM_DEFAULT_P xgemm_p | |||
#define CGEMM_DEFAULT_P 128 | |||
#define ZGEMM_DEFAULT_P 128 | |||
#define SGEMM_DEFAULT_R sgemm_r | |||
#define SGEMM_DEFAULT_R 12288 | |||
#define DGEMM_DEFAULT_R 858 | |||
#define QGEMM_DEFAULT_R qgemm_r | |||
#define CGEMM_DEFAULT_R cgemm_r | |||
#define ZGEMM_DEFAULT_R zgemm_r | |||
#define XGEMM_DEFAULT_R xgemm_r | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#define SGEMM_DEFAULT_Q 128 | |||
#define DGEMM_DEFAULT_Q 152 | |||
#define QGEMM_DEFAULT_Q 128 | |||
#define CGEMM_DEFAULT_Q 128 | |||
#define ZGEMM_DEFAULT_Q 128 | |||
#define XGEMM_DEFAULT_Q 128 | |||
#define SYMV_P 16 | |||
#endif | |||
@@ -3795,6 +3789,21 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout | |||
#define DGEMM_DEFAULT_R 8192 | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#elif defined(ARCH_LOONGARCH64) | |||
#define SGEMM_DEFAULT_P 128 | |||
#define DGEMM_DEFAULT_P 128 | |||
#define CGEMM_DEFAULT_P 96 | |||
#define ZGEMM_DEFAULT_P 64 | |||
#define SGEMM_DEFAULT_Q 240 | |||
#define DGEMM_DEFAULT_Q 120 | |||
#define CGEMM_DEFAULT_Q 120 | |||
#define ZGEMM_DEFAULT_Q 120 | |||
#define SGEMM_DEFAULT_R 12288 | |||
#define DGEMM_DEFAULT_R 8192 | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#else | |||
#define SGEMM_DEFAULT_P sgemm_p | |||
#define DGEMM_DEFAULT_P dgemm_p | |||