Add DYNAMIC_ARCH support for ARM64 (tags/v0.3.4)
@@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT) | |||
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' | |||
endif | |||
ifeq ($(ARCH), arm64) | |||
DYNAMIC_CORE = ARMV8 | |||
DYNAMIC_CORE += CORTEXA57 | |||
DYNAMIC_CORE += THUNDERX | |||
DYNAMIC_CORE += THUNDERX2T99 | |||
endif | |||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | |||
ifndef DYNAMIC_CORE | |||
override DYNAMIC_ARCH= | |||
@@ -237,7 +237,6 @@ void get_cpuconfig(void) | |||
break; | |||
case CPU_THUNDERX: | |||
printf("#define ARMV8\n"); | |||
printf("#define THUNDERX\n"); | |||
printf("#define L1_DATA_SIZE 32768\n"); | |||
printf("#define L1_DATA_LINESIZE 128\n"); | |||
@@ -15,7 +15,11 @@ endif | |||
# COMMONOBJS += info.$(SUFFIX) | |||
ifeq ($(DYNAMIC_ARCH), 1) | |||
ifeq ($(ARCH),arm64) | |||
COMMONOBJS += dynamic_arm64.$(SUFFIX) | |||
else | |||
COMMONOBJS += dynamic.$(SUFFIX) | |||
endif | |||
else | |||
COMMONOBJS += parameter.$(SUFFIX) | |||
endif | |||
@@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c | |||
endif | |||
ifeq ($(DYNAMIC_ARCH), 1) | |||
ifeq ($(ARCH),arm64) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX) | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
endif | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
endif | |||
@@ -0,0 +1,198 @@ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#include "common.h" | |||
#include <asm/hwcap.h> | |||
#include <sys/auxv.h> | |||
extern gotoblas_t gotoblas_ARMV8; | |||
extern gotoblas_t gotoblas_CORTEXA57; | |||
extern gotoblas_t gotoblas_THUNDERX; | |||
extern gotoblas_t gotoblas_THUNDERX2T99; | |||
extern void openblas_warning(int verbose, const char * msg); | |||
#define NUM_CORETYPES 4 | |||
/* | |||
* In case asm/hwcap.h is outdated on the build system, make sure | |||
* that HWCAP_CPUID is defined | |||
*/ | |||
#ifndef HWCAP_CPUID | |||
#define HWCAP_CPUID (1 << 11) | |||
#endif | |||
/*
 * Read the AArch64 system register `id` (e.g. MIDR_EL1) into the lvalue
 * `var` with an `mrs` instruction.
 *
 * Spelled __asm__ rather than asm so the macro still compiles when the
 * plain GNU `asm` keyword is disabled (-std=c99 / -std=c11 / -ansi), and
 * marked __volatile__ so the optimizer never drops or merges the read.
 */
#define get_cpu_ftr(id, var) ({						\
		__asm__ __volatile__("mrs %0, "#id : "=r" (var));	\
	})
static char *corename[] = { | |||
"armv8", | |||
"cortexa57", | |||
"thunderx", | |||
"thunderx2t99", | |||
"unknown" | |||
}; | |||
char *gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_ARMV8) return corename[ 0]; | |||
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1]; | |||
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2]; | |||
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3]; | |||
return corename[NUM_CORETYPES]; | |||
} | |||
static gotoblas_t *force_coretype(char *coretype) { | |||
int i ; | |||
int found = -1; | |||
char message[128]; | |||
for ( i=0 ; i < NUM_CORETYPES; i++) | |||
{ | |||
if (!strncasecmp(coretype, corename[i], 20)) | |||
{ | |||
found = i; | |||
break; | |||
} | |||
} | |||
switch (found) | |||
{ | |||
case 0: return (&gotoblas_ARMV8); | |||
case 1: return (&gotoblas_CORTEXA57); | |||
case 2: return (&gotoblas_THUNDERX); | |||
case 3: return (&gotoblas_THUNDERX2T99); | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
return NULL; | |||
} | |||
static gotoblas_t *get_coretype(void) { | |||
int implementer, variant, part, arch, revision, midr_el1; | |||
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { | |||
char coremsg[128]; | |||
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); | |||
openblas_warning(1, coremsg); | |||
return NULL; | |||
} | |||
get_cpu_ftr(MIDR_EL1, midr_el1); | |||
/* | |||
* MIDR_EL1 | |||
* | |||
* 31 24 23 20 19 16 15 4 3 0 | |||
* ----------------------------------------------------------------- | |||
* | Implementer | Variant | Architecture | Part Number | Revision | | |||
* ----------------------------------------------------------------- | |||
*/ | |||
implementer = (midr_el1 >> 24) & 0xFF; | |||
part = (midr_el1 >> 4) & 0xFFF; | |||
switch(implementer) | |||
{ | |||
case 0x41: // ARM | |||
switch (part) | |||
{ | |||
case 0xd07: // Cortex A57 | |||
case 0xd08: // Cortex A72 | |||
case 0xd03: // Cortex A53 | |||
return &gotoblas_CORTEXA57; | |||
} | |||
break; | |||
case 0x42: // Broadcom | |||
switch (part) | |||
{ | |||
case 0x516: // Vulcan | |||
return &gotoblas_THUNDERX2T99; | |||
} | |||
break; | |||
case 0x43: // Cavium | |||
switch (part) | |||
{ | |||
case 0x0a1: // ThunderX | |||
return &gotoblas_THUNDERX; | |||
case 0x0af: // ThunderX2 | |||
return &gotoblas_THUNDERX2T99; | |||
} | |||
break; | |||
} | |||
return NULL; | |||
} | |||
void gotoblas_dynamic_init(void) { | |||
char coremsg[128]; | |||
char coren[22]; | |||
char *p; | |||
if (gotoblas) return; | |||
p = getenv("OPENBLAS_CORETYPE"); | |||
if ( p ) | |||
{ | |||
gotoblas = force_coretype(p); | |||
} | |||
else | |||
{ | |||
gotoblas = get_coretype(); | |||
} | |||
if (gotoblas == NULL) | |||
{ | |||
snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n"); | |||
openblas_warning(1, coremsg); | |||
gotoblas = &gotoblas_ARMV8; | |||
} | |||
if (gotoblas && gotoblas->init) { | |||
strncpy(coren, gotoblas_corename(), 20); | |||
sprintf(coremsg, "Core: %s\n", coren); | |||
openblas_warning(2, coremsg); | |||
gotoblas -> init(); | |||
} else { | |||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
exit(1); | |||
} | |||
} | |||
void gotoblas_dynamic_quit(void) { | |||
gotoblas = NULL; | |||
} |
@@ -730,35 +730,8 @@ void blas_set_parameter(void){ | |||
#if defined(ARCH_ARM64) | |||
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8) | |||
unsigned long dgemm_prefetch_size_a; | |||
unsigned long dgemm_prefetch_size_b; | |||
unsigned long dgemm_prefetch_size_c; | |||
#endif | |||
/*
 * ARM64 variant: assign fixed values to the GEMM p/q/r parameters and to
 * the three DGEMM prefetch sizes when built for VULCAN, THUNDERX2T99 or
 * ARMV8. For any other ARM64 target the body is empty.
 */
void blas_set_parameter(void)
{
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
  /* single precision, real */
  sgemm_p = 128;
  sgemm_q = 352;
  sgemm_r = 4096;

  /* double precision, real */
  dgemm_p = 160;
  dgemm_q = 128;
  dgemm_r = 4096;

  /* single precision, complex */
  cgemm_p = 128;
  cgemm_q = 224;
  cgemm_r = 4096;

  /* double precision, complex */
  zgemm_p = 128;
  zgemm_q = 112;
  zgemm_r = 4096;

  /* prefetch sizes consumed by the DGEMM kernel */
  dgemm_prefetch_size_a = 3584;
  dgemm_prefetch_size_b = 512;
  dgemm_prefetch_size_c = 128;
#endif
}
#endif |
@@ -88,7 +88,11 @@ lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) | |||
$(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) | |||
setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | |||
ifeq ($(USE_GEMM3M), 1) | |||
$(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@ | |||
else | |||
$(CC) -c $(CFLAGS) $< -o $@ | |||
endif | |||
setparam$(TSUFFIX).c : setparam-ref.c | |||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | |||
@@ -113,13 +113,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
@@ -134,8 +134,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
endif | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(DGEMM_UNROLL_N), 4) | |||
@@ -146,34 +146,34 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
endif | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | |||
DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | |||
@@ -201,25 +201,25 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
endif |
@@ -111,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
@@ -132,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
endif | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(DGEMM_UNROLL_N), 4) | |||
@@ -144,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
endif | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -89,26 +89,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = sgemm_kernel_4x4.S | |||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
@@ -74,13 +74,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||
@@ -94,8 +94,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||
endif | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(DGEMM_UNROLL_N), 4) | |||
@@ -106,32 +106,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||
endif | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
SASUMKERNEL = sasum_thunderx2t99.c | |||
DASUMKERNEL = dasum_thunderx2t99.c | |||
@@ -1,135 +0,0 @@ | |||
SAMAXKERNEL = amax.S | |||
DAMAXKERNEL = amax.S | |||
CAMAXKERNEL = zamax.S | |||
ZAMAXKERNEL = zamax.S | |||
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMAXKERNEL = iamax.S | |||
IDAMAXKERNEL = iamax.S | |||
ICAMAXKERNEL = izamax.S | |||
IZAMAXKERNEL = izamax.S | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
SASUMKERNEL = asum.S | |||
DASUMKERNEL = asum.S | |||
CASUMKERNEL = casum.S | |||
ZASUMKERNEL = zasum.S | |||
SAXPYKERNEL = axpy.S | |||
DAXPYKERNEL = axpy.S | |||
CAXPYKERNEL = zaxpy.S | |||
ZAXPYKERNEL = zaxpy.S | |||
SCOPYKERNEL = copy.S | |||
DCOPYKERNEL = copy.S | |||
CCOPYKERNEL = copy.S | |||
ZCOPYKERNEL = copy.S | |||
SDOTKERNEL = dot.S | |||
DDOTKERNEL = dot.S | |||
CDOTKERNEL = zdot.S | |||
ZDOTKERNEL = zdot.S | |||
DSDOTKERNEL = dot.S | |||
SNRM2KERNEL = nrm2.S | |||
DNRM2KERNEL = nrm2.S | |||
CNRM2KERNEL = znrm2.S | |||
ZNRM2KERNEL = znrm2.S | |||
SROTKERNEL = rot.S | |||
DROTKERNEL = rot.S | |||
CROTKERNEL = zrot.S | |||
ZROTKERNEL = zrot.S | |||
SSCALKERNEL = scal.S | |||
DSCALKERNEL = scal.S | |||
CSCALKERNEL = zscal.S | |||
ZSCALKERNEL = zscal.S | |||
SSWAPKERNEL = swap.S | |||
DSWAPKERNEL = swap.S | |||
CSWAPKERNEL = swap.S | |||
ZSWAPKERNEL = swap.S | |||
SGEMVNKERNEL = gemv_n.S | |||
DGEMVNKERNEL = gemv_n.S | |||
CGEMVNKERNEL = zgemv_n.S | |||
ZGEMVNKERNEL = zgemv_n.S | |||
SGEMVTKERNEL = gemv_t.S | |||
DGEMVTKERNEL = gemv_t.S | |||
CGEMVTKERNEL = zgemv_t.S | |||
ZGEMVTKERNEL = zgemv_t.S | |||
STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = sgemm_kernel_4x4.S | |||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
@@ -943,13 +943,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
prfm PLDL1KEEP, [origPB] | |||
prfm PLDL1KEEP, [origPA] | |||
ldr A_PRE_SIZE, =dgemm_prefetch_size_a | |||
ldr A_PRE_SIZE, [A_PRE_SIZE] | |||
ldr B_PRE_SIZE, =dgemm_prefetch_size_b | |||
ldr B_PRE_SIZE, [B_PRE_SIZE] | |||
ldr C_PRE_SIZE, =dgemm_prefetch_size_c | |||
ldr C_PRE_SIZE, [C_PRE_SIZE] | |||
mov A_PRE_SIZE, #3584 | |||
mov B_PRE_SIZE, #512 | |||
mov C_PRE_SIZE, #128 | |||
add A_PRE_SIZE_64, A_PRE_SIZE, #64 | |||
add B_PRE_SIZE_64, B_PRE_SIZE, #64 | |||
@@ -294,6 +294,8 @@ gotoblas_t TABLE_NAME = { | |||
chemm_outcopyTS, chemm_oltcopyTS, | |||
0, 0, 0, | |||
#if defined(USE_GEMM3M) | |||
#ifdef CGEMM3M_DEFAULT_UNROLL_M | |||
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), | |||
#else | |||
@@ -324,6 +326,33 @@ gotoblas_t TABLE_NAME = { | |||
chemm3m_oucopybTS, chemm3m_olcopybTS, | |||
chemm3m_oucopyrTS, chemm3m_olcopyrTS, | |||
chemm3m_oucopyiTS, chemm3m_olcopyiTS, | |||
#else | |||
0, 0, 0, | |||
NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
#endif | |||
#ifndef NO_LAPACK | |||
cneg_tcopyTS, claswp_ncopyTS, | |||
@@ -400,6 +429,7 @@ gotoblas_t TABLE_NAME = { | |||
zhemm_outcopyTS, zhemm_oltcopyTS, | |||
0, 0, 0, | |||
#if defined(USE_GEMM3M) | |||
#ifdef ZGEMM3M_DEFAULT_UNROLL_M | |||
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), | |||
#else | |||
@@ -430,6 +460,33 @@ gotoblas_t TABLE_NAME = { | |||
zhemm3m_oucopybTS, zhemm3m_olcopybTS, | |||
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, | |||
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, | |||
#else | |||
0, 0, 0, | |||
NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
#endif | |||
#ifndef NO_LAPACK | |||
zneg_tcopyTS, zlaswp_ncopyTS, | |||
@@ -503,6 +560,7 @@ gotoblas_t TABLE_NAME = { | |||
xhemm_outcopyTS, xhemm_oltcopyTS, | |||
0, 0, 0, | |||
#if defined(USE_GEMM3M) | |||
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), | |||
xgemm3m_kernelTS, | |||
@@ -528,6 +586,33 @@ gotoblas_t TABLE_NAME = { | |||
xhemm3m_oucopybTS, xhemm3m_olcopybTS, | |||
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, | |||
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, | |||
#else | |||
0, 0, 0, | |||
NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
NULL, NULL, | |||
#endif | |||
#ifndef NO_LAPACK | |||
xneg_tcopyTS, xlaswp_ncopyTS, | |||
@@ -561,6 +646,78 @@ gotoblas_t TABLE_NAME = { | |||
}; | |||
#if defined(ARCH_ARM64) | |||
static void init_parameter(void) { | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; | |||
TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q; | |||
TABLE_NAME.qgemm_r = QGEMM_DEFAULT_R; | |||
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; | |||
#endif | |||
#if defined(USE_GEMM3M) | |||
#ifdef CGEMM3M_DEFAULT_P | |||
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||
#else | |||
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; | |||
#endif | |||
#ifdef ZGEMM3M_DEFAULT_P | |||
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; | |||
#else | |||
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; | |||
#endif | |||
#ifdef CGEMM3M_DEFAULT_Q | |||
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; | |||
#else | |||
TABLE_NAME.cgemm3m_q = TABLE_NAME.sgemm_q; | |||
#endif | |||
#ifdef ZGEMM3M_DEFAULT_Q | |||
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; | |||
#else | |||
TABLE_NAME.zgemm3m_q = TABLE_NAME.dgemm_q; | |||
#endif | |||
#ifdef CGEMM3M_DEFAULT_R | |||
TABLE_NAME.cgemm3m_r = CGEMM3M_DEFAULT_R; | |||
#else | |||
TABLE_NAME.cgemm3m_r = TABLE_NAME.sgemm_r; | |||
#endif | |||
#ifdef ZGEMM3M_DEFAULT_R | |||
TABLE_NAME.zgemm3m_r = ZGEMM3M_DEFAULT_R; | |||
#else | |||
TABLE_NAME.zgemm3m_r = TABLE_NAME.dgemm_r; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; | |||
TABLE_NAME.xgemm3m_q = TABLE_NAME.qgemm_q; | |||
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r; | |||
#endif | |||
#endif | |||
} | |||
#else // defined(ARCH_ARM64) | |||
#ifdef ARCH_X86 | |||
static int get_l2_size_old(void){ | |||
int i, eax, ebx, ecx, edx, cpuid_level; | |||
@@ -1146,3 +1303,4 @@ static void init_parameter(void) { | |||
} | |||
#endif //defined(ARCH_ARM64) |
@@ -2641,20 +2641,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ZGEMM_DEFAULT_UNROLL_M 4 | |||
#define ZGEMM_DEFAULT_UNROLL_N 4 | |||
#define SGEMM_DEFAULT_P sgemm_p | |||
#define DGEMM_DEFAULT_P dgemm_p | |||
#define CGEMM_DEFAULT_P cgemm_p | |||
#define ZGEMM_DEFAULT_P zgemm_p | |||
#define SGEMM_DEFAULT_P 128 | |||
#define DGEMM_DEFAULT_P 160 | |||
#define CGEMM_DEFAULT_P 128 | |||
#define ZGEMM_DEFAULT_P 128 | |||
#define SGEMM_DEFAULT_Q sgemm_q | |||
#define DGEMM_DEFAULT_Q dgemm_q | |||
#define CGEMM_DEFAULT_Q cgemm_q | |||
#define ZGEMM_DEFAULT_Q zgemm_q | |||
#define SGEMM_DEFAULT_Q 352 | |||
#define DGEMM_DEFAULT_Q 128 | |||
#define CGEMM_DEFAULT_Q 224 | |||
#define ZGEMM_DEFAULT_Q 112 | |||
#define SGEMM_DEFAULT_R sgemm_r | |||
#define DGEMM_DEFAULT_R dgemm_r | |||
#define CGEMM_DEFAULT_R cgemm_r | |||
#define ZGEMM_DEFAULT_R zgemm_r | |||
#define SGEMM_DEFAULT_R 4096 | |||
#define DGEMM_DEFAULT_R 4096 | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#define SYMV_P 16 | |||
#endif | |||
@@ -2720,20 +2720,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ZGEMM_DEFAULT_UNROLL_M 4 | |||
#define ZGEMM_DEFAULT_UNROLL_N 4 | |||
#define SGEMM_DEFAULT_P sgemm_p | |||
#define DGEMM_DEFAULT_P dgemm_p | |||
#define CGEMM_DEFAULT_P cgemm_p | |||
#define ZGEMM_DEFAULT_P zgemm_p | |||
#define SGEMM_DEFAULT_P 128 | |||
#define DGEMM_DEFAULT_P 160 | |||
#define CGEMM_DEFAULT_P 128 | |||
#define ZGEMM_DEFAULT_P 128 | |||
#define SGEMM_DEFAULT_Q sgemm_q | |||
#define DGEMM_DEFAULT_Q dgemm_q | |||
#define CGEMM_DEFAULT_Q cgemm_q | |||
#define ZGEMM_DEFAULT_Q zgemm_q | |||
#define SGEMM_DEFAULT_Q 352 | |||
#define DGEMM_DEFAULT_Q 128 | |||
#define CGEMM_DEFAULT_Q 224 | |||
#define ZGEMM_DEFAULT_Q 112 | |||
#define SGEMM_DEFAULT_R sgemm_r | |||
#define DGEMM_DEFAULT_R dgemm_r | |||
#define CGEMM_DEFAULT_R cgemm_r | |||
#define ZGEMM_DEFAULT_R zgemm_r | |||
#define SGEMM_DEFAULT_R 4096 | |||
#define DGEMM_DEFAULT_R 4096 | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#define SYMV_P 16 | |||
#endif | |||