Browse Source

Fix ARMV9SME target in DYNAMIC_ARCH and add SME query code for macOS (#5222)

* Fix ARMV9SME target and add support_sme1 code for macOS
* make sgemm_direct unconditionally available on all arm64
* build a (dummy) sgemm_direct kernel on all arm64





* Update dynamic_arm64.c
tags/v0.3.30
Martin Kroeker GitHub 4 months ago
parent
commit
5141a90993
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
6 changed files with 44 additions and 18 deletions
  1. +0
    -2
      common_param.h
  2. +31
    -10
      driver/others/dynamic_arm64.c
  3. +3
    -1
      kernel/CMakeLists.txt
  4. +4
    -3
      kernel/Makefile.L3
  5. +6
    -0
      kernel/arm64/sgemm_direct_arm64_sme1.c
  6. +0
    -2
      kernel/setparam-ref.c

+ 0
- 2
common_param.h View File

@@ -224,10 +224,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
#endif
#ifdef ARCH_ARM64
#ifdef HAVE_SME
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
#endif
#endif

int (*sgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);


+ 31
- 10
driver/others/dynamic_arm64.c View File

@@ -43,6 +43,14 @@
#include <sys/auxv.h>
#endif

#ifdef __APPLE__
#include <sys/sysctl.h>
/*
 * Scratch storage for sysctlbyname() queries on Apple platforms. Each value
 * buffer is paired with a size variable preset to the size of that buffer,
 * as sysctlbyname() takes the buffer length by pointer.
 *
 * value64/length64 receive the result of the "hw.optional.arm.FEAT_SME"
 * query in support_sme1(). value/length are the 32-bit counterparts; no use
 * of them is visible in this excerpt.
 *
 * These objects are declared static so that their very generic names stay
 * private to this translation unit instead of being exported from the
 * library, where they could collide with identically named symbols in a
 * program that links against it.
 */
static int32_t value;
static size_t length=sizeof(value);
static int64_t value64;
static size_t length64=sizeof(value64);
#endif

extern gotoblas_t gotoblas_ARMV8;
#ifdef DYNAMIC_LIST
#ifdef DYN_CORTEXA53
@@ -120,7 +128,7 @@ extern gotoblas_t gotoblas_ARMV9SME;
#else
#define gotoblas_ARMV9SME gotoblas_ARMV8
#endif
#ifdef DYN_CORTEX_A55
#ifdef DYN_CORTEXA55
extern gotoblas_t gotoblas_CORTEXA55;
#else
#define gotoblas_CORTEXA55 gotoblas_ARMV8
@@ -147,17 +155,17 @@ extern gotoblas_t gotoblas_NEOVERSEV1;
extern gotoblas_t gotoblas_NEOVERSEN2;
extern gotoblas_t gotoblas_ARMV8SVE;
extern gotoblas_t gotoblas_A64FX;
#ifndef NO_SME
extern gotoblas_t gotoblas_ARMV9SME;
#else
#define gotoblas_ARMV9SME gotoblas_ARMV8SVE
#endif
#else
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
#define gotoblas_ARMV8SVE gotoblas_ARMV8
#define gotoblas_A64FX gotoblas_ARMV8
#endif

#ifndef NO_SME
extern gotoblas_t gotoblas_ARMV9SME;
#else
#define gotoblas_ARMV9SME gotoblas_ARMV8SVE
#define gotoblas_ARMV9SME gotoblas_ARMV8
#endif

extern gotoblas_t gotoblas_THUNDERX3T110;
@@ -168,7 +176,7 @@ extern void openblas_warning(int verbose, const char * msg);
#define FALLBACK_VERBOSE 1
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"

#define NUM_CORETYPES 18
#define NUM_CORETYPES 19

/*
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -207,6 +215,7 @@ static char *corename[] = {
"cortexa55",
"armv8sve",
"a64fx",
"armv9sme",
"unknown"
};

@@ -229,6 +238,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
if (gotoblas == &gotoblas_A64FX) return corename[17];
if (gotoblas == &gotoblas_ARMV9SME) return corename[18];
return corename[NUM_CORETYPES];
}

@@ -266,6 +276,7 @@ static gotoblas_t *force_coretype(char *coretype) {
case 15: return (&gotoblas_CORTEXA55);
case 16: return (&gotoblas_ARMV8SVE);
case 17: return (&gotoblas_A64FX);
case 18: return (&gotoblas_ARMV9SME);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
@@ -277,6 +288,11 @@ static gotoblas_t *get_coretype(void) {
char coremsg[128];

#if defined (OS_DARWIN)
//future #if !defined(NO_SME)
// if (support_sme1()) {
// return &gotoblas_ARMV9SME;
// }
// #endif
return &gotoblas_NEOVERSEN1;
#endif
@@ -439,6 +455,7 @@ static gotoblas_t *get_coretype(void) {
}
break;
case 0x61: // Apple
//future if (support_sme1()) return &gotoblas_ARMV9SME;
return &gotoblas_NEOVERSEN1;
break;
default:
@@ -446,8 +463,8 @@ static gotoblas_t *get_coretype(void) {
openblas_warning(1, coremsg);
}

#if !defined(NO_SME) && defined(HWCAP2_SME)
if ((getauxval(AT_HWCAP2) & HWCAP2_SME)) {
#if !defined(NO_SME)
if (support_sme1()) {
return &gotoblas_ARMV9SME;
}
#endif
@@ -511,6 +528,10 @@ int support_sme1(void) {
if(getauxval(AT_HWCAP2) & HWCAP2_SME){
ret = 1;
}
#endif
#if defined(__APPLE__)
sysctlbyname("hw.optional.arm.FEAT_SME",&value64,&length64,NULL,0);
ret = value64;
#endif
return ret;
}

+ 3
- 1
kernel/CMakeLists.txt View File

@@ -208,7 +208,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
set(USE_TRMM true)
endif ()
set(USE_DIRECT_SGEMM false)
if (X86_64 OR (ARM64 AND (UC_TARGET_CORE MATCHES ARMV9SME)))
if (X86_64 OR ARM64)
set(USE_DIRECT_SGEMM true)
endif()

@@ -225,9 +225,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1.S)
set (SGEMMDIRECTPREKERNEL sgemm_direct_sme1_preprocess.S)
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE)
if (HAVE_SME)
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1" false "" "" false SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPREKERNEL}" "" "gemm_direct_sme1_preprocess" false "" "" false SINGLE)
endif ()
endif ()
endif()

foreach (float_type SINGLE DOUBLE)


+ 4
- 3
kernel/Makefile.L3 View File

@@ -103,8 +103,8 @@ endif
ifeq ($(ARCH), arm64)
ifeq ($(TARGET_CORE), ARMV9SME)
HAVE_SME = 1
SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c
endif
SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c
endif
endif
endif
@@ -143,9 +143,10 @@ SKERNELOBJS += \
sgemm_direct_performant$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(ARCH), arm64)
SKERNELOBJS += \
sgemm_direct$(TSUFFIX).$(SUFFIX)
ifdef HAVE_SME
SKERNELOBJS += \
sgemm_direct$(TSUFFIX).$(SUFFIX) \
sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) \
sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX)
endif
@@ -835,9 +836,9 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
endif
ifeq ($(ARCH), arm64)
ifdef HAVE_SME
$(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
ifdef HAVE_SME
$(KDIR)sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) :
$(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1.S -UDOUBLE -UCOMPLEX -o $@
$(KDIR)sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) :


+ 6
- 0
kernel/arm64/sgemm_direct_arm64_sme1.c View File

@@ -71,4 +71,10 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\
free(A_mod);
}
#else
/*
 * Placeholder variant of the direct SGEMM entry point: it has the same
 * signature as the real kernel but an empty body, so calling it performs no
 * work and leaves A, B and R untouched.
 *
 * It lives in the #else branch of a preprocessor conditional whose opening
 * #if is outside this excerpt -- presumably the branch taken when SME
 * support is not compiled in (confirm against the top of the file). Its
 * purpose, per the accompanying change description, is to let a (dummy)
 * sgemm_direct kernel be built on all arm64 targets.
 */
void CNAME(BLASLONG M, BLASLONG N, BLASLONG K,
           float * __restrict A, BLASLONG strideA,
           float * __restrict B, BLASLONG strideB,
           float * __restrict R, BLASLONG strideR)
{
}
#endif

+ 0
- 2
kernel/setparam-ref.c View File

@@ -180,9 +180,7 @@ gotoblas_t TABLE_NAME = {
sgemm_direct_performantTS,
#endif
#ifdef ARCH_ARM64
#ifdef HAVE_SME
sgemm_directTS,
#endif
#endif

sgemm_kernelTS, sgemm_betaTS,


Loading…
Cancel
Save