Browse Source

Merge pull request #5362 from Mousius/fix-bf16

Fix SBGEMM BFLOAT16 build
pull/5364/head
Martin Kroeker GitHub 2 months ago
parent
commit
3d31887073
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
8 changed files with 16 additions and 13 deletions
  1. +2
    -1
      .github/workflows/arm64_graviton.yml
  2. +3
    -3
      cmake/cc.cmake
  3. +2
    -2
      cmake/system.cmake
  4. +2
    -1
      driver/level3/level3.c
  5. +2
    -2
      driver/level3/level3_thread.c
  6. +2
    -2
      getarch.c
  7. +1
    -1
      lapack/CMakeLists.txt
  8. +2
    -1
      lapack/potrf/potrf_parallel.c

+ 2
- 1
.github/workflows/arm64_graviton.yml View File

@@ -88,13 +88,14 @@ jobs:
run: |
case "${{ matrix.build }}" in
"make")
- make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
+ make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
+ -DBUILD_BFLOAT16=1 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \


+ 3
- 3
cmake/cc.cmake View File

@@ -211,14 +211,14 @@ endif ()
if (${CORE} STREQUAL NEOVERSEV1)
if (NOT DYNAMIC_ARCH)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
- set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
+ set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v1")
else ()
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
- set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
+ set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
else ()
- set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
+ set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve+bf16")
endif()
endif()
endif ()


+ 2
- 2
cmake/system.cmake View File

@@ -291,10 +291,10 @@ if (DEFINED TARGET)

if (${TARGET} STREQUAL NEOVERSEV1)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
- set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
+ set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
else ()
if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.4)
- set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
+ set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
else ()
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} does not support Neoverse V1.")
endif()


+ 2
- 1
driver/level3/level3.c View File

@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
+ /* Copyright 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -305,7 +306,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}

BLASLONG pad_min_l = min_l;
- #if defined(HALF)
+ #if defined(BFLOAT16)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else


+ 2
- 2
driver/level3/level3_thread.c View File

@@ -1,6 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
- /* Copyright 2023 The OpenBLAS Project. */
+ /* Copyright 2023, 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -324,7 +324,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
BLASLONG pad_min_l = min_l;

- #if defined(HALF)
+ #if defined(BFLOAT16)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else


+ 2
- 2
getarch.c View File

@@ -1,5 +1,5 @@
/*****************************************************************************
- Copyright (c) 2011-2014, The OpenBLAS Project
+ Copyright (c) 2011-2014, 2025 The OpenBLAS Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
@@ -1476,7 +1476,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \
- "-march=armv8.4-a+sve -mtune=neoverse-v1"
+ "-march=armv8.4-a+sve+bf16 -mtune=neoverse-v1"
#define LIBNAME "neoversev1"
#define CORENAME "NEOVERSEV1"
#endif


+ 1
- 1
lapack/CMakeLists.txt View File

@@ -52,7 +52,7 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)

foreach (float_type ${FLOAT_TYPES})
- if (${float_type} STREQUAL "HALF")
+ if (${float_type} STREQUAL "BFLOAT16")
continue()
endif()
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})


+ 2
- 1
lapack/potrf/potrf_parallel.c View File

@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
+ /* Copyright 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -405,7 +406,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
#elif defined(DOUBLE)
mode = BLAS_DOUBLE | BLAS_REAL;
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1;
- #elif defined(HALF)
+ #elif defined(BFLOAT16)
mode = BLAS_HALF | BLAS_REAL;
mask = MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N) - 1;
#else


Loading…
Cancel
Save