Fix SBGEMM BFLOAT16 build (pull/5364/head)
@@ -88,13 +88,14 @@ jobs: | |||||
run: | | run: | | ||||
case "${{ matrix.build }}" in | case "${{ matrix.build }}" in | ||||
"make") | "make") | ||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" | |||||
make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" | |||||
;; | ;; | ||||
"cmake") | "cmake") | ||||
mkdir build && cd build | mkdir build && cd build | ||||
cmake -DDYNAMIC_ARCH=1 \ | cmake -DDYNAMIC_ARCH=1 \ | ||||
-DNOFORTRAN=0 \ | -DNOFORTRAN=0 \ | ||||
-DBUILD_WITHOUT_LAPACK=0 \ | -DBUILD_WITHOUT_LAPACK=0 \ | ||||
-DBUILD_BFLOAT16=1 \ | |||||
-DCMAKE_VERBOSE_MAKEFILE=ON \ | -DCMAKE_VERBOSE_MAKEFILE=ON \ | ||||
-DCMAKE_BUILD_TYPE=Release \ | -DCMAKE_BUILD_TYPE=Release \ | ||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | ||||
@@ -211,14 +211,14 @@ endif () | |||||
if (${CORE} STREQUAL NEOVERSEV1) | if (${CORE} STREQUAL NEOVERSEV1) | ||||
if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) | if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) | ||||
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1") | |||||
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") | |||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE) | elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE) | ||||
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v1") | set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v1") | ||||
else () | else () | ||||
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) | if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) | ||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1") | |||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") | |||||
else () | else () | ||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve") | |||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve+bf16") | |||||
endif() | endif() | ||||
endif() | endif() | ||||
endif () | endif () | ||||
@@ -291,10 +291,10 @@ if (DEFINED TARGET) | |||||
if (${TARGET} STREQUAL NEOVERSEV1) | if (${TARGET} STREQUAL NEOVERSEV1) | ||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) | if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) | ||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1") | |||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") | |||||
else () | else () | ||||
if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.4) | if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.4) | ||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1") | |||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") | |||||
else () | else () | ||||
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} does not support Neoverse V1.") | message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} does not support Neoverse V1.") | ||||
endif() | endif() | ||||
@@ -1,5 +1,6 @@ | |||||
/*********************************************************************/ | /*********************************************************************/ | ||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
/* Copyright 2025 The OpenBLAS Project. */ | |||||
/* All rights reserved. */ | /* All rights reserved. */ | ||||
/* */ | /* */ | ||||
/* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
@@ -305,7 +306,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
} | } | ||||
BLASLONG pad_min_l = min_l; | BLASLONG pad_min_l = min_l; | ||||
#if defined(HALF) | |||||
#if defined(BFLOAT16) | |||||
#if defined(DYNAMIC_ARCH) | #if defined(DYNAMIC_ARCH) | ||||
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | ||||
#else | #else | ||||
@@ -1,6 +1,6 @@ | |||||
/*********************************************************************/ | /*********************************************************************/ | ||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
/* Copyright 2023 The OpenBLAS Project. */ | |||||
/* Copyright 2023, 2025 The OpenBLAS Project. */ | |||||
/* All rights reserved. */ | /* All rights reserved. */ | ||||
/* */ | /* */ | ||||
/* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
@@ -324,7 +324,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
BLASLONG pad_min_l = min_l; | BLASLONG pad_min_l = min_l; | ||||
#if defined(HALF) | |||||
#if defined(BFLOAT16) | |||||
#if defined(DYNAMIC_ARCH) | #if defined(DYNAMIC_ARCH) | ||||
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); | ||||
#else | #else | ||||
@@ -1,5 +1,5 @@ | |||||
/***************************************************************************** | /***************************************************************************** | ||||
Copyright (c) 2011-2014, The OpenBLAS Project | |||||
Copyright (c) 2011-2014, 2025 The OpenBLAS Project | |||||
All rights reserved. | All rights reserved. | ||||
Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
@@ -1476,7 +1476,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | "-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | ||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | ||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \ | "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \ | ||||
"-march=armv8.4-a+sve -mtune=neoverse-v1" | |||||
"-march=armv8.4-a+sve+bf16 -mtune=neoverse-v1" | |||||
#define LIBNAME "neoversev1" | #define LIBNAME "neoversev1" | ||||
#define CORENAME "NEOVERSEV1" | #define CORENAME "NEOVERSEV1" | ||||
#endif | #endif | ||||
@@ -52,7 +52,7 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa | |||||
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) | GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) | ||||
foreach (float_type ${FLOAT_TYPES}) | foreach (float_type ${FLOAT_TYPES}) | ||||
if (${float_type} STREQUAL "HALF") | |||||
if (${float_type} STREQUAL "BFLOAT16") | |||||
continue() | continue() | ||||
endif() | endif() | ||||
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type}) | GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type}) | ||||
@@ -1,5 +1,6 @@ | |||||
/*********************************************************************/ | /*********************************************************************/ | ||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | /* Copyright 2009, 2010 The University of Texas at Austin. */ | ||||
/* Copyright 2025 The OpenBLAS Project. */ | |||||
/* All rights reserved. */ | /* All rights reserved. */ | ||||
/* */ | /* */ | ||||
/* Redistribution and use in source and binary forms, with or */ | /* Redistribution and use in source and binary forms, with or */ | ||||
@@ -405,7 +406,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){ | |||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
mode = BLAS_DOUBLE | BLAS_REAL; | mode = BLAS_DOUBLE | BLAS_REAL; | ||||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; | mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; | ||||
#elif defined(HALF) | |||||
#elif defined(BFLOAT16) | |||||
mode = BLAS_HALF | BLAS_REAL; | mode = BLAS_HALF | BLAS_REAL; | ||||
mask = MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N) - 1; | mask = MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N) - 1; | ||||
#else | #else | ||||