Browse Source

Merge pull request #5396 from abhishek-iitmadras/abhishekk_bfloat16

ARM64: Enable bfloat16 kernels by default
pull/5406/head
Martin Kroeker GitHub 2 months ago
parent
commit
a5e7c0e3e0
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
7 changed files with 20 additions and 3 deletions
  1. .github/workflows/apple_m.yml (+7, -1)
  2. .github/workflows/dynamic_arch.yml (+8, -0)
  3. CONTRIBUTORS.md (+1, -0)
  4. Makefile.system (+1, -2)
  5. kernel/arm64/bgemm_kernel_4x4_neoversev1.c (+1, -0)
  6. kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c (+1, -0)
  7. kernel/arm64/bgemv_n_sve_v3x4.c (+1, -0)

+ 7
- 1
.github/workflows/apple_m.yml View File

@@ -87,10 +87,16 @@ jobs:
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Add gfortran runtime to link path
if: matrix.build == 'make' && runner.os == 'macOS'
run: |
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
echo "LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L$GFORTRAN_LIBDIR" >> $GITHUB_ENV

- name: Build OpenBLAS
run: |
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
export CC="/opt/homebrew/opt/llvm/bin/clang"
case "${{ matrix.build }}" in


+ 8
- 0
.github/workflows/dynamic_arch.yml View File

@@ -89,6 +89,14 @@ jobs:
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Add gfortran runtime to link path
if: matrix.build == 'make' && runner.os == 'macOS'
run: |
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
# Preserve whatever LDFLAGS may already contain
echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV"

- name: Build OpenBLAS
run: |


+ 1
- 0
CONTRIBUTORS.md View File

@@ -255,6 +255,7 @@ In chronological order:

* Abhishek Kumar <https://github.com/abhishek-iitmadras>
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
* [2025-07-23] ARM64-Enable bfloat16 kernels by default

* Sharif Inamdar <sharif.inamdar@arm.com>
* [2025-06-05] Optimize gemv_n_sve_v1x3 kernel


+ 1
- 2
Makefile.system View File

@@ -270,6 +270,7 @@ SMALL_MATRIX_OPT = 1
BUILD_BFLOAT16 = 1
else ifeq ($(ARCH), arm64)
SMALL_MATRIX_OPT = 1
BUILD_BFLOAT16 = 1
endif
ifeq ($(ARCH), loongarch64)
SMALL_MATRIX_OPT = 1
@@ -425,10 +426,8 @@ ifeq ($(OSNAME), Darwin)
ifndef MACOSX_DEPLOYMENT_TARGET
ifeq ($(ARCH), arm64)
export MACOSX_DEPLOYMENT_TARGET=11.0
ifeq ($(C_COMPILER), GCC)
export NO_SVE = 1
export NO_SME = 1
endif
else
export MACOSX_DEPLOYMENT_TARGET=10.8
endif


+ 1
- 0
kernel/arm64/bgemm_kernel_4x4_neoversev1.c View File

@@ -27,6 +27,7 @@
* *****************************************************************************/

#include <arm_sve.h>
#include <arm_neon.h>

#include "common.h"



+ 1
- 0
kernel/arm64/bgemm_kernel_4x4_neoversev1_impl.c View File

@@ -27,6 +27,7 @@
* *****************************************************************************/

#include <arm_sve.h>
#include <arm_neon.h>

#include "common.h"



+ 1
- 0
kernel/arm64/bgemv_n_sve_v3x4.c View File

@@ -28,6 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"

#include <arm_sve.h>
#include <arm_neon.h>

#define UPDATE_PTRSx2 \
a_ptr1 = a_ptr0 + lda;


Loading…
Cancel
Save