Browse Source

Merge pull request #5381 from Mousius/bgemv-infrastructure

Add infrastructure for BGEMV
pull/5386/head
Martin Kroeker GitHub 2 months ago
parent
commit
ac8cbfdd8e
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
34 changed files with 919 additions and 167 deletions
  1. +4
    -0
      .gitignore
  2. +1
    -0
      cblas.h
  3. +5
    -0
      cmake/kernel.cmake
  4. +1
    -1
      cmake/utils.cmake
  5. +9
    -0
      common_b.h
  6. +3
    -0
      common_interface.h
  7. +4
    -0
      common_level1.h
  8. +6
    -0
      common_level2.h
  9. +7
    -4
      common_macro.h
  10. +4
    -0
      common_param.h
  11. +35
    -0
      driver/level2/Makefile
  12. +10
    -9
      driver/level2/sbgemv_thread.c
  13. +29
    -1
      exports/gensymbol
  14. +29
    -1
      exports/gensymbol.pl
  15. +2
    -0
      interface/CMakeLists.txt
  16. +18
    -4
      interface/Makefile
  17. +2
    -2
      interface/sbgemmt.c
  18. +32
    -14
      interface/sbgemv.c
  19. +9
    -1
      interface/scal.c
  20. +3
    -0
      kernel/CMakeLists.txt
  21. +36
    -0
      kernel/Makefile.L1
  22. +43
    -0
      kernel/Makefile.L2
  23. +64
    -0
      kernel/generic/bf16_macros.h
  24. +1
    -32
      kernel/generic/gemmkernel_2x2.c
  25. +70
    -0
      kernel/generic/gemv_n.c
  26. +60
    -0
      kernel/generic/gemv_t.c
  27. +106
    -0
      kernel/generic/scal.c
  28. +2
    -2
      kernel/setparam-ref.c
  29. +33
    -5
      test/Makefile
  30. +2
    -10
      test/compare_sgemm_bgemm.c
  31. +0
    -80
      test/compare_sgemm_sbgemm.c
  32. +149
    -0
      test/compare_sgemv_bgemv.c
  33. +128
    -0
      test/compare_sgemv_sbgemv.c
  34. +12
    -1
      test/test_helpers.h

+ 4
- 0
.gitignore View File

@@ -81,7 +81,9 @@ test/ZBLAT2.SUMM
test/ZBLAT3.SUMM
test/ZBLAT3_3M.SUMM
test/SHBLAT3.SUMM
test/SBBLAT2.SUMM
test/SBBLAT3.SUMM
test/BBLAT2.SUMM
test/BBLAT3.SUMM
test/cblat1
test/cblat2
@@ -97,7 +99,9 @@ test/sblat3
test/sblat3_3m
test/test_shgemm
test/test_sbgemm
test/test_sbgemv
test/test_bgemm
test/test_bgemv
test/zblat1
test/zblat2
test/zblat3


+ 1
- 0
cblas.h View File

@@ -465,6 +465,7 @@ void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPEN
void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout);
/* convert BFLOAT16 array to double array */
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
/* GEMV-style interface where the scalars (alpha, beta), the inputs (a, x) and the output vector (y) are all BFLOAT16 */
void cblas_bgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 beta, bfloat16 *y, OPENBLAS_CONST blasint incy);
/* dot product of BFLOAT16 input arrays, with the result returned as float */
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);


+ 5
- 0
cmake/kernel.cmake View File

@@ -110,6 +110,7 @@ macro(SetDefaultL1)
SetFallback(SROTMKERNEL rotm.S)
SetFallback(DROTMKERNEL rotm.S)
SetFallback(QROTMKERNEL rotm.S)
SetFallback(BSCALKERNEL ../generic/scal.c)
SetFallback(SSCALKERNEL scal.S)
SetFallback(DSCALKERNEL scal.S)
SetFallback(CSCALKERNEL zscal.S)
@@ -169,6 +170,8 @@ if (BUILD_BFLOAT16)
SetFallback(SHSWAPKERNEL ../arm/swap.c)
SetFallback(TOBF16KERNEL ../x86_64/tobf16.c)
SetFallback(BF16TOKERNEL ../x86_64/bf16to.c)
SetFallback(BGEMVNKERNEL ../generic/gemv_n.c)
SetFallback(BGEMVTKERNEL ../generic/gemv_t.c)
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
endif ()
@@ -221,6 +224,8 @@ macro(SetDefaultL2)
SetFallback(XHEMV_V_KERNEL ../generic/zhemv_k.c)
SetFallback(XHEMV_M_KERNEL ../generic/zhemv_k.c)
if (BUILD_BFLOAT16)
SetFallback(BGEMVNKERNEL ../generic/gemv_n.c)
SetFallback(BGEMVTKERNEL ../generic/gemv_t.c)
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
SetFallback(SHGERKERNEL ../generic/ger.c)


+ 1
- 1
cmake/utils.cmake View File

@@ -375,7 +375,7 @@ function(GenerateNamedObjects sources_in)
if (NOT no_float_type)
string(SUBSTRING ${float_type} 0 1 float_char)
string(TOLOWER ${float_char} float_char)
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEMM")
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM")
set (float_char "sb")
endif ()
endif ()


+ 9
- 0
common_b.h View File

@@ -30,6 +30,11 @@
#define COMMON_B_H

#ifndef DYNAMIC_ARCH
#define BGEMV_N_K bgemv_n
#define BGEMV_T_K bgemv_t

#define BSCAL_K bscal_k

#define BGEMM_ONCOPY bgemm_oncopy
#define BGEMM_OTCOPY bgemm_otcopy

@@ -45,6 +50,10 @@
#define BGEMM_KERNEL bgemm_kernel

#else
#define BGEMV_N_K gotoblas->bgemv_n
#define BGEMV_T_K gotoblas->bgemv_t

#define BSCAL_K gotoblas->bscal_k

#define BGEMM_ONCOPY gotoblas->bgemm_oncopy
#define BGEMM_OTCOPY gotoblas->bgemm_otcopy


+ 3
- 0
common_interface.h View File

@@ -60,6 +60,7 @@ double BLASFUNC(dsdot) (blasint *, float *, blasint *, float *, blasint *);
double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *);
xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *);

void BLASFUNC(bscal) (blasint *, bfloat16 *, bfloat16 *, blasint *);
float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *);
void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *);
void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *);
@@ -256,6 +257,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
xdouble *, blasint *, xdouble *, blasint *);

void BLASFUNC(bgemv)(char *, blasint *, blasint *, bfloat16 *, bfloat16 *, blasint *,
bfloat16 *, blasint *, bfloat16 *, bfloat16 *, blasint *);
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
bfloat16 *, blasint *, float *, float *, blasint *);
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,


+ 4
- 0
common_level1.h View File

@@ -1,4 +1,5 @@
/*********************************************************************/
/* Copyright 2025 The OpenBLAS Project. */
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
@@ -169,6 +170,9 @@ BLASLONG icmin_k(BLASLONG, float *, BLASLONG);
BLASLONG izmin_k(BLASLONG, double *, BLASLONG);
BLASLONG ixmin_k(BLASLONG, xdouble *, BLASLONG);


int bscal_k(BLASLONG, BLASLONG, BLASLONG, bfloat16,
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
int sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dscal_k(BLASLONG, BLASLONG, BLASLONG, double,


+ 6
- 0
common_level2.h View File

@@ -1,4 +1,5 @@
/*********************************************************************/
/* Copyright 2025 The OpenBLAS Project */
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
@@ -44,6 +45,11 @@
extern "C" {
#endif


/* BFLOAT16-only GEMV entry points: both scalar arguments and all three array arguments are bfloat16. */
/* The _thread_ variants take the same arguments plus one trailing int. */
int bgemv_n(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
int bgemv_t(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
int bgemv_thread_n(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG, int);
int bgemv_thread_t(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG, int);
int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);


+ 7
- 4
common_macro.h View File

@@ -705,6 +705,11 @@


#elif defined(BFLOAT16) && defined(BGEMM)
#define SCAL_K BSCAL_K

#define GEMV_N BGEMV_N_K
#define GEMV_T BGEMV_T_K

#define GEMM_BETA BGEMM_BETA
#define GEMM_KERNEL_N BGEMM_KERNEL
#define GEMM_KERNEL_L BGEMM_KERNEL
@@ -754,8 +759,8 @@
#define D_BF16_TO_K DBF16TOD_K
#define S_TO_BF16_K SBSTOBF16_K
#define S_BF16_TO_K SBF16TOS_K
#define SBGEMV_N SBGEMV_N_K
#define SBGEMV_T SBGEMV_T_K
#define GEMV_N SBGEMV_N_K
#define GEMV_T SBGEMV_T_K

#define AMAX_K SAMAX_K
#define AMIN_K SAMIN_K
@@ -773,8 +778,6 @@
#define AXPYC_K SAXPYC_K
#define AXPBY_K SAXPBY_K
#define SCAL_K SSCAL_K
#define GEMV_N SGEMV_N
#define GEMV_T SGEMV_T
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define GERU_K SGERU_K


+ 4
- 0
common_param.h View File

@@ -98,10 +98,14 @@ int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
int (*sbrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*sbrotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);

int (*bscal_k) (BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
int (*sbaxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);

int (*bgemv_n) (BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
int (*bgemv_t) (BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);

int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);


+ 35
- 0
driver/level2/Makefile View File

@@ -1,3 +1,31 @@
###############################################################################
# Copyright (c) 2025 The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

TOPDIR = ../..
include ../../Makefile.system

@@ -423,6 +451,9 @@ XBLASOBJS += \
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)

ifeq ($(BUILD_BFLOAT16),1)
BBLASOBJS += \
bgemv_thread_n$(TSUFFIX).$(SUFFIX) \
bgemv_thread_t$(TSUFFIX).$(SUFFIX)
SBBLASOBJS += \
sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \
sbgemv_thread_t$(TSUFFIX).$(SUFFIX)
@@ -3707,6 +3738,10 @@ xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)

ifeq ($(BUILD_BFLOAT16),1)
bgemv_thread_n.$(SUFFIX) bgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
bgemv_thread_t.$(SUFFIX) bgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h


+ 10
- 9
driver/level2/sbgemv_thread.c View File

@@ -1,4 +1,5 @@
/*********************************************************************/
/* Copyright 2025 The OpenBLAS Project. */
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
@@ -41,21 +42,21 @@
#include "common.h"

#ifndef TRANSA
#define SBGEMV SBGEMV_N
#define GEMV GEMV_N
#else
#define SBGEMV SBGEMV_T
#define GEMV GEMV_T
#endif

static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){

bfloat16 *a, *x;
float *y;
IFLOAT *a, *x;
FLOAT *y;
BLASLONG lda, incx, incy;
BLASLONG m_from, m_to, n_from, n_to;

a = (bfloat16 *)args->a;
x = (bfloat16 *)args->b;
y = (float *)args->c;
a = (IFLOAT *)args->a;
x = (IFLOAT *)args->b;
y = (FLOAT *)args->c;

lda = args->lda;
incx = args->ldb;
@@ -77,12 +78,12 @@ static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
y += n_from * incy;
#endif

SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);
GEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);

return 0;
}

int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads)
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG incx, FLOAT beta, FLOAT *y, BLASLONG incy, int threads)
{
blas_arg_t args;
blas_queue_t queue[MAX_CPU_NUMBER];


+ 29
- 1
exports/gensymbol View File

@@ -1,5 +1,33 @@
#!/bin/sh

###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

# Changelog
# 2017/09/03 staticfloat
# Added zsymv and csymv into @lapackobjs2 so they are properly renamed
@@ -51,7 +79,7 @@ blasobjsz="
zgeadd dzsum zgemmt zgemmtr"

blasobjs="lsame xerbla"
bfblasobjs="bgemm sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod"
bfblasobjs="bgemm bgemv sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod"
hfblasobjs="shgemm"
cblasobjsc="
cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv


+ 29
- 1
exports/gensymbol.pl View File

@@ -1,5 +1,33 @@
#!/usr/bin/env perl

###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

# Changelog
# 2017/09/03 staticfloat
# Added zsymv and csymv into @lapackobjs2 so they are properly renamed
@@ -51,7 +79,7 @@
zgeadd, dzsum, zgemmt,zgemmtr);

@blasobjs = (lsame, xerbla);
@bfblasobjs = (bgemm, sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
@bfblasobjs = (bgemm, bgemv, sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
@hfblasobjs = (shgemm);
@cblasobjsc = (
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,


+ 2
- 0
interface/CMakeLists.txt View File

@@ -150,11 +150,13 @@ endif ()
GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG})

if (BUILD_BFLOAT16)
GenerateNamedObjects("scal.c" "BGEMM" "bscal" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("bf16dot.c" "" "sbdot" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("gemm.c" "BGEMM" "bgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("gemm.c" "" "sbgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("sbgemmt.c" "" "sbgemmt" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("sbgemmt.c" "RNAME" "sbgemmtr" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("sbgemv.c" "BGEMM" "bgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("sbgemv.c" "" "sbgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("tobf16.c" "SINGLE_PREC" "sbstobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")
GenerateNamedObjects("tobf16.c" "DOUBLE_PREC" "sbdtobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")


+ 18
- 4
interface/Makefile View File

@@ -75,7 +75,9 @@ SBLAS3OBJS = \
sgeadd.$(SUFFIX) sgemmt.$(SUFFIX) sgemmtr.$(SUFFIX)

ifeq ($(BUILD_BFLOAT16),1)
BBLAS3OBJ = bgemm.$(SUFFIX)
BBLAS3OBJS = bgemm.$(SUFFIX)
BBLAS2OBJS = bgemv.$(SUFFIX)
BBLAS1OBJS = bscal.$(SUFFIX)
SBBLAS1OBJS = sbdot.$(SUFFIX)
SBBLAS2OBJS = sbgemv.$(SUFFIX)
SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX) sbgemmtr.$(SUFFIX)
@@ -319,6 +321,8 @@ CSBLAS3OBJS = \

ifeq ($(BUILD_BFLOAT16),1)
CBBLAS3OBJS = cblas_bgemm.$(SUFFIX)
CBBLAS2OBJS = cblas_bgemv.$(SUFFIX)
CBBLAS1OBJS = cblas_bscal.$(SUFFIX)
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX) cblas_sbgemmtr.$(SUFFIX) cblas_sbgemm_batch.$(SUFFIX)
@@ -423,7 +427,9 @@ override CFLAGS += -I.
SBLAS1OBJS += $(CSBLAS1OBJS)
SBLAS2OBJS += $(CSBLAS2OBJS)
SBLAS3OBJS += $(CSBLAS3OBJS)
BBLAS3OBJ += $(CBBLAS3OBJS)
BBLAS3OBJS += $(CBBLAS3OBJS)
BBLAS2OBJS += $(CBBLAS2OBJS)
BBLAS1OBJS += $(CBBLAS1OBJS)
SBBLAS1OBJS += $(CSBBLAS1OBJS)
SBBLAS2OBJS += $(CSBBLAS2OBJS)
SBBLAS3OBJS += $(CSBBLAS3OBJS)
@@ -443,7 +449,7 @@ SBEXTOBJS += $(CSBEXTOBJS)
CBAUXOBJS += $(CXERBLAOBJ)
endif

BBLASOBJS = $(BBLAS3OBJ)
BBLASOBJS = $(BBLAS3OBJS) $(BBLAS2OBJS) $(BBLAS1OBJS)
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS)
SHBLASOBJS = $(SHBLAS3OBJS)
@@ -589,7 +595,7 @@ clean ::
level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
level2 : $(SBBLAS2OBJS) $(BBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

level3 : $(SBBLAS3OBJS) $(BBLAS3OBJ) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(SHBLAS3OBJS)
@@ -824,6 +830,8 @@ dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c
$(CC) $(CFLAGS) -c $< -o $(@F)

ifeq ($(BUILD_BFLOAT16),1)
bscal.$(SUFFIX) bscal.$(PSUFFIX) : scal.c
$(CC) $(CFLAGS) -DBGEMM -c $< -o $(@F)
sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c
$(CC) $(CFLAGS) -c $< -o $(@F)
sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c
@@ -981,6 +989,8 @@ xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)

ifeq ($(BUILD_BFLOAT16),1)
bgemv.$(SUFFIX) bgemv.$(PSUFFIX) : sbgemv.c
$(CC) $(CFLAGS) -DBGEMM -c $< -o $(@F)
sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c
$(CC) $(CFLAGS) -c $< -o $(@F)
endif
@@ -1653,6 +1663,8 @@ cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)

ifeq ($(BUILD_BFLOAT16),1)
cblas_bscal.$(SUFFIX) cblas_bscal.$(PSUFFIX) : scal.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c
@@ -1807,6 +1819,8 @@ cblas_zdrot.$(SUFFIX) cblas_zdrot.$(PSUFFIX) : zrot.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)

ifeq ($(BUILD_BFLOAT16),1)
cblas_bgemv.$(SUFFIX) cblas_bgemv.$(PSUFFIX) : sbgemv.c
$(CC) -DCBLAS -DBGEMM -c $(CFLAGS) $< -o $(@F)
cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
endif


+ 2
- 2
interface/sbgemmt.c View File

@@ -1,5 +1,5 @@
/*********************************************************************/
/* Copyright 2024, The OpenBLAS Project. */
/* Copyright 2024-2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -305,7 +305,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif
int (*gemv[]) (BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG,
IFLOAT *, BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
SBGEMV_N, SBGEMV_T,};
GEMV_N, GEMV_T,};


if (m == 0)


+ 32
- 14
interface/sbgemv.c View File

@@ -1,4 +1,5 @@
/*********************************************************************/
/* Copyright 2025 The OpenBLAS Project. */
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
@@ -43,17 +44,25 @@
#include "functable.h"
#endif

#ifdef BGEMM
#define GEMV_THREAD_N bgemv_thread_n
#define GEMV_THREAD_T bgemv_thread_t
#define ERROR_NAME "BGEMV "
#else
#define GEMV_THREAD_N sbgemv_thread_n
#define GEMV_THREAD_T sbgemv_thread_t
#define ERROR_NAME "SBGEMV "
#endif

#ifdef SMP
static int (*sbgemv_thread[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG, int) = {
sbgemv_thread_n, sbgemv_thread_t,
static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG, int) = {
GEMV_THREAD_N, GEMV_THREAD_T,
};
#endif

#ifndef CBLAS

void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasint *LDA, bfloat16 *x, blasint *INCX, float *BETA, float *y, blasint *INCY)
void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *ALPHA, IFLOAT *a, blasint *LDA, IFLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
{
char trans = *TRANS;
blasint m = *M;
@@ -61,14 +70,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasin
blasint lda = *LDA;
blasint incx = *INCX;
blasint incy = *INCY;
float alpha = *ALPHA;
float beta = *BETA;
FLOAT alpha = *ALPHA;
FLOAT beta = *BETA;
#ifdef SMP
int nthreads;
#endif

int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
SBGEMV_N, SBGEMV_T,
int (*gemv[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
GEMV_N, GEMV_T,
};

blasint info;
@@ -104,7 +113,7 @@ void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasin

#else

void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, float alpha, bfloat16 *a, blasint lda, bfloat16 *x, blasint incx, float beta, float *y, blasint incy)
void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, FLOAT alpha, IFLOAT *a, blasint lda, IFLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy)
{
blasint lenx, leny;
int trans;
@@ -113,8 +122,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi
int nthreads;
#endif

int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
SBGEMV_N, SBGEMV_T,
int (*gemv[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
GEMV_N, GEMV_T,
};

PRINT_DEBUG_CNAME;
@@ -166,8 +175,17 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi
leny = m;
}

if (alpha == ZERO) {
if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
#ifdef BGEMM
float alpha_float, beta_float;
SBF16TOS_K(1, &alpha, 1, &alpha_float, 1);
SBF16TOS_K(1, &beta, 1, &beta_float, 1);
#else
float alpha_float = alpha;
float beta_float = beta;
#endif

if (alpha_float == ZERO) {
if (beta_float != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
return;
}

@@ -185,10 +203,10 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi

if (nthreads == 1) {
#endif
(sbgemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy);
(gemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy);
#ifdef SMP
} else {
(sbgemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads);
(gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads);
}
#endif



+ 9
- 1
interface/scal.c View File

@@ -1,4 +1,5 @@
/*********************************************************************/
/* Copyright 2025 The OpenBLAS Project. */
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
@@ -68,7 +69,14 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){

if (incx <= 0 || n <= 0) return;

if (alpha == ONE) return;
#ifdef BGEMM
float alpha_float;
SBF16TOS_K(1, &alpha, 1, &alpha_float, 1);
#else
float alpha_float = alpha;
#endif

if (alpha_float == ONE) return;

IDEBUG_START;



+ 3
- 0
kernel/CMakeLists.txt View File

@@ -121,6 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)

# sbdot
if (BUILD_BFLOAT16)
GenerateNamedObjects("${KERNELDIR}/${BSCALKERNEL}" "BGEMM" "scal_k" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${SBDOTKERNEL}" "SBDOT" "dot_k" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "SINGLE" "f16tos_k" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "DOUBLE" "bf16tod_k" false "" "" false "DOUBLE")
@@ -222,6 +223,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE")
endif ()
if (BUILD_BFLOAT16)
GenerateNamedObjects("${KERNELDIR}/${BGEMVNKERNEL}" "BGEMM" "gemv_n" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${BGEMVTKERNEL}" "BGEMM" "gemv_t" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${SBGEMVNKERNEL}" "" "gemv_n" false "" "" false "BFLOAT16")
GenerateNamedObjects("${KERNELDIR}/${SBGEMVTKERNEL}" "" "gemv_t" false "" "" false "BFLOAT16")
endif ()


+ 36
- 0
kernel/Makefile.L1 View File

@@ -1,3 +1,31 @@
###############################################################################
# Copyright (c) 2025 The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

FMAFLAG=
ifndef OLDGCC
ifdef HAVE_FMA3
@@ -271,6 +299,10 @@ XDOTKERNEL = zdot.S
endif

ifeq ($(BUILD_BFLOAT16),1)
ifndef BSCALKERNEL
BSCALKERNEL = ../generic/scal.c
endif

ifndef SBDOTKERNEL
SBDOTKERNEL = ../x86_64/sbdot.c
endif
@@ -551,6 +583,8 @@ XBLASOBJS += \
xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX)

ifeq ($(BUILD_BFLOAT16),1)
BBLASOBJS += \
bscal_k$(TSUFFIX).$(SUFFIX)
SBBLASOBJS += \
sbdot_k$(TSUFFIX).$(SUFFIX)
SBEXTOBJS += \
@@ -778,6 +812,8 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@

ifeq ($(BUILD_BFLOAT16),1)
$(KDIR)bscal_k$(TSUFFIX).$(SUFFIX) $(KDIR)bscal_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BSCALKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
$(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
$(KDIR)sbstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL)


+ 43
- 0
kernel/Makefile.L2 View File

@@ -1,3 +1,31 @@
###############################################################################
# Copyright (c) 2025 The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

FMAFLAG=
ifndef OLDGCC
ifdef HAVE_FMA3
@@ -56,6 +84,14 @@ XGEMVTKERNEL = zgemv_t.S
endif

ifeq ($(BUILD_BFLOAT16),1)
ifndef BGEMVNKERNEL
BGEMVNKERNEL = ../generic/gemv_n.c
endif

ifndef BGEMVTKERNEL
BGEMVTKERNEL = ../generic/gemv_t.c
endif

ifndef SBGEMVNKERNEL
SBGEMVNKERNEL = ../x86_64/sbgemv_n.c
endif
@@ -255,6 +291,9 @@ XBLASOBJS += \
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)

ifeq ($(BUILD_BFLOAT16),1)
BBLASOBJS += \
bgemv_n$(TSUFFIX).$(SUFFIX) \
bgemv_t$(TSUFFIX).$(SUFFIX)
SBBLASOBJS += \
sbgemv_n$(TSUFFIX).$(SUFFIX) \
sbgemv_t$(TSUFFIX).$(SUFFIX)
@@ -513,5 +552,9 @@ $(KDIR)sbgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KE
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
$(KDIR)sbgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVTKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
$(KDIR)bgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)bgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMVNKERNEL)
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX $< -o $@
$(KDIR)bgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)bgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMVTKERNEL)
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX $< -o $@
endif


+ 64
- 0
kernel/generic/bf16_macros.h View File

@@ -0,0 +1,64 @@
/***************************************************************************
* Copyright (c) 2025, The OpenBLAS Project
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the OpenBLAS project nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* *****************************************************************************/

#ifndef OPENBLAS_KERNEL_GENERIC_BF16_MACROS_H
#define OPENBLAS_KERNEL_GENERIC_BF16_MACROS_H

/*
 * Helper macros for generic kernels that are compiled for several element
 * types from one source file:
 *
 *   ALPHA / BETA   - the kernel's `alpha` / `beta` arguments as values that
 *                    can be used directly in arithmetic.
 *   BF16TOF32(x)   - widen an input element for accumulation.
 *   F32TOBF16(x)   - narrow an accumulated value for storing to the output.
 *
 * Three configurations are handled:
 *   - BFLOAT16 + BFLOAT16CONVERSION + BGEMM: inputs, outputs, alpha and beta
 *     are all converted to/from bfloat16.
 *   - BFLOAT16 + BFLOAT16CONVERSION without BGEMM: only inputs are widened;
 *     alpha, beta and the stored result are passed through unchanged.
 *   - otherwise: every macro is the identity.
 *
 * The macros that forward their argument parenthesize it so that an
 * expression argument keeps its meaning at the point of use.
 */
#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)

/* Convert one bfloat16 value to float using the sbf16tos_ routine
 * (called with a count of one and unit increments). */
static float
bfloat16tof32 (bfloat16 value)
{
  blasint one = 1;
  float result;
  sbf16tos_(&one, &value, &one, &result, &one);
  return result;
}

#ifdef BGEMM
/* Convert one float value to bfloat16 using the sbstobf16_ routine
 * (called with a count of one and unit increments). */
static bfloat16 f32tobfloat16(float value) {
  blasint one = 1;
  bfloat16 result;
  sbstobf16_(&one, &value, &one, &result, &one);
  return result;
}

#define ALPHA bfloat16tof32(alpha)
#define BETA bfloat16tof32(beta)
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (f32tobfloat16(x))
#else
#define ALPHA alpha
#define BETA beta
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (x)
#endif

#else
#define ALPHA alpha
#define BETA beta
#define BF16TOF32(x) (x)
#define F32TOBF16(x) (x)
#endif

#endif /* OPENBLAS_KERNEL_GENERIC_BF16_MACROS_H */

+ 1
- 32
kernel/generic/gemmkernel_2x2.c View File

@@ -27,39 +27,8 @@
* *****************************************************************************/

#include "common.h"
#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)
static float
bfloat16tof32 (bfloat16 value)
{
blasint one = 1;
float result;
sbf16tos_(&one, &value, &one, &result, &one);
return result;
}

#ifdef BGEMM
static bfloat16 f32tobfloat16(float value) {
blasint one = 1;
bfloat16 result;
sbstobf16_(&one, &value, &one, &result, &one);
return result;
}
#endif
#include "bf16_macros.h"

#ifdef BGEMM
#define ALPHA bfloat16tof32(alpha)
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (f32tobfloat16(x))
#else
#define ALPHA alpha
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) x
#endif
#else
#define ALPHA alpha
#define BF16TOF32(x) x
#define F32TOBF16(x) x
#endif
int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb,FLOAT* C,BLASLONG ldc
#ifdef TRMMKERNEL
,BLASLONG offset


+ 70
- 0
kernel/generic/gemv_n.c View File

@@ -0,0 +1,70 @@
/***************************************************************************
Copyright (c) 2013-2014, 2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "common.h"
#include "bf16_macros.h"

/*
 * Generic (unoptimised) GEMV kernel, non-transposed case.
 *
 * For every row i in [0, m) computes
 *     y[i*inc_y] = alpha * sum_{j<n} a[i + j*lda] * x[j*inc_x]
 *                  (+ beta * y[i*inc_y] when beta is non-zero)
 * i.e. `a` is walked column by column with a column stride of `lda`.
 *
 * Inputs are widened with BF16TOF32 and the result is narrowed with
 * F32TOBF16 (see bf16_macros.h), so the same source serves the bfloat16
 * builds and the plain floating-point builds.
 *
 * When beta compares equal to ZERO the previous contents of y are not read.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
  BLASLONG ix, iy;
  /* The matrix is stored as IFLOAT, so the cursor must use that type too
   * (FLOAT and IFLOAT may differ when only the inputs are bfloat16). */
  IFLOAT *a_ptr;
#ifdef BGEMM
  /* Accumulate in float; alpha/beta are converted once, not once per row. */
  float temp;
  const float alpha_val = ALPHA;
  const float beta_val = BETA;
#else
  FLOAT temp;
  const FLOAT alpha_val = ALPHA;
  const FLOAT beta_val = BETA;
#endif

  iy = 0;
  for (BLASLONG i = 0; i < m; i++)
  {
    temp = 0.0;

    ix = 0;
    a_ptr = a;
    for (BLASLONG j = 0; j < n; j++)
    {
      temp += BF16TOF32(a_ptr[i]) * BF16TOF32(x[ix]);
      ix += inc_x;
      a_ptr += lda;
    }

    if (beta_val == ZERO)
    {
      y[iy] = F32TOBF16(alpha_val * temp);
    }
    else
    {
      /* Only widen the old y when the output really is bfloat16; in the
       * other builds y already holds values of the accumulator's kind. */
#ifdef BGEMM
      const float y_old = BF16TOF32(y[iy]);
#else
      const FLOAT y_old = y[iy];
#endif
      y[iy] = F32TOBF16(alpha_val * temp + beta_val * y_old);
    }

    iy += inc_y;
  }

  return (0);
}

+ 60
- 0
kernel/generic/gemv_t.c View File

@@ -0,0 +1,60 @@
/***************************************************************************
Copyright (c) 2013, 2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "common.h"
#include "bf16_macros.h"

/*
 * Generic (unoptimised) GEMV kernel, transposed case.
 *
 * For every column j in [0, n) computes
 *     y[j*inc_y] = alpha * sum_{i<m} a[i + j*lda] * x[i*inc_x]
 *                  (+ beta * y[j*inc_y] when beta is non-zero)
 *
 * Inputs are widened with BF16TOF32 and the result is narrowed with
 * F32TOBF16 (see bf16_macros.h).
 *
 * beta is applied here in the same way as in the non-transposed generic
 * kernel. The update is performed on widened values and then narrowed once;
 * adding an already-narrowed bfloat16 to y with `+=` would combine the
 * encoded representations rather than the numeric values.
 * NOTE(review): assumes the caller does not pre-scale y by beta before
 * invoking the kernel - confirm against the interface code.
 *
 * When beta compares equal to ZERO the previous contents of y are not read.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
  BLASLONG i;
  BLASLONG ix, iy;
  BLASLONG j;
  /* The matrix is stored as IFLOAT, so the cursor must use that type too. */
  IFLOAT *a_ptr;
#ifdef BGEMM
  /* Accumulate in float; alpha/beta are converted once, not once per column. */
  float temp;
  const float alpha_val = ALPHA;
  const float beta_val = BETA;
#else
  FLOAT temp;
  const FLOAT alpha_val = ALPHA;
  const FLOAT beta_val = BETA;
#endif

  iy = 0;
  a_ptr = a;

  for (j = 0; j < n; j++)
  {
    temp = 0.0;
    ix = 0;
    for (i = 0; i < m; i++)
    {
      temp += BF16TOF32(a_ptr[i]) * BF16TOF32(x[ix]);
      ix += inc_x;
    }

    if (beta_val == ZERO)
    {
      y[iy] = F32TOBF16(alpha_val * temp);
    }
    else
    {
      /* Only widen the old y when the output really is bfloat16. */
#ifdef BGEMM
      const float y_old = BF16TOF32(y[iy]);
#else
      const FLOAT y_old = y[iy];
#endif
      y[iy] = F32TOBF16(alpha_val * temp + beta_val * y_old);
    }

    iy += inc_y;
    a_ptr += lda;
  }
  return (0);
}

+ 106
- 0
kernel/generic/scal.c View File

@@ -0,0 +1,106 @@
/***************************************************************************
Copyright (c) 2013, 2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "common.h"

/*
 * Generic SCAL kernel: x[k*inc_x] = da * x[k*inc_x] for k in [0, n).
 *
 * The arithmetic is carried out in float. With BFLOAT16 defined, da and the
 * elements of x are converted with SBF16TOS_K / SBSTOBF16_K around each
 * multiplication.
 *
 * dummy2 selects how a zero scale factor is treated:
 *   dummy2 == 0 : every element is set to zero without reading x.
 *   dummy2 != 0 : finite elements become zero, NaN/Inf elements become NaN.
 *
 * dummy0, dummy1, y, inc_y and dummy are not used by this implementation.
 *
 * Returns 0; nothing is done when n <= 0 or inc_x <= 0.
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
  BLASLONG i = 0, j = 0;
  float x_float, da_float;

#if defined(BFLOAT16)
  SBF16TOS_K(1, &da, 1, &da_float, 1);
#else
  da_float = da;
#endif

  if ((n <= 0) || (inc_x <= 0))
    return (0);

  if (dummy2 == 0)
  {
    while (j < n)
    {
      if (da_float == 0.0)
      {
        x_float = 0.0;
      }
      else
      {
        /* Load into the function-scope x_float: a block-local declaration
         * here would leave the value stored below uninitialized. */
#if defined(BFLOAT16)
        SBF16TOS_K(1, &x[i], 1, &x_float, 1);
#else
        x_float = x[i];
#endif
        x_float = da_float * x_float;
      }

#if defined(BFLOAT16)
      SBSTOBF16_K(1, &x_float, 1, &x[i], 1);
#else
      x[i] = x_float;
#endif

      i += inc_x;
      j++;
    }
  }
  else
  {
    while (j < n)
    {
#if defined(BFLOAT16)
      SBF16TOS_K(1, &x[i], 1, &x_float, 1);
#else
      x_float = x[i];
#endif
      /* Test the converted scale factor, as the branch above does; the raw
       * `da` is not a float value when FLOAT is bfloat16. */
      if (da_float == 0.0)
      {
        if (!isnan(x_float) && !isinf(x_float))
        {
          x_float = 0.0;
        }
        else
        {
          x_float = NAN;
        }
      }
      else
      {
        x_float = da_float * x_float;
      }

#if defined(BFLOAT16)
      SBSTOBF16_K(1, &x_float, 1, &x[i], 1);
#else
      x[i] = x_float;
#endif

      i += inc_x;
      j++;
    }
  }
  return 0;
}

+ 2
- 2
kernel/setparam-ref.c View File

@@ -83,8 +83,8 @@ gotoblas_t TABLE_NAME = {
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
dsdot_kTS,
srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
sbgemv_nTS, sbgemv_tTS, sger_kTS,
srot_kTS, srotm_kTS, bscal_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
bgemv_nTS, bgemv_tTS, sbgemv_nTS, sbgemv_tTS, sger_kTS,
ssymv_LTS, ssymv_UTS,

bgemm_kernelTS, bgemm_betaTS,


+ 33
- 5
test/Makefile View File

@@ -119,6 +119,10 @@ endif
endif
endif

ifeq ($(BUILD_BFLOAT16), 1)
BB2 = test_bgemv
B2 = test_sbgemv
endif
ifeq ($(BUILD_SINGLE),1)
S2=sblat2
endif
@@ -132,11 +136,17 @@ ifeq ($(BUILD_COMPLEX16),1)
Z2=zblat2
endif

level2: $(S2) $(D2) $(C2) $(Z2)
level2: $(BB2) $(B2) $(S2) $(D2) $(C2) $(Z2)


ifneq ($(CROSS), 1)
rm -f ?BLAT2.SUMM
ifeq ($(BUILD_BFLOAT16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_bgemv > BBLAT2.SUMM
@$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_sbgemv > SBBLAT2.SUMM
@$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
@@ -156,6 +166,12 @@ endif
ifdef SMP
rm -f ?BLAT2.SUMM
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_BFLOAT16),1)
OMP_NUM_THREADS=2 ./test_bgemv > BBLAT2.SUMM
@$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0
OMP_NUM_THREADS=2 ./test_sbgemv > SBBLAT2.SUMM
@$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
@@ -173,6 +189,12 @@ ifeq ($(BUILD_COMPLEX16),1)
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
else
ifeq ($(BUILD_BFLOAT16),1)
OMP_NUM_THREADS=2 ./test_bgemv > BBLAT2.SUMM
@$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0
OMP_NUM_THREADS=2 ./test_sbgemv > SBBLAT2.SUMM
@$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
@@ -195,7 +217,7 @@ endif

ifeq ($(BUILD_BFLOAT16),1)
BF3= test_bgemm
B3= test_sbgemm
B3 = test_sbgemm
endif
ifeq ($(BUILD_SINGLE),1)
S3=sblat3
@@ -404,10 +426,16 @@ endif

ifeq ($(BUILD_BFLOAT16),1)
test_bgemm : compare_sgemm_bgemm.c test_helpers.h ../$(LIBNAME)
$(CC) $(CLDFLAGS) -o test_bgemm compare_sgemm_bgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
$(CC) $(CLDFLAGS) -DIBFLOAT16 -DOBFLOAT16 -o test_bgemm compare_sgemm_bgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

test_bgemv : compare_sgemv_bgemv.c ../$(LIBNAME)
$(CC) $(CLDFLAGS) -DIBFLOAT16 -DOBFLOAT16 -o test_bgemv compare_sgemv_bgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

test_sbgemm : compare_sgemm_sbgemm.c test_helpers.h ../$(LIBNAME)
$(CC) $(CLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
$(CC) $(CLDFLAGS) -DIBFLOAT16 -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

test_sbgemv : compare_sgemv_sbgemv.c ../$(LIBNAME)
$(CC) $(CLDFLAGS) -DIBFLOAT16 -o test_sbgemv compare_sgemv_sbgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_COMPLEX),1)
@@ -426,7 +454,7 @@ clean:
@rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \
sblat1 dblat1 cblat1 zblat1 \
sblat2 dblat2 cblat2 zblat2 \
test_bgemm test_sbgemm sblat3 dblat3 cblat3 zblat3 \
test_bgemm test_bgemv test_sbgemm test_sbgemv sblat3 dblat3 cblat3 zblat3 \
sblat1p dblat1p cblat1p zblat1p \
sblat2p dblat2p cblat2p zblat2p \
sblat3p dblat3p cblat3p zblat3p \


+ 2
- 10
test/compare_sgemm_bgemm.c View File

@@ -34,15 +34,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define BGEMM BLASFUNC(bgemm)
#define BGEMM_LARGEST 256

static float truncate_float32_to_bfloat16(float value) {
blasint one = 1;
bfloat16 tmp;
float result;
sbstobf16_(&one, &value, &one, &tmp, &one);
sbf16tos_(&one, &tmp, &one, &result, &one);
return result;
}

int
main (int argc, char *argv[])
{
@@ -158,6 +149,7 @@ main (int argc, char *argv[])

if (ret != 0) {
fprintf (stderr, "FATAL ERROR BGEMM - Return code: %d\n", ret);
return ret;
}

return ret;
}

+ 0
- 80
test/compare_sgemm_sbgemm.c View File

@@ -141,87 +141,7 @@ main (int argc, char *argv[])

if (ret != 0) {
fprintf (stderr, "FATAL ERROR SBGEMM - Return code: %d\n", ret);
return ret;
}

for (beta = 0; beta < 3; beta += 1) {
for (alpha = 0; alpha < 3; alpha += 1) {
for (l = 0; l < 2; l++) { // l = 1 to test inc_x & inc_y not equal to one.
for (x = 1; x <= loop; x++)
{
k = (x == 0) ? 0 : l + 1;
float *A = (float *)malloc_safe(x * x * sizeof(FLOAT));
float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l);
float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l);
bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16));
bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
float *DD = (float *)malloc_safe(x * sizeof(FLOAT));
float *CC = (float *)malloc_safe(x * sizeof(FLOAT) << l);
if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) ||
(DD == NULL) || (CC == NULL))
return 1;
blasint one = 1;

for (j = 0; j < x; j++)
{
for (i = 0; i < x; i++)
{
A[j * x + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
sbstobf16_(&one, &A[j*x+i], &one, &AA[j * x + i], &one);
}
B[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one);
CC[j << l] = C[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
}

for (y = 0; y < 2; y++)
{
if (y == 0) {
transA = 'N';
} else {
transA = 'T';
}

memset(CC, 0, x * sizeof(FLOAT) << l);
memset(DD, 0, x * sizeof(FLOAT));
memset(C, 0, x * sizeof(FLOAT) << l);

SGEMV (&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k);
SBGEMV (&transA, &x, &x, &alpha, (bfloat16*) AA, &x, (bfloat16*) BB, &k, &beta, CC, &k);

for (int i = 0; i < x; i ++) DD[i] *= beta;

for (j = 0; j < x; j++)
for (i = 0; i < x; i++)
if (transA == 'N') {
DD[i] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[j << l]);
} else if (transA == 'T') {
DD[j] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[i << l]);
}

for (j = 0; j < x; j++) {
if (!is_close(CC[j << l], C[j << l], 0.01, 0.001)) {
ret++;
}
if (!is_close(CC[j << l], DD[j], 0.001, 0.0001)) {
ret++;
}
}
}
free(A);
free(B);
free(C);
free(AA);
free(BB);
free(DD);
free(CC);
} // x
} // l
} // alpha
} // beta

if (ret != 0)
fprintf (stderr, "FATAL ERROR SBGEMV - Return code: %d\n", ret);
return ret;
}

+ 149
- 0
test/compare_sgemv_bgemv.c View File

@@ -0,0 +1,149 @@
/***************************************************************************
Copyright (c) 2020,2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdint.h>
#include "../common.h"

#include "test_helpers.h"

#define SGEMV BLASFUNC(sgemv)
#define BGEMV BLASFUNC(bgemv)
#define BGEMV_LARGEST 256

/*
 * Compares BGEMV (bfloat16 in, bfloat16 out) against SGEMV and against a
 * straightforward float reference (DD) for square problems of size
 * 1..BGEMV_LARGEST, both transposition modes, increments 1 and 2, and
 * alpha/beta in {0, 1, 2}.
 *
 * Each mismatch is printed to stdout and counted; the count is the process
 * exit status, and a summary line is written to stderr when it is non-zero.
 * Returns 1 immediately if an allocation fails.
 *
 * NOTE(review): C, CC and DD are zeroed right before every call, so the
 * beta * y term is always zero and the beta loop does not exercise
 * accumulation into a non-zero y.
 */
int main(int argc, char *argv[])
{
  blasint k;
  int i, j, l;
  blasint x, y;
  blasint one = 1;
  int ret = 0;
  int loop = BGEMV_LARGEST;
  char transA = 'N';
  float alpha = 1.0, beta = 0.0;
  bfloat16 alpha_bf16, beta_bf16;

  for (beta = 0; beta < 3; beta += 1)
  {
    for (alpha = 0; alpha < 3; alpha += 1)
    {
      for (l = 0; l < 2; l++)
      { // l = 1 to test inc_x & inc_y not equal to one.
        for (x = 1; x <= loop; x++)
        {
          k = (x == 0) ? 0 : l + 1;
          float *A = (float *)malloc_safe(x * x * sizeof(FLOAT));
          float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l);
          float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l);
          bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16));
          bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
          bfloat16 *CC = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
          float *DD = (float *)malloc_safe(x * sizeof(FLOAT));
          if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) ||
              (CC == NULL) || (DD == NULL))
            return 1;

          for (j = 0; j < x; j++)
          {
            for (i = 0; i < x; i++)
            {
              A[j * x + i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5;
              sbstobf16_(&one, &A[j * x + i], &one, &AA[j * x + i], &one);
            }
            B[j << l] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5;
            sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one);

            C[j << l] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5;
            /* CC is the bfloat16 counterpart of C, so convert from C. */
            sbstobf16_(&one, &C[j << l], &one, &CC[j << l], &one);
          }

          for (y = 0; y < 2; y++)
          {
            if (y == 0)
            {
              transA = 'N';
            }
            else
            {
              transA = 'T';
            }

            memset(C, 0, x * sizeof(FLOAT) << l);
            memset(CC, 0, x * sizeof(bfloat16) << l);
            memset(DD, 0, x * sizeof(FLOAT));

            sbstobf16_(&one, &alpha, &one, &alpha_bf16, &one);
            sbstobf16_(&one, &beta, &one, &beta_bf16, &one);
            SGEMV(&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k);
            BGEMV(&transA, &x, &x, &alpha_bf16, AA, &x, BB, &k, &beta_bf16, CC, &k);

            for (i = 0; i < x; i++)
              DD[i] *= beta;

            /* Float reference built from the bfloat16-rounded inputs. */
            for (j = 0; j < x; j++)
              for (i = 0; i < x; i++)
                if (transA == 'N')
                {
                  DD[i] += alpha * float16to32(AA[j * x + i]) * float16to32(BB[j << l]);
                }
                else if (transA == 'T')
                {
                  DD[j] += alpha * float16to32(AA[j * x + i]) * float16to32(BB[i << l]);
                }

            for (j = 0; j < x; j++)
            {
              /* BGEMV vs. SGEMV (looser tolerance). */
              if (!is_close(float16to32(CC[j << l]), truncate_float32_to_bfloat16(C[j << l]), 0.01, 0.001))
              {
                /* blasint may be wider than int, hence the casts for %d. */
                printf("Mismatch at trans=%c, alpha=%.2f, beta=%.2f, x=%d, j=%d, k=%d: CC=%.6f, C=%.6f\n",
                       transA, alpha, beta, (int)x, j, (int)k, float16to32(CC[j << l]), truncate_float32_to_bfloat16(C[j << l]));
                ret++;
              }
              /* BGEMV vs. the float reference (tighter tolerance). */
              if (!is_close(float16to32(CC[j << l]), truncate_float32_to_bfloat16(DD[j]), 0.001, 0.0001))
              {
                printf("Mismatch at trans=%c, alpha=%.2f, beta=%.2f, x=%d, j=%d, k=%d: CC=%.6f, DD=%.6f\n",
                       transA, alpha, beta, (int)x, j, (int)k, float16to32(CC[j << l]), truncate_float32_to_bfloat16(DD[j]));
                ret++;
              }
            }
          }

          free(A);
          free(B);
          free(C);
          free(AA);
          free(BB);
          free(CC);
          free(DD);
        } // x
      } // l
    } // alpha
  } // beta

  if (ret != 0)
    fprintf(stderr, "FATAL ERROR BGEMV - Return code: %d\n", ret);
  return ret;
}

+ 128
- 0
test/compare_sgemv_sbgemv.c View File

@@ -0,0 +1,128 @@
/***************************************************************************
Copyright (c) 2020,2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdint.h>
#include "../common.h"

#include "test_helpers.h"

#define SGEMV BLASFUNC(sgemv)
#define SBGEMV BLASFUNC(sbgemv)
#define SBGEMV_LARGEST 256

/*
 * Compares SBGEMV (bfloat16 inputs, float output) against SGEMV and against
 * a plain float reference for square problems of size 1..SBGEMV_LARGEST,
 * both transposition modes, increments 1 and 2, and alpha/beta in {0, 1, 2}.
 *
 * The number of mismatching elements is returned as the exit status, with a
 * summary on stderr when it is non-zero. Returns 1 if an allocation fails.
 */
int
main (int argc, char *argv[])
{
  const int largest = SBGEMV_LARGEST;
  int failures = 0;
  int shift;
  blasint dim, pass, inc;
  char transA = 'N';
  float alpha = 1.0, beta = 0.0;

  for (beta = 0; beta < 3; beta += 1) {
    for (alpha = 0; alpha < 3; alpha += 1) {
      /* shift == 1 exercises vector increments different from one. */
      for (shift = 0; shift < 2; shift++) {
        for (dim = 1; dim <= largest; dim++) {
          inc = (dim == 0) ? 0 : shift + 1;

          float *A = (float *)malloc_safe(dim * dim * sizeof(FLOAT));
          float *B = (float *)malloc_safe(dim * sizeof(FLOAT) << shift);
          float *C = (float *)malloc_safe(dim * sizeof(FLOAT) << shift);
          bfloat16 *AA = (bfloat16 *)malloc_safe(dim * dim * sizeof(bfloat16));
          bfloat16 *BB = (bfloat16 *)malloc_safe(dim * sizeof(bfloat16) << shift);
          float *CC = (float *)malloc_safe(dim * sizeof(FLOAT) << shift);
          float *DD = (float *)malloc_safe(dim * sizeof(FLOAT));
          if (!A || !B || !C || !AA || !BB || !DD || !CC)
            return 1;

          blasint one = 1;

          /* Random data in float, mirrored into bfloat16 for A and B. */
          for (int row = 0; row < dim; row++) {
            for (int col = 0; col < dim; col++) {
              A[row * dim + col] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
              sbstobf16_(&one, &A[row * dim + col], &one, &AA[row * dim + col], &one);
            }
            B[row << shift] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
            sbstobf16_(&one, &B[row << shift], &one, &BB[row << shift], &one);
            CC[row << shift] = C[row << shift] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
          }

          for (pass = 0; pass < 2; pass++) {
            transA = (pass == 0) ? 'N' : 'T';

            memset(CC, 0, dim * sizeof(FLOAT) << shift);
            memset(DD, 0, dim * sizeof(FLOAT));
            memset(C, 0, dim * sizeof(FLOAT) << shift);

            SGEMV (&transA, &dim, &dim, &alpha, A, &dim, B, &inc, &beta, C, &inc);
            SBGEMV (&transA, &dim, &dim, &alpha, (bfloat16*) AA, &dim, (bfloat16*) BB, &inc, &beta, CC, &inc);

            for (int idx = 0; idx < dim; idx++)
              DD[idx] *= beta;

            /* Float reference from the bfloat16-rounded inputs; the loop
             * nest order matches for both modes so sums accumulate in the
             * same sequence. */
            if (transA == 'N') {
              for (int row = 0; row < dim; row++)
                for (int col = 0; col < dim; col++)
                  DD[col] += alpha * float16to32 (AA[row * dim + col]) * float16to32 (BB[row << shift]);
            } else if (transA == 'T') {
              for (int row = 0; row < dim; row++)
                for (int col = 0; col < dim; col++)
                  DD[row] += alpha * float16to32 (AA[row * dim + col]) * float16to32 (BB[col << shift]);
            }

            for (int idx = 0; idx < dim; idx++) {
              /* SBGEMV vs. SGEMV, then SBGEMV vs. the reference. */
              if (!is_close(CC[idx << shift], C[idx << shift], 0.01, 0.001))
                ret_count_sgemv: failures++;
              if (!is_close(CC[idx << shift], DD[idx], 0.001, 0.0001))
                failures++;
            }
          }

          free(A);
          free(B);
          free(C);
          free(AA);
          free(BB);
          free(DD);
          free(CC);
        }
      }
    }
  }

  if (failures != 0)
    fprintf (stderr, "FATAL ERROR SBGEMV - Return code: %d\n", failures);
  return failures;
}

+ 12
- 1
test/test_helpers.h View File

@@ -31,7 +31,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "../common.h"

#if IFLOAT == bfloat16
#ifdef IBFLOAT16
static float float16to32(bfloat16 value)
{
blasint one = 1;
@@ -41,6 +41,17 @@ static float float16to32(bfloat16 value)
}
#endif

#ifdef OBFLOAT16
/*
 * Round-trips a float through bfloat16: the value is narrowed with
 * sbstobf16_ and the narrowed value is widened again with sbf16tos_.
 * The result is the float that the bfloat16 representation stands for.
 */
static float truncate_float32_to_bfloat16(float value) {
  bfloat16 narrowed;
  float widened;
  blasint unit = 1; /* used as both element count and increments */

  sbstobf16_(&unit, &value, &unit, &narrowed, &unit);
  sbf16tos_(&unit, &narrowed, &unit, &widened, &unit);

  return widened;
}
#endif

static void *malloc_safe(size_t size) {
if (size == 0)
return malloc(1);


Loading…
Cancel
Save