* Make building the bfloat16 BLAS functions conditional on BUILD_HALF
* Pass the BUILD_HALF option to gensymbol
* Pass BUILD_HALF as a compiler define for dynamic_arch builds

tags/v0.3.10^2
@@ -86,10 +86,13 @@ if (NOT NO_LAPACK) | |||||
list(APPEND SUBDIRS lapack) | list(APPEND SUBDIRS lapack) | ||||
endif () | endif () | ||||
if (NOT DEFINED BUILD_HALF) | |||||
set (BUILD_HALF false) | |||||
endif () | |||||
# set which float types we want to build for | # set which float types we want to build for | ||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | ||||
# if none are defined, build for all | # if none are defined, build for all | ||||
set(BUILD_HALF true) | |||||
# set(BUILD_HALF true) | |||||
set(BUILD_SINGLE true) | set(BUILD_SINGLE true) | ||||
set(BUILD_DOUBLE true) | set(BUILD_DOUBLE true) | ||||
set(BUILD_COMPLEX true) | set(BUILD_COMPLEX true) | ||||
@@ -121,7 +124,7 @@ if (BUILD_COMPLEX16) | |||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE | list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE | ||||
endif () | endif () | ||||
if (BUILD_SINGLE OR BUILD_HALF) | |||||
if (BUILD_HALF) | |||||
message(STATUS "Building Half Precision") | message(STATUS "Building Half Precision") | ||||
list(APPEND FLOAT_TYPES "HALF") # defines nothing | list(APPEND FLOAT_TYPES "HALF") # defines nothing | ||||
endif () | endif () | ||||
@@ -273,6 +273,9 @@ COMMON_PROF = -pg | |||||
# | # | ||||
# CPP_THREAD_SAFETY_TEST = 1 | # CPP_THREAD_SAFETY_TEST = 1 | ||||
# If you want to enable the experimental BFLOAT16 support | |||||
# BUILD_HALF = 1 | |||||
# | # | ||||
# End of user configuration | # End of user configuration | ||||
# | # |
@@ -1124,6 +1124,10 @@ ifeq ($(USE_TLS), 1) | |||||
CCOMMON_OPT += -DUSE_TLS | CCOMMON_OPT += -DUSE_TLS | ||||
endif | endif | ||||
ifeq ($(BUILD_HALF), 1) | |||||
CCOMMON_OPT += -DBUILD_HALF | |||||
endif | |||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\" | CCOMMON_OPT += -DVERSION=\"$(VERSION)\" | ||||
ifndef SYMBOLPREFIX | ifndef SYMBOLPREFIX | ||||
@@ -1395,6 +1399,7 @@ export KERNELDIR | |||||
export FUNCTION_PROFILE | export FUNCTION_PROFILE | ||||
export TARGET_CORE | export TARGET_CORE | ||||
export NO_AVX512 | export NO_AVX512 | ||||
export BUILD_HALF | |||||
export SHGEMM_UNROLL_M | export SHGEMM_UNROLL_M | ||||
export SHGEMM_UNROLL_N | export SHGEMM_UNROLL_N | ||||
@@ -113,6 +113,7 @@ macro(SetDefaultL1) | |||||
set(ZSUMKERNEL zsum.S) | set(ZSUMKERNEL zsum.S) | ||||
set(QSUMKERNEL sum.S) | set(QSUMKERNEL sum.S) | ||||
set(XSUMKERNEL zsum.S) | set(XSUMKERNEL zsum.S) | ||||
if (BUILD_HALF) | |||||
set(SHAMINKERNEL ../arm/amin.c) | set(SHAMINKERNEL ../arm/amin.c) | ||||
set(SHAMAXKERNEL ../arm/amax.c) | set(SHAMAXKERNEL ../arm/amax.c) | ||||
set(SHMAXKERNEL ../arm/max.c) | set(SHMAXKERNEL ../arm/max.c) | ||||
@@ -131,6 +132,7 @@ macro(SetDefaultL1) | |||||
set(SHNRM2KERNEL ../arm/nrm2.c) | set(SHNRM2KERNEL ../arm/nrm2.c) | ||||
set(SHSUMKERNEL ../arm/sum.c) | set(SHSUMKERNEL ../arm/sum.c) | ||||
set(SHSWAPKERNEL ../arm/swap.c) | set(SHSWAPKERNEL ../arm/swap.c) | ||||
endif () | |||||
endmacro () | endmacro () | ||||
macro(SetDefaultL2) | macro(SetDefaultL2) | ||||
@@ -179,10 +181,11 @@ macro(SetDefaultL2) | |||||
set(XHEMV_L_KERNEL ../generic/zhemv_k.c) | set(XHEMV_L_KERNEL ../generic/zhemv_k.c) | ||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c) | set(XHEMV_V_KERNEL ../generic/zhemv_k.c) | ||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c) | set(XHEMV_M_KERNEL ../generic/zhemv_k.c) | ||||
if (BUILD_HALF) | |||||
set(SHGEMVNKERNEL ../arm/gemv_n.c) | set(SHGEMVNKERNEL ../arm/gemv_n.c) | ||||
set(SHGEMVTKERNEL ../arm/gemv_t.c) | set(SHGEMVTKERNEL ../arm/gemv_t.c) | ||||
set(SHGERKERNEL ../generic/ger.c) | set(SHGERKERNEL ../generic/ger.c) | ||||
endif () | |||||
endmacro () | endmacro () | ||||
macro(SetDefaultL3) | macro(SetDefaultL3) | ||||
@@ -190,6 +193,7 @@ macro(SetDefaultL3) | |||||
set(DGEADD_KERNEL ../generic/geadd.c) | set(DGEADD_KERNEL ../generic/geadd.c) | ||||
set(CGEADD_KERNEL ../generic/zgeadd.c) | set(CGEADD_KERNEL ../generic/zgeadd.c) | ||||
set(ZGEADD_KERNEL ../generic/zgeadd.c) | set(ZGEADD_KERNEL ../generic/zgeadd.c) | ||||
if (BUILD_HALF) | |||||
set(SHGEADD_KERNEL ../generic/geadd.c) | set(SHGEADD_KERNEL ../generic/geadd.c) | ||||
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c) | set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c) | ||||
set(SHGEMM_BETA ../generic/gemm_beta.c) | set(SHGEMM_BETA ../generic/gemm_beta.c) | ||||
@@ -201,6 +205,6 @@ macro(SetDefaultL3) | |||||
set(SHGEMMITCOPYOBJ shgemm_itcopy.o) | set(SHGEMMITCOPYOBJ shgemm_itcopy.o) | ||||
set(SHGEMMONCOPYOBJ shgemm_oncopy.o) | set(SHGEMMONCOPYOBJ shgemm_oncopy.o) | ||||
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o) | set(SHGEMMOTCOPYOBJ shgemm_otcopy.o) | ||||
endif () | |||||
endmacro () | endmacro () |
@@ -47,7 +47,7 @@ typedef struct { | |||||
int dtb_entries; | int dtb_entries; | ||||
int offsetA, offsetB, align; | int offsetA, offsetB, align; | ||||
#if 1 | |||||
#ifdef BUILD_HALF | |||||
int shgemm_p, shgemm_q, shgemm_r; | int shgemm_p, shgemm_q, shgemm_r; | ||||
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn; | int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn; | ||||
@@ -1002,12 +1002,14 @@ extern gotoblas_t *gotoblas; | |||||
#define HAVE_EX_L2 gotoblas -> exclusive_cache | #define HAVE_EX_L2 gotoblas -> exclusive_cache | ||||
#ifdef BUILD_HALF | |||||
#define SHGEMM_P gotoblas -> shgemm_p | #define SHGEMM_P gotoblas -> shgemm_p | ||||
#define SHGEMM_Q gotoblas -> shgemm_q | #define SHGEMM_Q gotoblas -> shgemm_q | ||||
#define SHGEMM_R gotoblas -> shgemm_r | #define SHGEMM_R gotoblas -> shgemm_r | ||||
#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m | #define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m | ||||
#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n | #define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n | ||||
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn | #define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn | ||||
#endif | |||||
#define SGEMM_P gotoblas -> sgemm_p | #define SGEMM_P gotoblas -> sgemm_p | ||||
#define SGEMM_Q gotoblas -> sgemm_q | #define SGEMM_Q gotoblas -> sgemm_q | ||||
@@ -1086,6 +1088,7 @@ extern gotoblas_t *gotoblas; | |||||
#define HAVE_EX_L2 0 | #define HAVE_EX_L2 0 | ||||
#endif | #endif | ||||
#ifdef BUILD_HALF | |||||
#define SHGEMM_P SHGEMM_DEFAULT_P | #define SHGEMM_P SHGEMM_DEFAULT_P | ||||
#define SHGEMM_Q SHGEMM_DEFAULT_Q | #define SHGEMM_Q SHGEMM_DEFAULT_Q | ||||
#define SHGEMM_R SHGEMM_DEFAULT_R | #define SHGEMM_R SHGEMM_DEFAULT_R | ||||
@@ -1096,6 +1099,7 @@ extern gotoblas_t *gotoblas; | |||||
#else | #else | ||||
#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N)) | #define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N)) | ||||
#endif | #endif | ||||
#endif | |||||
#define SGEMM_P SGEMM_DEFAULT_P | #define SGEMM_P SGEMM_DEFAULT_P | ||||
#define SGEMM_Q SGEMM_DEFAULT_Q | #define SGEMM_Q SGEMM_DEFAULT_Q | ||||
@@ -1330,31 +1334,31 @@ extern gotoblas_t *gotoblas; | |||||
#endif | #endif | ||||
#ifndef SHGEMM_DEFAULT_R | #ifndef SHGEMM_DEFAULT_R | ||||
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15) | |||||
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef SGEMM_DEFAULT_R | #ifndef SGEMM_DEFAULT_R | ||||
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q * 4) - 15) & ~15) | |||||
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q * 4) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef DGEMM_DEFAULT_R | #ifndef DGEMM_DEFAULT_R | ||||
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q * 8) - 15) & ~15) | |||||
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q * 8) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef QGEMM_DEFAULT_R | #ifndef QGEMM_DEFAULT_R | ||||
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15) | |||||
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef CGEMM_DEFAULT_R | #ifndef CGEMM_DEFAULT_R | ||||
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q * 8) - 15) & ~15) | |||||
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q * 8) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef ZGEMM_DEFAULT_R | #ifndef ZGEMM_DEFAULT_R | ||||
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15) | |||||
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef XGEMM_DEFAULT_R | #ifndef XGEMM_DEFAULT_R | ||||
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15) | |||||
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15UL) | |||||
#endif | #endif | ||||
#ifndef SNUMOPT | #ifndef SNUMOPT | ||||
@@ -19,7 +19,10 @@ ifeq ($(ARCH), MIPS) | |||||
USE_GEMM3M = 1 | USE_GEMM3M = 1 | ||||
endif | endif | ||||
ifeq ($(BUILD_HALF),1) | |||||
SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX) | SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX) | ||||
endif | |||||
SBLASOBJS += \ | SBLASOBJS += \ | ||||
sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \ | sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \ | ||||
strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \ | strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \ | ||||
@@ -204,8 +207,9 @@ COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$( | |||||
COMMONOBJS += syrk_thread.$(SUFFIX) | COMMONOBJS += syrk_thread.$(SUFFIX) | ||||
ifndef USE_SIMPLE_THREADED_LEVEL3 | ifndef USE_SIMPLE_THREADED_LEVEL3 | ||||
ifeq ($(BUILD_HALF),1) | |||||
SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX) | SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX) | ||||
endif | |||||
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | ||||
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | ||||
QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX) | QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX) | ||||
@@ -30,6 +30,10 @@ ifndef BUILD_LAPACK_DEPRECATED | |||||
BUILD_LAPACK_DEPRECATED = 0 | BUILD_LAPACK_DEPRECATED = 0 | ||||
endif | endif | ||||
ifndef BUILD_HALF | |||||
BUILD_HALF = 0 | |||||
endif | |||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
ifndef ONLY_CBLAS | ifndef ONLY_CBLAS | ||||
@@ -234,23 +238,23 @@ static : ../$(LIBNAME) | |||||
rm -f goto.$(SUFFIX) | rm -f goto.$(SUFFIX) | ||||
osx.def : gensymbol ../Makefile.system ../getarch.c | osx.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||||
aix.def : gensymbol ../Makefile.system ../getarch.c | aix.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c | objcopy.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||||
objconv.def : gensymbol ../Makefile.system ../getarch.c | objconv.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||||
test : linktest.c | test : linktest.c | ||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | ||||
rm -f linktest | rm -f linktest | ||||
linktest.c : gensymbol ../Makefile.system ../getarch.c | linktest.c : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > linktest.c | |||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c | |||||
clean :: | clean :: | ||||
@rm -f *.def *.dylib __.SYMDEF* *.renamed | @rm -f *.def *.dylib __.SYMDEF* *.renamed | ||||
@@ -30,7 +30,7 @@ | |||||
icamax,icamin,idamax,idamin,idmax,idmin,isamax,isamin,ismax,ismin, | icamax,icamin,idamax,idamin,idmax,idmin,isamax,isamin,ismax,ismin, | ||||
izamax,izamin,lsame,samax,samin,sasum,saxpy,scabs1,scamax, | izamax,izamin,lsame,samax,samin,sasum,saxpy,scabs1,scamax, | ||||
scamin,scasum,scnrm2,scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger, | scamin,scasum,scnrm2,scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger, | ||||
shgemm, smax,smin,snrm2, | |||||
smax,smin,snrm2, | |||||
srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap, | srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap, | ||||
ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv, | ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv, | ||||
strmm,strmv,strsm,strsv,zaxpy,zcopy,zdotc,zdotu,zdrot, | strmm,strmv,strsm,strsv,zaxpy,zcopy,zdotc,zdotu,zdrot, | ||||
@@ -51,6 +51,7 @@ | |||||
zimatcopy, | zimatcopy, | ||||
); | ); | ||||
@halfblasobjs = (shgemm); | |||||
@cblasobjs = ( | @cblasobjs = ( | ||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | ||||
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, | cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, | ||||
@@ -67,7 +68,7 @@ | |||||
cblas_isamax, cblas_izamax, | cblas_isamax, cblas_izamax, | ||||
cblas_sasum, cblas_saxpy, | cblas_sasum, cblas_saxpy, | ||||
cblas_scasum, cblas_scnrm2, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm, | cblas_scasum, cblas_scnrm2, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm, | ||||
cblas_sgemv, cblas_sger, cblas_shgemm, cblas_snrm2, cblas_srot, cblas_srotg, | |||||
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg, | |||||
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr, | cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr, | ||||
cblas_sswap, cblas_ssymm, cblas_ssymv, cblas_ssyr2, cblas_ssyr2k, cblas_ssyr, cblas_ssyrk, | cblas_sswap, cblas_ssymm, cblas_ssymv, cblas_ssyr2, cblas_ssyr2k, cblas_ssyr, cblas_ssyrk, | ||||
cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm, | cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm, | ||||
@@ -83,6 +84,8 @@ | |||||
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd | cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd | ||||
); | ); | ||||
@halfcblasobjs = (cblas_shgemm); | |||||
@exblasobjs = ( | @exblasobjs = ( | ||||
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, | qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, | ||||
qgemv,qger,qmax,qmin, | qgemv,qger,qmax,qmin, | ||||
@@ -3454,6 +3457,10 @@ use File::Spec; | |||||
use File::Basename; | use File::Basename; | ||||
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); | my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); | ||||
if ($ARGV[12] == 1) { | |||||
@blasobjs = (@blasobjs, @halfblasobjs); | |||||
@cblasobjs = (@cblasobjs, @halfcblasobjs); | |||||
} | |||||
if ($ARGV[8] == 1) { | if ($ARGV[8] == 1) { | ||||
#ONLY_CBLAS=1 | #ONLY_CBLAS=1 | ||||
@underscore_objs = (@misc_underscore_objs); | @underscore_objs = (@misc_underscore_objs); | ||||
@@ -46,7 +46,9 @@ SBLAS3OBJS = \ | |||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ | somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\ | ||||
sgeadd.$(SUFFIX) | sgeadd.$(SUFFIX) | ||||
ifeq ($(BUILD_HALF),1) | |||||
SHBLAS3OBJS = shgemm.$(SUFFIX) | SHBLAS3OBJS = shgemm.$(SUFFIX) | ||||
endif | |||||
DBLAS1OBJS = \ | DBLAS1OBJS = \ | ||||
daxpy.$(SUFFIX) dswap.$(SUFFIX) \ | daxpy.$(SUFFIX) dswap.$(SUFFIX) \ | ||||
@@ -278,7 +280,9 @@ CSBLAS3OBJS = \ | |||||
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ | cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\ | ||||
cblas_sgeadd.$(SUFFIX) | cblas_sgeadd.$(SUFFIX) | ||||
ifeq ($(BUILD_HALF),1) | |||||
CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX) | CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX) | ||||
endif | |||||
CDBLAS1OBJS = \ | CDBLAS1OBJS = \ | ||||
cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ | cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ | ||||
@@ -1214,8 +1218,10 @@ zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c | |||||
xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c | xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c | ||||
$(CC) -c $(CFLAGS) $< -o $(@F) | $(CC) -c $(CFLAGS) $< -o $(@F) | ||||
ifeq ($(BUILD_HALF),1) | |||||
shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h | shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -c $(CFLAGS) $< -o $(@F) | $(CC) -c $(CFLAGS) $< -o $(@F) | ||||
endif | |||||
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h | sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -c $(CFLAGS) $< -o $(@F) | $(CC) -c $(CFLAGS) $< -o $(@F) | ||||
@@ -1778,8 +1784,10 @@ cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c | |||||
cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h | cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | ||||
ifeq ($(BUILD_HALF),1) | |||||
cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h | cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | ||||
endif | |||||
cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h | cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | ||||
@@ -137,7 +137,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
foreach (float_type SINGLE DOUBLE HALF) | foreach (float_type SINGLE DOUBLE HALF) | ||||
string(SUBSTRING ${float_type} 0 1 float_char) | string(SUBSTRING ${float_type} 0 1 float_char) | ||||
if (${float_type} STREQUAL "HALF") | if (${float_type} STREQUAL "HALF") | ||||
set (float_char "SH") | |||||
if (NOT ${BUILD_HALF}) | |||||
continue () | |||||
else () | |||||
set (float_char "SH") | |||||
endif () | |||||
endif () | endif () | ||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) | ||||
endforeach() | endforeach() | ||||
@@ -59,7 +59,8 @@ ifeq ($(CORE), Z14) | |||||
USE_TRMM = 1 | USE_TRMM = 1 | ||||
endif | endif | ||||
#ifndef SHGEMMKERNEL | |||||
ifeq ($(BUILD_HALF), 1) | |||||
ifndef SHGEMMKERNEL | |||||
SHGEMM_BETA = ../generic/gemm_beta.c | SHGEMM_BETA = ../generic/gemm_beta.c | ||||
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c | SHGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c | SHGEMMINCOPY = ../generic/gemm_ncopy_2.c | ||||
@@ -70,12 +71,13 @@ SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX) | SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) | SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) | SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
#endif | |||||
endif | |||||
SHKERNELOBJS += \ | SHKERNELOBJS += \ | ||||
shgemm_kernel$(TSUFFIX).$(SUFFIX) \ | shgemm_kernel$(TSUFFIX).$(SUFFIX) \ | ||||
$(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \ | $(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \ | ||||
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) | $(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) | ||||
endif | |||||
SKERNELOBJS += \ | SKERNELOBJS += \ | ||||
sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | ||||
@@ -110,7 +112,9 @@ XKERNELOBJS += \ | |||||
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \ | $(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \ | ||||
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) | $(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) | ||||
ifeq ($(BUILD_HALF),1) | |||||
SHBLASOBJS += $(SHKERNELOBJS) | SHBLASOBJS += $(SHKERNELOBJS) | ||||
endif | |||||
SBLASOBJS += $(SKERNELOBJS) | SBLASOBJS += $(SKERNELOBJS) | ||||
DBLASOBJS += $(DKERNELOBJS) | DBLASOBJS += $(DKERNELOBJS) | ||||
QBLASOBJS += $(QKERNELOBJS) | QBLASOBJS += $(QKERNELOBJS) | ||||
@@ -118,7 +122,10 @@ CBLASOBJS += $(CKERNELOBJS) | |||||
ZBLASOBJS += $(ZKERNELOBJS) | ZBLASOBJS += $(ZKERNELOBJS) | ||||
XBLASOBJS += $(XKERNELOBJS) | XBLASOBJS += $(XKERNELOBJS) | ||||
ifeq ($(BUILD_HALF),1) | |||||
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) | SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) | ||||
endif | |||||
SBLASOBJS += \ | SBLASOBJS += \ | ||||
sgemm_beta$(TSUFFIX).$(SUFFIX) \ | sgemm_beta$(TSUFFIX).$(SUFFIX) \ | ||||
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | ||||
@@ -408,11 +415,13 @@ ZBLASOBJS += \ | |||||
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | ||||
zgeadd_k$(TSUFFIX).$(SUFFIX) | zgeadd_k$(TSUFFIX).$(SUFFIX) | ||||
ifeq ($(BUILD_HALF), 1) | |||||
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
endif | |||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
@@ -438,8 +447,10 @@ XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||||
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
ifeq ($(BUILD_HALF),1) | |||||
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | $(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | ||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | |||||
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | $(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | ||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -459,10 +470,14 @@ $(KDIR)zgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_BETA) | |||||
$(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) | $(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) | ||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ | ||||
ifeq ($(BUILD_HALF), 1) | |||||
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY) | $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY) | ||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY) | $(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY) | ||||
ifeq ($(OS), AIX) | ifeq ($(OS), AIX) | ||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s | $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s | ||||
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s | m4 shgemmotcopy.s > shgemmotcopy_nomacros.s | ||||
@@ -487,6 +502,7 @@ else | |||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
endif | |||||
endif | endif | ||||
$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | ||||
@@ -646,6 +662,8 @@ else | |||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
ifeq ($(BUILD_HALF), 1) | |||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | ||||
ifeq ($(OS), AIX) | ifeq ($(OS), AIX) | ||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s | $(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s | ||||
@@ -655,6 +673,7 @@ ifeq ($(OS), AIX) | |||||
else | else | ||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
endif | |||||
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) | $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) | ||||
ifeq ($(OS), AIX) | ifeq ($(OS), AIX) | ||||
@@ -2272,8 +2291,10 @@ $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_ | |||||
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | $(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | ||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
ifeq ($(BUILD_HALF),1) | |||||
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | $(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | ||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | |||||
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) | $(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) | ||||
$(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -2290,6 +2311,8 @@ $(KDIR)zgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMM_BETA) | |||||
$(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) | $(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA) | ||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@ | ||||
ifeq ($(BUILD_HALF), 1) | |||||
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY) | $(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY) | ||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -2304,6 +2327,8 @@ $(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY) | |||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
endif | |||||
$(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY) | $(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY) | ||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -2408,8 +2433,11 @@ endif | |||||
endif | endif | ||||
ifeq ($(BUILD_HALF), 1) | |||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | $(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | ||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | |||||
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) | $(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) | ||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -53,6 +53,7 @@ gotoblas_t TABLE_NAME = { | |||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | ||||
#ifdef BUILD_HALF | |||||
0, 0, 0, | 0, 0, 0, | ||||
SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N, | SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N, | ||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN | #ifdef SHGEMM_DEFAULT_UNROLL_MN | ||||
@@ -109,7 +110,7 @@ gotoblas_t TABLE_NAME = { | |||||
#else | #else | ||||
NULL,NULL, | NULL,NULL, | ||||
#endif | #endif | ||||
#endif | |||||
0, 0, 0, | 0, 0, 0, | ||||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | ||||
@@ -706,19 +707,25 @@ gotoblas_t TABLE_NAME = { | |||||
#if defined(ARCH_ARM64) | #if defined(ARCH_ARM64) | ||||
static void init_parameter(void) { | static void init_parameter(void) { | ||||
#if defined(BUILD_HALF) | |||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | ||||
#endif | |||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | ||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | ||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | ||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | ||||
#if defined(BUILD_HALF) | |||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | ||||
#endif | |||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | ||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | ||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | ||||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | ||||
#if defined(BUILD_HALF) | |||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | ||||
#endif | |||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | ||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | ||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | ||||
@@ -782,20 +789,26 @@ static void init_parameter(void) { | |||||
#if defined(ARCH_POWER) | #if defined(ARCH_POWER) | ||||
static void init_parameter(void) { | static void init_parameter(void) { | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | ||||
#endif | |||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | ||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | ||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | ||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | ||||
#endif | |||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | ||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | ||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | ||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | ||||
#endif | |||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | ||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | ||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | ||||
@@ -805,20 +818,26 @@ static void init_parameter(void) { | |||||
#if defined(ARCH_ZARCH) | #if defined(ARCH_ZARCH) | ||||
static void init_parameter(void) { | static void init_parameter(void) { | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | ||||
#endif | |||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | ||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | ||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | ||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | ||||
#endif | |||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | ||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | ||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | ||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | ||||
#endif | |||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | ||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | ||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | ||||
@@ -958,9 +977,11 @@ static void init_parameter(void) { | |||||
(void) l2; /* dirty trick to suppress unused variable warning for targets */ | (void) l2; /* dirty trick to suppress unused variable warning for targets */ | ||||
/* where the GEMM unrolling parameters do not depend on l2 */ | /* where the GEMM unrolling parameters do not depend on l2 */ | ||||
#ifdef BUILD_HALF | |||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | ||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | ||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | ||||
#endif | |||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | ||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | ||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | ||||