Support building only a subset of variable types (tags/v0.3.11^2)
@@ -304,6 +304,18 @@ else | |||
endif | |||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1) | |||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_SINGLE), 1) | |||
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_DOUBLE), 1) | |||
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_COMPLEX), 1) | |||
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_COMPLEX16), 1) | |||
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | |||
@@ -9,7 +9,7 @@ ifndef TOPDIR | |||
TOPDIR = . | |||
endif | |||
# If ARCH is not set, we use the host system's architecture for getarch compile options.
ifndef ARCH | |||
HOSTARCH := $(shell uname -m) | |||
else | |||
@@ -73,6 +73,18 @@ endif | |||
# | |||
# Beginning of system configuration | |||
# | |||
# Default precision selection: when none of the four BUILD_* switches has
# been set to 1, turn all of them on.  `override` makes the assignment win
# over values supplied on the make command line.
ifneq ($(BUILD_SINGLE),1)
  ifneq ($(BUILD_DOUBLE),1)
    ifneq ($(BUILD_COMPLEX),1)
      ifneq ($(BUILD_COMPLEX16),1)
        override BUILD_SINGLE    = 1
        override BUILD_DOUBLE    = 1
        override BUILD_COMPLEX   = 1
        override BUILD_COMPLEX16 = 1
      endif
    endif
  endif
endif
ifndef HOSTCC | |||
HOSTCC = $(CC) | |||
@@ -1224,16 +1236,16 @@ ifeq ($(BUILD_HALF), 1) | |||
CCOMMON_OPT += -DBUILD_HALF | |||
endif | |||
# Pass the enabled precisions on to the C sources as preprocessor macros.
# Each macro is added exactly once with an explicit value of 1; the bare
# `-DNAME` spelling defines the same value, so appending both forms only
# duplicated the flag on every compiler command line.
ifeq ($(BUILD_SINGLE), 1)
CCOMMON_OPT += -DBUILD_SINGLE=1
endif
ifeq ($(BUILD_DOUBLE), 1)
CCOMMON_OPT += -DBUILD_DOUBLE=1
endif
ifeq ($(BUILD_COMPLEX), 1)
CCOMMON_OPT += -DBUILD_COMPLEX=1
endif
ifeq ($(BUILD_COMPLEX16), 1)
CCOMMON_OPT += -DBUILD_COMPLEX16=1
endif
CCOMMON_OPT += -DVERSION=\"$(VERSION)\" | |||
@@ -11,8 +11,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) | |||
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) | |||
BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS) | |||
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P) | |||
ifdef EXPRECISION | |||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
@@ -1,11 +1,12 @@ | |||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. | |||
# Auxiliary LAPACK sources appended to LA_REL_SRC for every enabled precision.
# Each file is listed once; the ila*/chla_transtype row previously appeared
# twice, once with and once without dlaset.f.
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
   ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f dlaset.f
   ../INSTALL/ilaver.f xerbla_array.f
   ../INSTALL/slamch.f)
set(SCLAUX | |||
scombssq.f sbdsvdx.f sstevx.f sstein.f | |||
sbdsdc.f | |||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f | |||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f | |||
@@ -25,6 +26,7 @@ set(SCLAUX | |||
set(DZLAUX | |||
dbdsdc.f | |||
dbdsvdx.f dstevx.f dstein.f | |||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f | |||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f | |||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f | |||
@@ -35,14 +37,14 @@ set(DZLAUX | |||
dlartg.f dlaruv.f dlas2.f dlascl.f | |||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f | |||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f | |||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f | |||
dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f | |||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f | |||
dsteqr.f dsterf.f dlaisnan.f disnan.f | |||
dlartgp.f dlartgs.f | |||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f) | |||
set(SLASRC | |||
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | |||
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | |||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f | |||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f | |||
sgehd2.f sgehrd.f sgelq2.f sgelqf.f | |||
@@ -83,8 +85,8 @@ set(SLASRC | |||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f | |||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f | |||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f | |||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f | |||
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f | |||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstev.f sstevd.f sstevr.f | |||
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f | |||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f | |||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f | |||
ssyswapr.f ssytrs.f ssytrs2.f | |||
@@ -116,7 +118,7 @@ set(SLASRC | |||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f | |||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f | |||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f | |||
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f | |||
sgesvdq.f slaorhr_col_getrfnp.f | |||
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) | |||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f | |||
@@ -229,7 +231,7 @@ set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f | |||
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f) | |||
set(DLASRC | |||
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | |||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | |||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f | |||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f | |||
dgehd2.f dgehrd.f dgelq2.f dgelqf.f | |||
@@ -270,8 +272,8 @@ set(DLASRC | |||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f | |||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f | |||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f | |||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f | |||
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f | |||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstev.f dstevd.f dstevr.f | |||
dsycon.f dsyev.f dsyevd.f dsyevr.f | |||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f | |||
dsysv.f dsysvx.f | |||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f | |||
@@ -474,12 +476,16 @@ endif() | |||
# Single-precision complex: add the C routines plus the auxiliaries shared
# with the single-precision real build, and report the choice once.
if(BUILD_COMPLEX)
  set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
  set(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
  message(STATUS "Building Single Precision Complex")
endif()

# Double-precision complex: add the Z routines plus the auxiliaries shared
# with the double-precision real build, and report the choice once.
if(BUILD_COMPLEX16)
  set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
  set(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
  # for zlange/zlanhe
  if(NOT BUILD_DOUBLE)
    set(LA_REL_SRC ${LA_REL_SRC} dcombssq.f)
  endif()
  message(STATUS "Building Double Precision Complex")
endif()
# add lapack-netlib folder to the sources | |||
@@ -146,26 +146,34 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG); | |||
int (*shlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); | |||
#endif | |||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) | |||
int sgemm_p, sgemm_q, sgemm_r; | |||
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn; | |||
#endif | |||
int exclusive_cache; | |||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) | |||
float (*samax_k) (BLASLONG, float *, BLASLONG); | |||
float (*samin_k) (BLASLONG, float *, BLASLONG); | |||
float (*smax_k) (BLASLONG, float *, BLASLONG); | |||
float (*smin_k) (BLASLONG, float *, BLASLONG); | |||
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG); | |||
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG); | |||
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG); | |||
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
float (*snrm2_k) (BLASLONG, float *, BLASLONG); | |||
float (*sasum_k) (BLASLONG, float *, BLASLONG); | |||
#endif | |||
#ifdef BUILD_SINGLE | |||
float (*ssum_k) (BLASLONG, float *, BLASLONG); | |||
#endif | |||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) | |||
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); | |||
@@ -175,11 +183,15 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
#endif | |||
#ifdef BUILD_SINGLE | |||
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
#endif | |||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) | |||
#ifdef ARCH_X86_64 | |||
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); | |||
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K); | |||
@@ -193,7 +205,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
#endif | |||
#ifdef BUILD_SINGLE | |||
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); | |||
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); | |||
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); | |||
@@ -245,10 +258,14 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
int dgemm_p, dgemm_q, dgemm_r; | |||
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn; | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
double (*damax_k) (BLASLONG, double *, BLASLONG); | |||
double (*damin_k) (BLASLONG, double *, BLASLONG); | |||
double (*dmax_k) (BLASLONG, double *, BLASLONG); | |||
@@ -257,25 +274,37 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG); | |||
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG); | |||
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG); | |||
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
double (*dnrm2_k) (BLASLONG, double *, BLASLONG); | |||
double (*dasum_k) (BLASLONG, double *, BLASLONG); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
double (*dsum_k) (BLASLONG, double *, BLASLONG); | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
#endif | |||
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE) | |||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); | |||
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG); | |||
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
@@ -283,7 +312,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||
int (*dgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); | |||
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); | |||
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); | |||
@@ -335,7 +365,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||
int (*dneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
int (*dlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *); | |||
#endif | |||
#ifdef EXPRECISION | |||
int qgemm_p, qgemm_q, qgemm_r; | |||
@@ -430,6 +460,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX | |||
int cgemm_p, cgemm_q, cgemm_r; | |||
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn; | |||
@@ -593,7 +624,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); | |||
int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
int zgemm_p, zgemm_q, zgemm_r; | |||
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn; | |||
@@ -757,6 +790,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG); | |||
int (*zneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
int (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *); | |||
#endif | |||
#ifdef EXPRECISION | |||
@@ -930,22 +964,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
void (*init)(void); | |||
int snum_opt, dnum_opt, qnum_opt; | |||
#ifdef BUILD_SINGLE | |||
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX | |||
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_SINGLE | |||
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX | |||
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | |||
@@ -955,7 +1001,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | |||
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
@@ -965,17 +1013,23 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_SINGLE | |||
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX | |||
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||
@@ -985,7 +1039,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||
@@ -995,12 +1051,20 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||
#endif | |||
#ifdef BUILD_SINGLE | |||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX | |||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); | |||
#endif | |||
} gotoblas_t; | |||
extern gotoblas_t *gotoblas; | |||
@@ -1021,19 +1085,23 @@ extern gotoblas_t *gotoblas; | |||
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn | |||
#endif | |||
#if defined (BUILD_SINGLE) | |||
#define SGEMM_P gotoblas -> sgemm_p | |||
#define SGEMM_Q gotoblas -> sgemm_q | |||
#define SGEMM_R gotoblas -> sgemm_r | |||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m | |||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n | |||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn | |||
#endif | |||
#if defined (BUILD_DOUBLE) | |||
#define DGEMM_P gotoblas -> dgemm_p | |||
#define DGEMM_Q gotoblas -> dgemm_q | |||
#define DGEMM_R gotoblas -> dgemm_r | |||
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m | |||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n | |||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn | |||
#endif | |||
#define QGEMM_P gotoblas -> qgemm_p | |||
#define QGEMM_Q gotoblas -> qgemm_q | |||
@@ -1042,19 +1110,39 @@ extern gotoblas_t *gotoblas; | |||
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n | |||
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn | |||
#ifdef BUILD_COMPLEX | |||
#define CGEMM_P gotoblas -> cgemm_p | |||
#define CGEMM_Q gotoblas -> cgemm_q | |||
#define CGEMM_R gotoblas -> cgemm_r | |||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m | |||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n | |||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn | |||
#ifndef BUILD_SINGLE | |||
#define SGEMM_P gotoblas -> sgemm_p | |||
#define SGEMM_Q gotoblas -> sgemm_q | |||
#define SGEMM_R 1024 | |||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m | |||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n | |||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn | |||
#endif | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
#define ZGEMM_P gotoblas -> zgemm_p | |||
#define ZGEMM_Q gotoblas -> zgemm_q | |||
#define ZGEMM_R gotoblas -> zgemm_r | |||
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m | |||
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n | |||
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn | |||
#ifndef BUILD_DOUBLE | |||
#define DGEMM_P gotoblas -> dgemm_p | |||
#define DGEMM_Q gotoblas -> dgemm_q | |||
#define DGEMM_R 1024 | |||
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m | |||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n | |||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn | |||
#endif | |||
#endif | |||
#define XGEMM_P gotoblas -> xgemm_p | |||
#define XGEMM_Q gotoblas -> xgemm_q | |||
@@ -46,56 +46,155 @@ else | |||
all :: all1 all2 all3 | |||
endif | |||
# Level-1 test drivers, limited to the precisions that are being built.
ifeq ($(BUILD_SINGLE),1)
all1targets += xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
all1targets += xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
all1targets += xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
all1targets += xzcblat1
endif

# Build the enabled drivers and, unless CROSS is defined, run each one with
# two threads.  Only $(all1targets) may be listed as prerequisites: the link
# rules of disabled precisions are not defined, so naming all four drivers
# unconditionally would make a subset build fail.
all1: $(all1targets)
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat1
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat1
endif
endif
endif
# Level-2 test drivers, limited to the precisions that are being built.
ifeq ($(BUILD_SINGLE),1)
all2targets += xscblat2
endif
ifeq ($(BUILD_DOUBLE),1)
all2targets += xdcblat2
endif
ifeq ($(BUILD_COMPLEX),1)
all2targets += xccblat2
endif
ifeq ($(BUILD_COMPLEX16),1)
all2targets += xzcblat2
endif

# Build the enabled drivers and, unless CROSS is defined, run each one with
# two threads, feeding it the matching *in2 input file.  Only $(all2targets)
# may be listed as prerequisites: the link rules of disabled precisions are
# not defined, so an unconditional four-driver list breaks subset builds.
all2: $(all2targets)
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./xscblat2 < sin2
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./xdcblat2 < din2
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat2 < cin2
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat2 < zin2
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
endif
endif
endif
# Level-3 test drivers, limited to the precisions that are being built.
ifeq ($(BUILD_SINGLE),1)
all3targets += xscblat3
endif
ifeq ($(BUILD_DOUBLE),1)
all3targets += xdcblat3
endif
ifeq ($(BUILD_COMPLEX),1)
all3targets += xccblat3
endif
ifeq ($(BUILD_COMPLEX16),1)
all3targets += xzcblat3
endif

# Build the enabled drivers and, unless CROSS is defined, run each one with
# two threads, feeding it the matching *in3 input file.  Only $(all3targets)
# may be listed as prerequisites: the link rules of disabled precisions are
# not defined, so an unconditional four-driver list breaks subset builds.
all3: $(all3targets)
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./xscblat3 < sin3
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./xdcblat3 < din3
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat3 < cin3
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat3 < zin3
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
endif
endif
endif
# 3M-variant level-3 drivers; they are only built for the two complex
# precisions, so the prerequisite list follows the same BUILD_* switches that
# guard their link rules.
ifeq ($(BUILD_COMPLEX16),1)
all3_3mtargets += xzcblat3_3m
endif
ifeq ($(BUILD_COMPLEX),1)
all3_3mtargets += xccblat3_3m
endif

# Run each enabled 3M driver with two threads.  The single-complex driver is
# guarded by BUILD_COMPLEX in both the OpenMP and the non-OpenMP branch (the
# OpenMP branch used to test BUILD_SINGLE, which is the wrong precision).
all3_3m: $(all3_3mtargets)
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
else
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
endif
@@ -115,13 +214,19 @@ endif | |||
endif | |||
endif | |||
ifeq ($(BUILD_SINGLE),1)
# Single real: link the level-1/2/3 test drivers with $(FC).  Each driver
# depends on its c_sblatN.o object, the matching $(stestlNo) objects and the
# library itself; $@ is the driver being linked.

xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat1.o $(stestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xscblat2: $(stestl2o) c_sblat2.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat2.o $(stestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xscblat3: $(stestl3o) c_sblat3.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat3.o $(stestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_DOUBLE),1) | |||
# Double real | |||
xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
@@ -129,7 +234,10 @@ xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
# Single complex | |||
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
@@ -140,7 +248,10 @@ xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME) | |||
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
# Double complex | |||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
@@ -152,6 +263,6 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME) | |||
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
endif | |||
include $(TOPDIR)/Makefile.tail |
@@ -197,6 +197,19 @@ foreach (float_type ${FLOAT_TYPES}) | |||
endif () | |||
endforeach () | |||
# A complex precision built without its real counterpart still gets the
# threaded real GEMV objects (n and t variants) generated for it, but only
# in threaded builds.
if (USE_THREAD AND BUILD_COMPLEX AND NOT BUILD_SINGLE)
  GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE")
  GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "SINGLE")
endif ()

if (USE_THREAD AND BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "DOUBLE")
  GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "DOUBLE")
endif ()
if (USE_THREAD) | |||
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) | |||
endif () | |||
@@ -417,19 +417,63 @@ XBLASOBJS += \ | |||
endif | |||
# Without BUILD_SINGLE the single-precision object list is emptied, except
# for the few objects that SMP builds of other precisions still pull in:
#   - BUILD_DOUBLE or BUILD_COMPLEX: the threaded sgemv objects
#   - BUILD_DOUBLE:                  additionally the strsv objects
# The additions are cumulative.  Previously the BUILD_COMPLEX branch assigned
# with `=`, which threw away the strsv objects again whenever BUILD_DOUBLE and
# BUILD_COMPLEX were both enabled.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
ifdef SMP
ifneq ($(filter 1,$(BUILD_DOUBLE) $(BUILD_COMPLEX)),)
SBLASOBJS += sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS += \
	strsv_NUU.$(SUFFIX) strsv_NUN.$(SUFFIX) strsv_NLU.$(SUFFIX) strsv_NLN.$(SUFFIX) \
	strsv_TUU.$(SUFFIX) strsv_TUN.$(SUFFIX) strsv_TLU.$(SUFFIX) strsv_TLN.$(SUFFIX)
endif
endif
endif
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ifdef SMP | |||
DBLASOBJS = dgemv_thread_n.$(SUFFIX) dgemv_thread_t.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
ifneq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS= | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
CBLASOBJS= \ | |||
ctrsv_NUU.$(SUFFIX) ctrsv_NUN.$(SUFFIX) ctrsv_NLU.$(SUFFIX) ctrsv_NLN.$(SUFFIX) \ | |||
ctrsv_TUU.$(SUFFIX) ctrsv_TUN.$(SUFFIX) ctrsv_TLU.$(SUFFIX) ctrsv_TLN.$(SUFFIX) \ | |||
ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \ | |||
ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX) | |||
endif | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
all :: | |||
ifeq ($(BUILD_SINGLE),1) | |||
sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< | |||
sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c | |||
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< | |||
dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c | |||
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< | |||
endif | |||
qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< | |||
@@ -437,6 +481,8 @@ qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c | |||
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< | |||
ifeq ($(BUILD_COMPLEX),1) | |||
cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c | |||
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
@@ -460,6 +506,9 @@ cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c | |||
cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c | |||
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c | |||
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
@@ -484,6 +533,7 @@ zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c | |||
zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c | |||
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
endif | |||
xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c | |||
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
@@ -509,24 +559,34 @@ xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c | |||
xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c | |||
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
# Real gbmv_thread objects, all compiled from gbmv_thread.c with COMPLEX
# undefined.  The _n targets undefine TRANSA, the _t targets define it.
# Single precision (DOUBLE undefined), only when BUILD_SINGLE is 1.
ifeq ($(BUILD_SINGLE),1) | |||
sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< | |||
sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< | |||
endif | |||
# Double precision (DOUBLE defined), only when BUILD_DOUBLE is 1.
ifeq ($(BUILD_DOUBLE),1) | |||
dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< | |||
dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< | |||
endif | |||
# The XDOUBLE (q-prefixed) rules are not guarded by any BUILD_* switch here.
qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< | |||
qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< | |||
ifeq ($(BUILD_COMPLEX),1) | |||
cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
@@ -550,6 +610,10 @@ cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c | |||
cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
@@ -574,6 +638,7 @@ zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c | |||
zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
endif | |||
xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
@@ -599,24 +664,32 @@ xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c | |||
xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c | |||
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
# Threaded real gemv drivers, compiled from gemv_thread.c with COMPLEX, CONJ
# and XCONJ undefined; the _t targets additionally define TRANSA.
#
# A switch counts as enabled only when it is exactly 1, matching the
# "ifeq ($(BUILD_x),1)" tests used for the other rules in this file, so an
# explicit BUILD_x=0 is treated the same as leaving it unset.

# Single-precision drivers: provided when BUILD_SINGLE, BUILD_DOUBLE or
# BUILD_COMPLEX is enabled.
ifneq ($(filter 1,$(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX)),)
sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
	$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif

# Double-precision drivers: provided when BUILD_DOUBLE or BUILD_COMPLEX16 is
# enabled.
ifneq ($(filter 1,$(BUILD_DOUBLE) $(BUILD_COMPLEX16)),)
dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
	$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
	$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif
qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
ifeq ($(BUILD_COMPLEX),1) | |||
cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
@@ -640,6 +713,10 @@ cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common. | |||
cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
@@ -664,6 +741,7 @@ zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common. | |||
zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) | |||
endif | |||
xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
@@ -14,6 +14,24 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
endif () | |||
endforeach () | |||
# Double complex is enabled but real double is not: for every entry of
# GEMM_DEFINES, register a gemm_<define> object built from gemm.c with "DOUBLE"
# as the last argument (presumably the float type -- confirm against
# GenerateNamedObjects).  Threaded builds that do not use
# USE_SIMPLE_THREADED_LEVEL3 also get the gemm_thread_<define> variant, built
# with the extra THREADED_LEVEL3 define.
if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE") | |||
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE") | |||
endif() | |||
endforeach() | |||
endif() | |||
# Single complex is enabled but real single is not: for every entry of
# GEMM_DEFINES, register a gemm_<define> object built from gemm.c with "SINGLE"
# as the last argument (presumably the float type -- confirm against
# GenerateNamedObjects).  Threaded builds that do not use
# USE_SIMPLE_THREADED_LEVEL3 also get the gemm_thread_<define> variant, built
# with the extra THREADED_LEVEL3 define.
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE") | |||
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE") | |||
endif() | |||
endforeach() | |||
endif() | |||
set(TRMM_TRSM_SOURCES | |||
trmm_L.c | |||
@@ -100,7 +118,24 @@ foreach (float_type ${FLOAT_TYPES}) | |||
endif() | |||
endif () | |||
endforeach () | |||
# Double complex without real double: for every entry of GEMM_COMPLEX_DEFINES,
# threaded builds that do not use USE_SIMPLE_THREADED_LEVEL3 register a
# gemm_thread_<define> object from gemm.c (THREADED_LEVEL3 define, "DOUBLE" as
# the last argument).  Nothing is registered in the non-threaded case; the
# loop then only computes the lower-cased name.
if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | |||
string(TOLOWER ${gemm_define} gemm_define_LC) | |||
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "DOUBLE" ) | |||
endif() | |||
endforeach() | |||
endif () | |||
# Single complex without real single: for every entry of GEMM_COMPLEX_DEFINES,
# threaded builds that do not use USE_SIMPLE_THREADED_LEVEL3 register a
# gemm_thread_<define> object from gemm.c (THREADED_LEVEL3 define, "SINGLE" as
# the last argument).  Nothing is registered in the non-threaded case; the
# loop then only computes the lower-cased name.
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | |||
string(TOLOWER ${gemm_define} gemm_define_LC) | |||
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "SINGLE" ) | |||
endif() | |||
endforeach() | |||
endif () | |||
# for gemm3m | |||
if(USE_GEMM3M) | |||
foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
@@ -287,6 +287,60 @@ HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \ | |||
dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | |||
endif | |||
# Real single precision is not being built: clear the single-precision object
# list and keep only the objects selected by the other enabled precisions.
# Both selections append, so enabling BUILD_DOUBLE and BUILD_COMPLEX together
# keeps the union of the two lists.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS =
# BUILD_DOUBLE keeps the strsm variants and the ssyrk drivers/kernels.
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS += \
	strsm_LNUU.$(SUFFIX) strsm_LNUN.$(SUFFIX) strsm_LNLU.$(SUFFIX) strsm_LNLN.$(SUFFIX) \
	strsm_LTUU.$(SUFFIX) strsm_LTUN.$(SUFFIX) strsm_LTLU.$(SUFFIX) strsm_LTLN.$(SUFFIX) \
	strsm_RNUU.$(SUFFIX) strsm_RNUN.$(SUFFIX) strsm_RNLU.$(SUFFIX) strsm_RNLN.$(SUFFIX) \
	strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \
	ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \
	ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX)
endif
endif
# BUILD_COMPLEX keeps the four sgemm drivers.
ifeq ($(BUILD_COMPLEX),1)
SBLASOBJS += sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
endif
endif
endif
# Without real double precision the double list is emptied; BUILD_COMPLEX16
# keeps the four dgemm drivers, plus their threaded variants unless
# USE_SIMPLE_THREADED_LEVEL3 is defined.
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS =
ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = $(addsuffix .$(SUFFIX),$(addprefix dgemm_,nn nt tn tt))
ifndef USE_SIMPLE_THREADED_LEVEL3
DBLASOBJS += $(addsuffix .$(SUFFIX),$(addprefix dgemm_thread_,nn nt tn tt))
endif
endif
endif

# Without single complex the complex list is emptied; BUILD_COMPLEX16 keeps
# the cherk drivers and kernels and every ctrsm variant
# (side L/R x trans N/T/R/C x UU/UN/LU/LN), plus the threaded cherk drivers
# unless USE_SIMPLE_THREADED_LEVEL3 is defined.
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS =
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS = $(addsuffix .$(SUFFIX),\
	$(addprefix cherk_,UN UC LN LC) \
	$(addprefix cherk_kernel_,UN UC LN LC) \
	$(foreach side,L R,$(foreach trans,N T R C,$(addprefix ctrsm_$(side)$(trans),UU UN LU LN))))
ifndef USE_SIMPLE_THREADED_LEVEL3
CBLASOBJS += $(addsuffix .$(SUFFIX),$(addprefix cherk_thread_,UN UC LN LC))
endif
endif
endif

# Without double complex, nothing from the double-complex list is built.
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS =
endif
all :: | |||
shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
@@ -56,12 +56,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||
if (!(mode & BLAS_COMPLEX)) { | |||
switch (mode & BLAS_PREC) { | |||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) | |||
case BLAS_SINGLE: | |||
mask = SGEMM_UNROLL_MN - 1; | |||
break; | |||
#endif | |||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
case BLAS_DOUBLE: | |||
mask = DGEMM_UNROLL_MN - 1; | |||
break; | |||
#endif | |||
#ifdef EXPRECISION | |||
case BLAS_XDOUBLE: | |||
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; | |||
@@ -70,12 +74,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||
} | |||
} else { | |||
switch (mode & BLAS_PREC) { | |||
#ifdef BUILD_COMPLEX | |||
case BLAS_SINGLE: | |||
mask = CGEMM_UNROLL_MN - 1; | |||
break; | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
case BLAS_DOUBLE: | |||
mask = ZGEMM_UNROLL_MN - 1; | |||
break; | |||
#endif | |||
#ifdef EXPRECISION | |||
case BLAS_XDOUBLE: | |||
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; | |||
@@ -459,13 +459,16 @@ blas_queue_t *tscq; | |||
} else | |||
#endif | |||
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { | |||
#ifdef BUILD_DOUBLE | |||
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#endif | |||
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | |||
#ifdef BUILD_SINGLE | |||
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
} else { | |||
#endif | |||
} else { | |||
/* Other types in future */ | |||
} | |||
} else { | |||
@@ -476,11 +479,15 @@ blas_queue_t *tscq; | |||
} else | |||
#endif | |||
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | |||
#ifdef BUILD_COMPLEX16 | |||
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#endif | |||
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | |||
#ifdef BUILD_COMPLEX | |||
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#endif | |||
} else { | |||
/* Other types in future */ | |||
} | |||
@@ -315,12 +315,15 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ | |||
} else | |||
#endif | |||
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | |||
#if defined ( BUILD_DOUBLE) || defined (BUILD_COMPLEX16) | |||
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#endif | |||
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){ | |||
#if defined (BUILD_SINGLE) || defined (BUILD_COMPLEX) | |||
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#endif | |||
} else { | |||
/* Other types in future */ | |||
} | |||
@@ -332,15 +335,24 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ | |||
} else | |||
#endif | |||
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | |||
#ifdef BUILD_COMPLEX16 | |||
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#else | |||
fprintf(stderr,"UNHANDLED COMPLEX16\n"); | |||
#endif | |||
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | |||
#ifdef BUILD_COMPLEX | |||
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) | |||
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
#else | |||
fprintf(stderr,"UNHANDLED COMPLEX\n"); | |||
#endif | |||
} else { | |||
/* Other types in future */ | |||
} | |||
} | |||
if (!sb) fprintf(stderr,"SB not declared!!!\n"); | |||
queue->sb=sb; | |||
} | |||
} | |||
@@ -2201,11 +2201,17 @@ static void *alloc_mmap(void *address){ | |||
#endif | |||
#endif | |||
allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||
start = (BLASULONG)map_address; | |||
current = (SCALING - 1) * BUFFER_SIZE; | |||
#ifdef BUILD_DOUBLE | |||
allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||
#elif defined(BUILD_COMPLEX16) | |||
allocsize = ZGEMM_P * ZGEMM_Q * sizeof(double); | |||
#elif defined(BUILD_COMPLEX) | |||
allocsize = CGEMM_P * CGEMM_Q * sizeof(double); | |||
#else | |||
allocsize = SGEMM_P * SGEMM_Q * sizeof(double); | |||
#endif | |||
start = (BLASULONG)map_address; | |||
current = (SCALING - 1) * BUFFER_SIZE; | |||
while(current > 0) { | |||
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||
@@ -33,6 +33,18 @@ endif | |||
ifndef BUILD_HALF | |||
BUILD_HALF = 0 | |||
endif | |||
# Give each precision switch the value 0 when it has no (non-empty) value yet.
# The gensymbol recipes in this file pass these switches as separate
# command-line words, so each one needs to expand to something.
ifndef BUILD_SINGLE | |||
BUILD_SINGLE = 0 | |||
endif | |||
ifndef BUILD_DOUBLE | |||
BUILD_DOUBLE = 0 | |||
endif | |||
ifndef BUILD_COMPLEX | |||
BUILD_COMPLEX = 0 | |||
endif | |||
ifndef BUILD_COMPLEX16 | |||
BUILD_COMPLEX16 = 0 | |||
endif | |||
ifeq ($(OSNAME), WINNT) | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
@@ -108,10 +120,10 @@ dll : ../$(LIBDLLNAME) | |||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) | |||
# Windows module-definition files, generated by gensymbol from the build
# switches (mode win2k for the main library, win2khpl for libgoto_hpl).
#
# The blank before ">" is required: the last argument expands to a single
# digit, and a digit written directly against ">" is read by the shell as a
# file-descriptor number ("0> file" / "1> file") instead of being passed to
# gensymbol.
$(LIBPREFIX).def : gensymbol
	perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)

libgoto_hpl.def : gensymbol
	perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
ifeq ($(OSNAME), Darwin) | |||
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib | |||
@@ -246,23 +258,23 @@ static : ../$(LIBNAME) | |||
rm -f goto.$(SUFFIX) | |||
# Symbol lists for the individual export back ends.  Each file is produced by
# one gensymbol run whose first argument selects the output format and whose
# remaining arguments are the build switches.
#
# The blank before ">" is required: the last argument expands to a single
# digit, and a digit written directly against ">" is read by the shell as a
# file-descriptor number ("0> file" / "1> file") instead of being passed to
# gensymbol.
osx.def : gensymbol ../Makefile.system ../getarch.c
	perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)

aix.def : gensymbol ../Makefile.system ../getarch.c
	perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)

objcopy.def : gensymbol ../Makefile.system ../getarch.c
	perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)

objconv.def : gensymbol ../Makefile.system ../getarch.c
	perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
# Link check: compile linktest.c against the shared library, print "OK." if
# the link succeeds, then remove the test binary.
test : linktest.c
	$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
	rm -f linktest

# linktest.c is generated by gensymbol (mode "linktest") from the same build
# switches as the symbol lists, including the per-precision BUILD_* flags.
# It is written by a single invocation so the file always reflects the full
# argument list.
linktest.c : gensymbol ../Makefile.system ../getarch.c
	perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
clean :: | |||
@rm -f *.def *.dylib __.SYMDEF* *.renamed | |||
@@ -83,8 +83,12 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) | |||
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||
#sdsdot, dsdot | |||
if (BUILD_SINGLE OR BUILD_DOUBLE) | |||
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||
endif () | |||
if (BUILD_DOUBLE) | |||
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||
endif () | |||
# trmm is trsm with a compiler flag set | |||
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | |||
@@ -167,4 +171,31 @@ if (NOT DEFINED NO_LAPACK) | |||
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) | |||
endif () | |||
# Single complex without real single: still register the real single-precision
# interface objects listed here.  Every entry uses <name>.c as its source and
# <name> as its object name; imax.c is registered separately because it takes
# the USE_ABS define and the "i*amax" name pattern.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
  foreach (real_helper scal copy dot rot nrm2 gemv gemm asum swap axpy)
    GenerateNamedObjects("${real_helper}.c" "" "${real_helper}" 0 "" "" false "SINGLE")
  endforeach ()
  GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "SINGLE")
endif ()
# Double complex without real double: still register the real double-precision
# interface objects listed here.  Every entry uses <name>.c as its source and
# <name> as its object name; imax.c is registered separately because it takes
# the USE_ABS define and the "i*amax" name pattern.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  foreach (real_helper scal copy dot rot nrm2 gemv gemm asum swap axpy)
    GenerateNamedObjects("${real_helper}.c" "" "${real_helper}" 0 "" "" false "DOUBLE")
  endforeach ()
  GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "DOUBLE")
endif ()
add_library(interface OBJECT ${OPENBLAS_SRC}) |
@@ -329,7 +329,10 @@ CCBLAS3OBJS = \ | |||
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ | |||
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ | |||
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ | |||
cblas_cgeadd.$(SUFFIX) cblas_xerbla.$(SUFFIX) | |||
cblas_cgeadd.$(SUFFIX) | |||
CXERBLAOBJ = \ | |||
cblas_xerbla.$(SUFFIX) | |||
@@ -391,6 +394,8 @@ ZBLAS2OBJS += $(CZBLAS2OBJS) | |||
ZBLAS3OBJS += $(CZBLAS3OBJS) | |||
SHEXTOBJS += $(CSHEXTOBJS) | |||
CBAUXOBJS += $(CXERBLAOBJ) | |||
endif | |||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | |||
@@ -434,13 +439,11 @@ QLAPACKOBJS = \ | |||
# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||
# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) | |||
CLAPACKOBJS = \ | |||
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | |||
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) | |||
#ZLAPACKOBJS = \ | |||
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | |||
# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | |||
@@ -469,8 +472,42 @@ ZBLASOBJS += $(ZLAPACKOBJS) | |||
endif | |||
FUNCOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) | |||
# Real single precision is not being built: clear the single-precision
# interface list and keep only the objects selected by the other enabled
# precisions.  The selections append, so enabling BUILD_DOUBLE and
# BUILD_COMPLEX together keeps the union of both lists.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS =
# Kept when BUILD_DOUBLE is enabled.
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS += dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \
	sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \
	ssyrk.$(SUFFIX)
endif
# Kept when BUILD_COMPLEX is enabled.
ifeq ($(BUILD_COMPLEX),1)
SBLASOBJS += \
	sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \
	isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \
	sgemm.$(SUFFIX)
endif
# sgemv belongs to both selections; add it once so it is never duplicated.
ifneq ($(filter 1,$(BUILD_DOUBLE) $(BUILD_COMPLEX)),)
SBLASOBJS += sgemv.$(SUFFIX)
endif
endif
# Without real double precision the double interface list is emptied;
# BUILD_COMPLEX16 keeps the ten real double objects named here.
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS =
ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = $(addsuffix .$(SUFFIX),\
	ddot drot dnrm2 dswap idamax daxpy dscal dcopy dgemv dgemm)
endif
endif

# Without single complex the complex interface list is emptied;
# BUILD_COMPLEX16 keeps the six single-complex objects named here.
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS =
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS = $(addsuffix .$(SUFFIX),\
	cgetrs cblas_cdotu_sub cgetrf cpotrf ctrsm cblas_cdotc_sub)
endif
endif

# Without double complex, nothing from the double-complex list is built.
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS =
endif
# Aggregate object list across the per-precision lists.
# NOTE(review): $(CXERBLAOBJS) is not assigned anywhere in the visible part of
# this file -- the cblas_xerbla list is named CXERBLAOBJ -- so it presumably
# expands to nothing here; confirm whether CXERBLAOBJ was intended.
FUNCOBJS = $(SHEXTOBJS) $(CXERBLAOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)

# Extended precision adds the q- and x-prefixed object lists.
ifdef EXPRECISION
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
endif
@@ -481,6 +518,7 @@ endif | |||
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) | |||
include $(TOPDIR)/Makefile.tail | |||
all :: libs | |||
@@ -503,11 +541,14 @@ level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $( | |||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||
level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
aux : $(CBAUXOBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
$(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | |||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS | |||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS | |||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
@@ -2268,3 +2309,4 @@ cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c | |||
cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c | |||
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
@@ -91,6 +91,59 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") | |||
if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE) | |||
GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE") | |||
if (DEFINED SMAXKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE") | |||
endif () | |||
if (DEFINED SMINKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE") | |||
endif () | |||
if (DEFINED ISMINKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE") | |||
endif () | |||
if (DEFINED ISMAXKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") | |||
endif () | |||
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") | |||
if (DEFINED DMAXKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") | |||
endif () | |||
if (DEFINED DMINKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") | |||
endif () | |||
if (DEFINED IDMINKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") | |||
endif () | |||
if (DEFINED IDMAXKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | |||
endif () | |||
# Makefile.L2 | |||
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) | |||
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | |||
@@ -124,7 +177,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) | |||
endif () | |||
endforeach () | |||
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") | |||
endif () | |||
if (BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
endif () | |||
# Makefile.L3 | |||
set(USE_TRMM false) | |||
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE)) | |||
@@ -159,6 +219,38 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) | |||
endforeach() | |||
# Fallback for BUILD_COMPLEX16 without BUILD_DOUBLE: generate the real
# double-precision GEMM kernel, its optional packing (copy) routines and
# gemm_beta explicitly.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") | |||
# Each copy routine is generated only when its source variable is set; the
# object name is taken from the matching *OBJ variable.
if (DGEMMINCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") | |||
endif () | |||
if (DGEMMITCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") | |||
endif () | |||
if (DGEMMONCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") | |||
endif () | |||
if (DGEMMOTCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") | |||
endif () | |||
# Fallback for builds without BUILD_SINGLE that enable BUILD_DOUBLE or
# BUILD_COMPLEX: generate the real single-precision GEMM kernel, its optional
# packing (copy) routines and gemm_beta explicitly.
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") | |||
# Each copy routine is generated only when its source variable is set; the
# object name is taken from the matching *OBJ variable.
if (SGEMMINCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
if (SGEMMITCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
if (SGEMMONCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
if (SGEMMOTCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE") | |||
endif () | |||
foreach (float_type ${FLOAT_TYPES}) | |||
string(SUBSTRING ${float_type} 0 1 float_char) | |||
@@ -499,7 +591,31 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
#geadd | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) | |||
endforeach () | |||
# Fallback for BUILD_DOUBLE without BUILD_SINGLE: generate the single-precision
# TRSM kernels (LN/LT/RN/RT) and the generic TRSM copy routines explicitly.
if (BUILD_DOUBLE AND NOT BUILD_SINGLE) | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE") | |||
# Copy routines: the "i" variants are built from the SGEMM_UNROLL_M source,
# the "o" variants (OUTER define) from the SGEMM_UNROLL_N source; UNIT and
# LOWER defines select the remaining variants of each source file.
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE") | |||
endif () | |||
# Makefile.LA | |||
if(NOT NO_LAPACK) | |||
@@ -526,6 +642,28 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type}) | |||
endforeach() | |||
# Fallback for BUILD_COMPLEX without BUILD_SINGLE: generate the single-precision
# neg_tcopy and laswp_ncopy objects explicitly.
# SNEG_TCOPY / SLASWP_NCOPY default to the generic sources when not defined.
# NOTE(review): the source path is formed as "<value>_<unroll>", which with the
# defaults below yields e.g. "../generic/neg_tcopy.c_<M>" - confirm that
# GenerateNamedObjects (or the build tree) resolves such a name as intended.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
if (NOT DEFINED SNEG_TCOPY) | |||
set(SNEG_TCOPY ../generic/neg_tcopy.c) | |||
endif () | |||
if (NOT DEFINED SLASWP_NCOPY) | |||
set(SLASWP_NCOPY ../generic/laswp_ncopy.c) | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}_${SGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}_${SGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "SINGLE") | |||
endif() | |||
# Fallback for BUILD_COMPLEX16 without BUILD_DOUBLE: generate the
# double-precision neg_tcopy and laswp_ncopy objects explicitly.
# DNEG_TCOPY / DLASWP_NCOPY default to the generic sources when not defined.
# NOTE(review): the source path is formed as "<value>_<unroll>", which with the
# defaults below yields e.g. "../generic/neg_tcopy.c_<M>" - confirm that
# GenerateNamedObjects (or the build tree) resolves such a name as intended.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
if (NOT DEFINED DNEG_TCOPY) | |||
set(DNEG_TCOPY ../generic/neg_tcopy.c) | |||
endif () | |||
if (NOT DEFINED DLASWP_NCOPY) | |||
set(DLASWP_NCOPY ../generic/laswp_ncopy.c) | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "DOUBLE") | |||
endif() | |||
endif() | |||
if (${DYNAMIC_ARCH}) | |||
@@ -557,8 +695,147 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) | |||
endforeach () | |||
# Fallback for BUILD_COMPLEX without BUILD_SINGLE: generate the single-precision
# gemv kernels plus the generic neg_tcopy / laswp_ncopy objects.
# NOTE(review): the gemv calls pass "" where the neg_tcopy / laswp_ncopy calls
# pass ${TSUFFIX} - confirm the empty suffix is intentional.
# NOTE(review): the same SINGLE gemv_n / gemv_t objects are also requested by
# other "NOT BUILD_SINGLE" fallbacks in this function; check whether the
# repeated requests are harmless when several conditions hold at once.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") | |||
endif () | |||
# Fallback for BUILD_DOUBLE without BUILD_SINGLE: generate the single-precision
# neg_tcopy / laswp_ncopy objects, TRSM kernels, generic TRSM copy routines,
# optional GEMM copy routines and gemv kernels.
# NOTE(review): when BUILD_COMPLEX is also set (and BUILD_SINGLE is not), the
# neg_tcopy, laswp_ncopy, gemv_n and gemv_t requests below repeat those of the
# BUILD_COMPLEX fallback - confirm that duplicate requests are harmless.
if (BUILD_DOUBLE AND NOT BUILD_SINGLE) | |||
GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" ${TSUFFIX} false "SINGLE") | |||
# Generic TRSM copy routines: "i" variants use the SGEMM_UNROLL_M source,
# "o" variants (OUTER define) use the SGEMM_UNROLL_N source.
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "SINGLE") | |||
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "SINGLE") | |||
# Optional GEMM copy routines, generated only when the source variable is set.
# NOTE(review): these and the gemv calls below pass "" where the calls above
# pass ${TSUFFIX} - confirm the empty suffix is intentional.
if (SGEMMINCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
if (SGEMMITCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
if (SGEMMONCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
if (SGEMMOTCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") | |||
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
endif () | |||
# Fallback for BUILD_COMPLEX16 without BUILD_DOUBLE: generate the
# double-precision neg_tcopy / laswp_ncopy objects from the generic sources
# selected by DGEMM_UNROLL_M / DGEMM_UNROLL_N, with ${TSUFFIX} appended.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") | |||
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") | |||
endif () | |||
# Fallback for BUILD_COMPLEX16 without BUILD_COMPLEX: generate the complex
# single-precision kernel objects explicitly (level-1 kernels, gemv variants,
# TRSM/GEMM kernels, optional GEMM copy routines, generic ztrsm copy routines
# and the neg_tcopy / laswp_ncopy helpers).
# NOTE(review): presumably needed because complex-double builds still reference
# the complex-single kernel set - confirm.
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX) | |||
GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX") | |||
# max/min kernels are optional: only generated when the variable is defined.
if (DEFINED CMAXKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX") | |||
endif () | |||
if (DEFINED CMINKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX") | |||
if (DEFINED ICMAXKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX") | |||
endif () | |||
if (DEFINED ICMINKERNEL) | |||
GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX") | |||
# gemv variants: the N and T sources are each compiled with the CONJ / XCONJ
# define combinations listed below.
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX") | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX") | |||
# Optional GEMM copy routines, generated only when the source variable is set.
if (CGEMMINCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX") | |||
endif () | |||
if (CGEMMITCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX") | |||
endif () | |||
if (CGEMMONCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX") | |||
endif () | |||
if (CGEMMOTCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX") | |||
endif () | |||
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX") | |||
# Generic complex TRSM copy routines: "i" variants use the CGEMM_UNROLL_M
# source, "o" variants (OUTER define) use the CGEMM_UNROLL_N source.
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
# Select the helper sources by the COMPLEX unroll factors (CGEMM_UNROLL_M/N),
# matching the per-type loop's ${<type letter>GEMM_UNROLL_*} convention and the
# ztrsm copy routines above, rather than the DOUBLE unroll factors.
GenerateNamedObjects("generic/neg_tcopy_${CGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX") | |||
GenerateNamedObjects("generic/laswp_ncopy_${CGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
endif () | |||
endif () | |||
add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) | |||
set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") | |||
@@ -573,7 +850,7 @@ if (${DYNAMIC_ARCH}) | |||
set(BUILD_KERNEL 1) | |||
set(KDIR "") | |||
set(TSUFFIX "_${TARGET_CORE}") | |||
set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") | |||
set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") | |||
build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") | |||
set(ADD_COMMONOBJS 0) | |||
endforeach() | |||
@@ -186,31 +186,46 @@ ifndef XHEMV_M_KERNEL | |||
XHEMV_M_KERNEL = ../generic/zhemv_k.c | |||
endif | |||
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" | |||
SBLASOBJS += \ | |||
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ | |||
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) | |||
endif | |||
# Single-precision symv and ger objects are added to SBLASOBJS only when
# BUILD_SINGLE is exactly 1.
ifeq ($(BUILD_SINGLE),1) | |||
SBLASOBJS += \ | |||
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ | |||
sger_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
# Double-precision gemv, symv and ger objects are added to DBLASOBJS only when
# BUILD_DOUBLE is exactly 1.
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS += \ | |||
dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \ | |||
dger_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
QBLASOBJS += \ | |||
qgemv_n$(TSUFFIX).$(SUFFIX) qgemv_t$(TSUFFIX).$(SUFFIX) qsymv_U$(TSUFFIX).$(SUFFIX) qsymv_L$(TSUFFIX).$(SUFFIX) \ | |||
qger_k$(TSUFFIX).$(SUFFIX) | |||
# When BUILD_COMPLEX is exactly 1, add the real single-precision gemv objects
# to SBLASOBJS and the complex single-precision gemv, symv, hemv and ger
# objects to CBLASOBJS.
# NOTE(review): sgemv_n / sgemv_t also appear in another SBLASOBJS assignment
# in this file - check whether the list can end up with duplicate entries.
ifeq ($(BUILD_COMPLEX),1) | |||
SBLASOBJS += \ | |||
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) | |||
CBLASOBJS += \ | |||
cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) \ | |||
csymv_U$(TSUFFIX).$(SUFFIX) csymv_L$(TSUFFIX).$(SUFFIX) \ | |||
chemv_U$(TSUFFIX).$(SUFFIX) chemv_L$(TSUFFIX).$(SUFFIX) chemv_V$(TSUFFIX).$(SUFFIX) chemv_M$(TSUFFIX).$(SUFFIX) \ | |||
cgeru_k$(TSUFFIX).$(SUFFIX) cgerc_k$(TSUFFIX).$(SUFFIX) cgerv_k$(TSUFFIX).$(SUFFIX) cgerd_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
# When BUILD_COMPLEX16 is exactly 1, add the complex single-precision gemv
# objects to CBLASOBJS, the real double-precision gemv objects to DBLASOBJS,
# and the complex double-precision gemv, symv, hemv and ger objects to
# ZBLASOBJS.
# NOTE(review): the cgemv_* and dgemv_* names are also appended under the
# BUILD_COMPLEX and BUILD_DOUBLE guards - check whether enabling several types
# at once leaves duplicate entries in CBLASOBJS / DBLASOBJS.
ifeq ($(BUILD_COMPLEX16),1) | |||
CBLASOBJS += \ | |||
cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) | |||
DBLASOBJS += \ | |||
dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) | |||
ZBLASOBJS += \ | |||
zgemv_n$(TSUFFIX).$(SUFFIX) zgemv_t$(TSUFFIX).$(SUFFIX) zgemv_r$(TSUFFIX).$(SUFFIX) zgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
zgemv_o$(TSUFFIX).$(SUFFIX) zgemv_u$(TSUFFIX).$(SUFFIX) zgemv_s$(TSUFFIX).$(SUFFIX) zgemv_d$(TSUFFIX).$(SUFFIX) \ | |||
zsymv_U$(TSUFFIX).$(SUFFIX) zsymv_L$(TSUFFIX).$(SUFFIX) \ | |||
zhemv_U$(TSUFFIX).$(SUFFIX) zhemv_L$(TSUFFIX).$(SUFFIX) zhemv_V$(TSUFFIX).$(SUFFIX) zhemv_M$(TSUFFIX).$(SUFFIX) \ | |||
zgeru_k$(TSUFFIX).$(SUFFIX) zgerc_k$(TSUFFIX).$(SUFFIX) zgerv_k$(TSUFFIX).$(SUFFIX) zgerd_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
XBLASOBJS += \ | |||
xgemv_n$(TSUFFIX).$(SUFFIX) xgemv_t$(TSUFFIX).$(SUFFIX) xgemv_r$(TSUFFIX).$(SUFFIX) xgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
@@ -219,17 +234,21 @@ XBLASOBJS += \ | |||
xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \ | |||
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) | |||
# Rules for the real single-precision gemv kernels: sgemv_n is compiled from
# SGEMVNKERNEL with -UTRANS, sgemv_t from SGEMVTKERNEL with -DTRANS; both are
# built with -UDOUBLE -UCOMPLEX. Each rule names the regular ($(SUFFIX)) and
# the $(PSUFFIX) object as targets.
# The rules exist only when $(or ...) over BUILD_SINGLE, BUILD_DOUBLE and
# BUILD_COMPLEX expands to a non-empty string.
# NOTE(review): any non-empty value (including 0) satisfies this guard, while
# other guards in this file test for exactly 1 - confirm this is intended.
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" | |||
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@ | |||
$(KDIR)sgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@ | |||
endif | |||
# Rules for the real double-precision gemv kernels: dgemv_n is compiled from
# DGEMVNKERNEL with -UTRANS, dgemv_t from DGEMVTKERNEL with -DTRANS; both are
# built with -DDOUBLE -UCOMPLEX.
# The rules exist only when $(or ...) over BUILD_DOUBLE and BUILD_COMPLEX16
# expands to a non-empty string.
# NOTE(review): any non-empty value (including 0) satisfies this guard, while
# other guards in this file test for exactly 1 - confirm this is intended.
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
$(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@ | |||
$(KDIR)dgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@ | |||
endif | |||
$(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@ | |||
@@ -237,6 +256,8 @@ $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)qgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVTKERNEL) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@ | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
$(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ | |||
@@ -260,6 +281,10 @@ $(KDIR)cgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE | |||
$(KDIR)cgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
$(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ | |||
@@ -284,6 +309,7 @@ $(KDIR)zgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE | |||
$(KDIR)zgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ | |||
endif | |||
$(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ | |||
@@ -309,17 +335,25 @@ $(KDIR)xgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE | |||
$(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVTKERNEL) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ | |||
# Rules for the single-precision symv kernels, defined only when BUILD_SINGLE
# is exactly 1. The upper variant is compiled with -ULOWER, the lower variant
# with -DLOWER; both use -UCOMPLEX -UDOUBLE.
ifeq ($(BUILD_SINGLE),1) | |||
$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@ | |||
$(KDIR)ssymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_L_KERNEL) $(SSYMV_L_PARAM) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@ | |||
endif | |||
# Rules for the double-precision symv kernels, defined only when BUILD_DOUBLE
# is exactly 1. The upper variant is compiled with -ULOWER, the lower variant
# with -DLOWER; both use -UCOMPLEX -DDOUBLE.
ifeq ($(BUILD_DOUBLE),1) | |||
$(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@ | |||
$(KDIR)dsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_L_KERNEL) $(DSYMV_L_PARAM) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@ | |||
endif | |||
$(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@ | |||
@@ -327,17 +361,23 @@ $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)qsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_L_KERNEL) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@ | |||
# Rules for the complex single-precision symv kernels, defined only when
# BUILD_COMPLEX is exactly 1. The upper variant is compiled with -ULOWER, the
# lower variant with -DLOWER; both use -DCOMPLEX -UDOUBLE.
ifeq ($(BUILD_COMPLEX),1) | |||
$(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@ | |||
$(KDIR)csymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_L_KERNEL) $(CSYMV_L_PARAM) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@ | |||
endif | |||
# Rules for the complex double-precision symv kernels, defined only when
# BUILD_COMPLEX16 is exactly 1. The upper variant is compiled with -ULOWER, the
# lower variant with -DLOWER; both use -DCOMPLEX -DDOUBLE.
ifeq ($(BUILD_COMPLEX16),1) | |||
$(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@ | |||
$(KDIR)zsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_L_KERNEL) $(ZSYMV_L_PARAM) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@ | |||
endif | |||
$(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@ | |||
@@ -345,15 +385,23 @@ $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)xsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_L_KERNEL) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@ | |||
# Rule for the single-precision ger kernel (compiled from SGERKERNEL with
# -UDOUBLE), defined only when BUILD_SINGLE is exactly 1.
ifeq ($(BUILD_SINGLE),1) | |||
$(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM) | |||
$(CC) -c $(CFLAGS) -UDOUBLE $< -o $@ | |||
endif | |||
# Rule for the double-precision ger kernel (compiled from DGERKERNEL with
# -DDOUBLE), defined only when BUILD_DOUBLE is exactly 1.
ifeq ($(BUILD_DOUBLE),1) | |||
$(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM) | |||
$(CC) -c $(CFLAGS) -DDOUBLE $< -o $@ | |||
endif | |||
$(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@ | |||
ifeq ($(BUILD_COMPLEX),1) | |||
$(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM) | |||
$(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@ | |||
@@ -365,6 +413,9 @@ $(KDIR)cgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)cgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERCKERNEL) $(CGERPARAM) | |||
$(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@ | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
$(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM) | |||
$(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@ | |||
@@ -377,6 +428,7 @@ $(KDIR)zgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)zgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERCKERNEL) $(ZGERPARAM) | |||
$(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@ | |||
endif | |||
$(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@ | |||
@@ -390,6 +442,8 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM) | |||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@ | |||
ifeq ($(BUILD_COMPLEX),1) | |||
$(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@ | |||
@@ -401,6 +455,9 @@ $(KDIR)chemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_V$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)chemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_M_KERNEL) $(CHEMV_L_PARAM) ../symcopy.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
$(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@ | |||
@@ -413,7 +470,7 @@ $(KDIR)zhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)zhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_M_KERNEL) $(ZHEMV_L_PARAM) ../symcopy.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ | |||
endif | |||
$(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@ | |||
@@ -426,3 +483,4 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER | |||
$(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ | |||
@@ -100,8 +100,10 @@ SHKERNELOBJS += \ | |||
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) | |||
endif | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | |||
SKERNELOBJS += \ | |||
sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
sgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
$(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ | |||
$(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) | |||
@@ -110,28 +112,36 @@ SKERNELOBJS += \ | |||
sgemm_direct$(TSUFFIX).$(SUFFIX) \ | |||
sgemm_direct_performant$(TSUFFIX).$(SUFFIX) | |||
endif | |||
endif | |||
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
DKERNELOBJS += \ | |||
dgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
dgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
$(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ | |||
$(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) | |||
endif | |||
QKERNELOBJS += \ | |||
qgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
$(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \ | |||
$(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ) | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CKERNELOBJS += \ | |||
cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ | |||
cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ | |||
$(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \ | |||
$(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZKERNELOBJS += \ | |||
zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ | |||
zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ | |||
$(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \ | |||
$(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ) | |||
endif | |||
XKERNELOBJS += \ | |||
xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ | |||
@@ -153,38 +163,48 @@ ifeq ($(BUILD_HALF),1) | |||
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS += \ | |||
sgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS += \ | |||
dgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
endif | |||
QBLASOBJS += \ | |||
qgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS += \ | |||
cgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS += \ | |||
cgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS += \ | |||
zgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
@@ -194,7 +214,8 @@ ZBLASOBJS += \ | |||
ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
endif | |||
XBLASOBJS += \ | |||
xgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
@@ -205,7 +226,7 @@ XBLASOBJS += \ | |||
xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
ifeq ($(USE_GEMM3M), 1) | |||
@@ -215,6 +236,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
SBLASOBJS += \ | |||
strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -223,7 +245,10 @@ SBLASOBJS += \ | |||
strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS += \ | |||
strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -231,10 +256,15 @@ SBLASOBJS += \ | |||
strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
SBLASOBJS += \ | |||
ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS += \ | |||
dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -254,6 +284,7 @@ DBLASOBJS += \ | |||
dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
QBLASOBJS += \ | |||
qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -273,8 +304,9 @@ QBLASOBJS += \ | |||
qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS += \ | |||
ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -284,6 +316,13 @@ CBLASOBJS += \ | |||
ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS += \ | |||
ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -291,12 +330,10 @@ CBLASOBJS += \ | |||
ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS += \ | |||
ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -318,6 +355,7 @@ ZBLASOBJS += \ | |||
zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
XBLASOBJS += \ | |||
xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
@@ -343,6 +381,7 @@ XBLASOBJS += \ | |||
ifeq ($(USE_GEMM3M), 1) | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS += \ | |||
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ | |||
@@ -362,7 +401,9 @@ CBLASOBJS += \ | |||
chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ | |||
chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ | |||
chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS += \ | |||
zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ | |||
@@ -382,6 +423,7 @@ ZBLASOBJS += \ | |||
zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ | |||
zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ | |||
zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) | |||
endif | |||
XBLASOBJS += \ | |||
xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
@@ -406,20 +448,25 @@ XBLASOBJS += \ | |||
endif | |||
###### BLAS extensions ##### | |||
ifeq ($(BUILD_SINGLE),1) | |||
SBLASOBJS += \ | |||
somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
sgeadd_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS += \ | |||
domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
dgeadd_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS += \ | |||
comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
@@ -430,7 +477,9 @@ CBLASOBJS += \ | |||
cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ | |||
cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | |||
cgeadd_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS += \ | |||
zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
@@ -441,6 +490,7 @@ ZBLASOBJS += \ | |||
zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ | |||
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | |||
zgeadd_k$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_HALF), 1) | |||
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
@@ -114,6 +114,7 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
#endif | |||
#if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | |||
0, 0, 0, | |||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | |||
#ifdef SGEMM_DEFAULT_UNROLL_MN | |||
@@ -121,7 +122,7 @@ gotoblas_t TABLE_NAME = { | |||
#else | |||
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
#endif | |||
#ifdef HAVE_EXCLUSIVE_CACHE | |||
1, | |||
@@ -129,19 +130,38 @@ gotoblas_t TABLE_NAME = { | |||
0, | |||
#endif | |||
#if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
samax_kTS, samin_kTS, smax_kTS, smin_kTS, | |||
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | |||
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS, | |||
dsdot_kTS, | |||
srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||
sgemv_nTS, sgemv_tTS, sger_kTS, | |||
snrm2_kTS, sasum_kTS, | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
ssum_kTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
scopy_kTS, sdot_kTS, | |||
// dsdot_kTS, | |||
srot_kTS, saxpy_kTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | |||
sscal_kTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
sswap_kTS, | |||
sgemv_nTS, sgemv_tTS, | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
sger_kTS, | |||
ssymv_LTS, ssymv_UTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
#ifdef ARCH_X86_64 | |||
sgemm_directTS, | |||
sgemm_direct_performantTS, | |||
#endif | |||
sgemm_kernelTS, sgemm_betaTS, | |||
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
sgemm_incopyTS, sgemm_itcopyTS, | |||
@@ -149,6 +169,9 @@ gotoblas_t TABLE_NAME = { | |||
sgemm_oncopyTS, sgemm_otcopyTS, | |||
#endif | |||
sgemm_oncopyTS, sgemm_otcopyTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) | |||
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS, | |||
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS, | |||
@@ -159,6 +182,8 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS, | |||
strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS, | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS, | |||
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS, | |||
@@ -175,13 +200,16 @@ gotoblas_t TABLE_NAME = { | |||
ssymm_outcopyTS, ssymm_oltcopyTS, | |||
#endif | |||
ssymm_outcopyTS, ssymm_oltcopyTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) | |||
#ifndef NO_LAPACK | |||
sneg_tcopyTS, slaswp_ncopyTS, | |||
#else | |||
NULL,NULL, | |||
#endif | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
0, 0, 0, | |||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, | |||
#ifdef DGEMM_DEFAULT_UNROLL_MN | |||
@@ -189,14 +217,36 @@ gotoblas_t TABLE_NAME = { | |||
#else | |||
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, | |||
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, | |||
dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS, | |||
drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS, | |||
dgemv_nTS, dgemv_tTS, dger_kTS, | |||
dnrm2_kTS, dasum_kTS, | |||
#endif | |||
#if (BUILD_DOUBLE==1) | |||
dsum_kTS, | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
dcopy_kTS, ddot_kTS, | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) | |||
dsdot_kTS, | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
drot_kTS, | |||
daxpy_kTS, | |||
dscal_kTS, | |||
dswap_kTS, | |||
dgemv_nTS, dgemv_tTS, | |||
#endif | |||
#if (BUILD_DOUBLE==1) | |||
dger_kTS, | |||
dsymv_LTS, dsymv_UTS, | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
dgemm_kernelTS, dgemm_betaTS, | |||
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N | |||
dgemm_incopyTS, dgemm_itcopyTS, | |||
@@ -204,6 +254,9 @@ gotoblas_t TABLE_NAME = { | |||
dgemm_oncopyTS, dgemm_otcopyTS, | |||
#endif | |||
dgemm_oncopyTS, dgemm_otcopyTS, | |||
#endif | |||
#if (BUILD_DOUBLE==1) | |||
dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS, | |||
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N | |||
dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS, | |||
@@ -237,6 +290,8 @@ gotoblas_t TABLE_NAME = { | |||
NULL, NULL, | |||
#endif | |||
#endif | |||
#ifdef EXPRECISION | |||
0, 0, 0, | |||
@@ -291,6 +346,7 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
#if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
0, 0, 0, | |||
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, | |||
#ifdef CGEMM_DEFAULT_UNROLL_MN | |||
@@ -298,21 +354,34 @@ gotoblas_t TABLE_NAME = { | |||
#else | |||
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, | |||
cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS, | |||
cdotu_kTS, cdotc_kTS, csrot_kTS, | |||
caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS, | |||
#endif | |||
#if (BUILD_COMPLEX) | |||
cnrm2_kTS, casum_kTS, csum_kTS, | |||
#endif | |||
#if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
ccopy_kTS, cdotu_kTS, cdotc_kTS, | |||
#endif | |||
#if (BUILD_COMPLEX) | |||
csrot_kTS, | |||
#endif | |||
#if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
caxpy_kTS, | |||
caxpyc_kTS, | |||
cscal_kTS, | |||
cswap_kTS, | |||
cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS, | |||
cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS, | |||
#endif | |||
#if (BUILD_COMPLEX) | |||
cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS, | |||
csymv_LTS, csymv_UTS, | |||
chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS, | |||
#endif | |||
#if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS, | |||
cgemm_betaTS, | |||
#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N | |||
cgemm_incopyTS, cgemm_itcopyTS, | |||
#else | |||
@@ -332,6 +401,8 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS, | |||
ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS, | |||
#endif | |||
#if (BUILD_COMPLEX) | |||
ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS, | |||
ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS, | |||
@@ -361,7 +432,7 @@ gotoblas_t TABLE_NAME = { | |||
0, 0, 0, | |||
#if defined(USE_GEMM3M) | |||
#if (USE_GEMM3M) | |||
#ifdef CGEMM3M_DEFAULT_UNROLL_M | |||
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), | |||
#else | |||
@@ -419,13 +490,20 @@ gotoblas_t TABLE_NAME = { | |||
NULL, NULL, | |||
NULL, NULL, | |||
#endif | |||
#endif | |||
#if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
#ifndef NO_LAPACK | |||
cneg_tcopyTS, claswp_ncopyTS, | |||
cneg_tcopyTS, | |||
claswp_ncopyTS, | |||
#else | |||
NULL, NULL, | |||
#endif | |||
#endif | |||
#if BUILD_COMPLEX16 == 1 | |||
0, 0, 0, | |||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, | |||
#ifdef ZGEMM_DEFAULT_UNROLL_MN | |||
@@ -495,7 +573,7 @@ gotoblas_t TABLE_NAME = { | |||
zhemm_outcopyTS, zhemm_oltcopyTS, | |||
0, 0, 0, | |||
#if defined(USE_GEMM3M) | |||
#if (USE_GEMM3M) | |||
#ifdef ZGEMM3M_DEFAULT_UNROLL_M | |||
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), | |||
#else | |||
@@ -560,6 +638,8 @@ gotoblas_t TABLE_NAME = { | |||
NULL, NULL, | |||
#endif | |||
#endif | |||
#ifdef EXPRECISION | |||
0, 0, 0, | |||
@@ -626,7 +706,7 @@ gotoblas_t TABLE_NAME = { | |||
xhemm_outcopyTS, xhemm_oltcopyTS, | |||
0, 0, 0, | |||
#if defined(USE_GEMM3M) | |||
#if (USE_GEMM3M) | |||
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), | |||
xgemm3m_kernelTS, | |||
@@ -691,52 +771,112 @@ gotoblas_t TABLE_NAME = { | |||
init_parameter, | |||
SNUMOPT, DNUMOPT, QNUMOPT, | |||
#if BUILD_SINGLE == 1 | |||
saxpby_kTS, | |||
#endif | |||
#if BUILD_DOUBLE == 1 | |||
daxpby_kTS, | |||
#endif | |||
#if BUILD_COMPLEX == 1 | |||
caxpby_kTS, | |||
#endif | |||
#if BUILD_COMPLEX16== 1 | |||
zaxpby_kTS, | |||
#endif | |||
saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS, | |||
#if BUILD_SINGLE == 1 | |||
somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS, | |||
#endif | |||
#if BUILD_DOUBLE== 1 | |||
domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS, | |||
#endif | |||
#if BUILD_COMPLEX == 1 | |||
comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS, | |||
comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS, | |||
#endif | |||
#if BUILD_COMPLEX16 == 1 | |||
zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, | |||
zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, | |||
#endif | |||
#if BUILD_DOUBLE== 1 | |||
dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, | |||
#endif | |||
#if BUILD_COMPLEX== 1 | |||
cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, | |||
cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, | |||
zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, | |||
#endif | |||
sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS | |||
#if BUILD_SINGLE == 1 | |||
sgeadd_kTS, | |||
#endif | |||
#if BUILD_DOUBLE==1 | |||
dgeadd_kTS, | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
cgeadd_kTS, | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
zgeadd_kTS | |||
#endif | |||
}; | |||
#if defined(ARCH_ARM64) | |||
#if (ARCH_ARM64) | |||
static void init_parameter(void) { | |||
#if defined(BUILD_HALF) | |||
#if (BUILD_HALF) | |||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE == 1 | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#if defined(BUILD_HALF) | |||
#if (BUILD_HALF) | |||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_DOUBLE== 1 | |||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_COMPLEX== 1 | |||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
#endif | |||
#if defined(BUILD_HALF) | |||
#if (BUILD_HALF) | |||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
#endif | |||
#if BUILD_DOUBLE==1 | |||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
@@ -747,7 +887,7 @@ static void init_parameter(void) { | |||
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; | |||
#endif | |||
#if defined(USE_GEMM3M) | |||
#if (USE_GEMM3M) | |||
#ifdef CGEMM3M_DEFAULT_P | |||
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||
#else | |||
@@ -792,8 +932,8 @@ static void init_parameter(void) { | |||
#endif | |||
} | |||
#else // defined(ARCH_ARM64) | |||
#if defined(ARCH_POWER) | |||
#else // (ARCH_ARM64) | |||
#if (ARCH_POWER) | |||
static void init_parameter(void) { | |||
#ifdef BUILD_HALF | |||
@@ -823,7 +963,7 @@ static void init_parameter(void) { | |||
} | |||
#else //POWER | |||
#if defined(ARCH_ZARCH) | |||
#if (ARCH_ZARCH) | |||
static void init_parameter(void) { | |||
#ifdef BUILD_HALF | |||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
@@ -989,22 +1129,34 @@ static void init_parameter(void) { | |||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_COMPLEX == 1 | |||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
#endif | |||
#if BUILD_COMPLEX == 1 | |||
#ifdef CGEMM3M_DEFAULT_Q | |||
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; | |||
#else | |||
TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q; | |||
#endif | |||
#endif | |||
#if BUILD_COMPLEX16 == 1 | |||
#ifdef ZGEMM3M_DEFAULT_Q | |||
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; | |||
#else | |||
TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q; | |||
#endif | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; | |||
@@ -1012,16 +1164,24 @@ static void init_parameter(void) { | |||
TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q; | |||
#endif | |||
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) | |||
#if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON) | |||
#ifdef DEBUG | |||
fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 64 * (l2 >> 7); | |||
#endif | |||
#if BUILD_DOUBLE == 1 | |||
TABLE_NAME.dgemm_p = 32 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 32 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = 16 * (l2 >> 7); | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 16 * (l2 >> 7); | |||
TABLE_NAME.xgemm_p = 8 * (l2 >> 7); | |||
@@ -1034,10 +1194,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Northwood\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 96 * (l2 >> 7); | |||
#endif | |||
#if BUILD_DOUBLE == 1 | |||
TABLE_NAME.dgemm_p = 48 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 48 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = 24 * (l2 >> 7); | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 24 * (l2 >> 7); | |||
TABLE_NAME.xgemm_p = 12 * (l2 >> 7); | |||
@@ -1050,10 +1218,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Atom\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 256; | |||
#endif | |||
#if BUILD_DOUBLE ==1 | |||
TABLE_NAME.dgemm_p = 128; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 128; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = 64; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 64; | |||
TABLE_NAME.xgemm_p = 32; | |||
@@ -1066,10 +1242,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Prescott\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 56 * (l2 >> 7); | |||
#endif | |||
#if BUILD_DOUBLE ==1 | |||
TABLE_NAME.dgemm_p = 28 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 28 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX16 == 1 | |||
TABLE_NAME.zgemm_p = 14 * (l2 >> 7); | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 14 * (l2 >> 7); | |||
TABLE_NAME.xgemm_p = 7 * (l2 >> 7); | |||
@@ -1082,10 +1266,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Core2\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; | |||
#endif | |||
#if BUILD_DOUBLE==1 | |||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; | |||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; | |||
@@ -1098,10 +1290,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Penryn\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; | |||
#endif | |||
#if BUILD_DOUBLE == 1 | |||
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; | |||
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; | |||
@@ -1114,10 +1314,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Dunnington\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; | |||
#endif | |||
#if BUILD_DOUBLE ==1 | |||
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; | |||
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; | |||
@@ -1131,10 +1339,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Nehalem\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1147,10 +1363,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Sandybridge\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1163,26 +1387,42 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Haswell\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
#endif | |||
#endif | |||
#if defined (SKYLAKEX) || defined (COOPERLAKE) | |||
#if defined(SKYLAKEX) || defined(COOPERLAKE) | |||
#ifdef DEBUG | |||
fprintf(stderr, "SkylakeX\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1196,10 +1436,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Opteron\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7); | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7); | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7); | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7); | |||
TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7); | |||
@@ -1212,10 +1460,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Barcelona\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1228,10 +1484,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Bobcate\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1244,10 +1508,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Bulldozer\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1260,10 +1532,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Excavator\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1277,10 +1557,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Piledriver\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1293,10 +1581,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Steamroller\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1309,10 +1605,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Zen\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_DOUBLE | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if BUILD_COMPLEX16 | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
@@ -1326,11 +1630,18 @@ static void init_parameter(void) { | |||
fprintf(stderr, "NANO\n"); | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
#endif | |||
#if (BUILD_DOUBLE==1) | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
#endif | |||
#if (BUILD_COMPLEX==1) | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
#endif | |||
#if (BUILD_COMPLEX16==1) | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
#endif | |||
#ifdef EXPRECISION | |||
@@ -1340,41 +1651,55 @@ static void init_parameter(void) { | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
#ifdef CGEMM3M_DEFAULT_P | |||
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||
#else | |||
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; | |||
#endif | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
#ifdef ZGEMM3M_DEFAULT_P | |||
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; | |||
#else | |||
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; | |||
#endif | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; | |||
#endif | |||
#if BUILD_SINGLE == 1 | |||
TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; | |||
#endif | |||
#if BUILD_DOUBLE== 1 | |||
TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M; | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M; | |||
#endif | |||
#if BUILD_COMPLEX==1 | |||
#ifdef CGEMM3M_DEFAULT_UNROLL_M | |||
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M; | |||
#else | |||
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; | |||
#endif | |||
#endif | |||
#if BUILD_COMPLEX16==1 | |||
#ifdef ZGEMM3M_DEFAULT_UNROLL_M | |||
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M; | |||
#else | |||
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; | |||
#endif | |||
#endif | |||
#ifdef QUAD_PRECISION | |||
TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M; | |||
@@ -1386,15 +1711,19 @@ static void init_parameter(void) { | |||
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); | |||
#endif | |||
#if BUILD_SINGLE==1 | |||
TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | |||
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | |||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||
) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15); | |||
#endif | |||
#if BUILD_DOUBLE==1 | |||
TABLE_NAME.dgemm_r = (((BUFFER_SIZE - | |||
((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA | |||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||
) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15); | |||
#endif | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_r = (((BUFFER_SIZE - | |||
@@ -1403,26 +1732,33 @@ static void init_parameter(void) { | |||
) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15); | |||
#endif | |||
#if BUILD_COMPLEX ==1 | |||
TABLE_NAME.cgemm_r = (((BUFFER_SIZE - | |||
((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA | |||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||
) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15); | |||
#endif | |||
#if BUILD_COMPLEX16 ==1 | |||
TABLE_NAME.zgemm_r = (((BUFFER_SIZE - | |||
((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA | |||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||
) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); | |||
#endif | |||
#if BUILD_COMPLEX == 1 | |||
TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE - | |||
((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA | |||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||
) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15); | |||
#endif | |||
#if BUILD_COMPLEX16 == 1 | |||
TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE - | |||
((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA | |||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||
) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15); | |||
#endif | |||
@@ -1444,4 +1780,4 @@ static void init_parameter(void) { | |||
} | |||
#endif //POWER | |||
#endif //ZARCH | |||
#endif //defined(ARCH_ARM64) | |||
#endif //(ARCH_ARM64) |
@@ -46,6 +46,7 @@ OBJ = \ | |||
lapacke_ilaver.o \ | |||
lapacke_nancheck.o | |||
ifeq ($(BUILD_COMPLEX),1) | |||
OBJ_C = \ | |||
lapacke_cbbcsd.o \ | |||
lapacke_cbbcsd_work.o \ | |||
@@ -653,7 +654,9 @@ lapacke_cupgtr.o \ | |||
lapacke_cupgtr_work.o \ | |||
lapacke_cupmtr.o \ | |||
lapacke_cupmtr_work.o | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
OBJ_D = \ | |||
lapacke_dbbcsd.o \ | |||
lapacke_dbbcsd_work.o \ | |||
@@ -1218,8 +1221,12 @@ lapacke_dtrttf_work.o \ | |||
lapacke_dtrttp.o \ | |||
lapacke_dtrttp_work.o \ | |||
lapacke_dtzrzf.o \ | |||
lapacke_dtzrzf_work.o | |||
lapacke_dtzrzf_work.o \ | |||
lapacke_slag2d.o \ | |||
lapacke_slag2d_work.o | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
OBJ_S = \ | |||
lapacke_sbbcsd.o \ | |||
lapacke_sbbcsd_work.o \ | |||
@@ -1395,8 +1402,6 @@ lapacke_slacn2.o \ | |||
lapacke_slacn2_work.o \ | |||
lapacke_slacpy.o \ | |||
lapacke_slacpy_work.o \ | |||
lapacke_slag2d.o \ | |||
lapacke_slag2d_work.o \ | |||
lapacke_slamch.o \ | |||
lapacke_slamch_work.o \ | |||
lapacke_slange.o \ | |||
@@ -1781,7 +1786,9 @@ lapacke_strttp.o \ | |||
lapacke_strttp_work.o \ | |||
lapacke_stzrzf.o \ | |||
lapacke_stzrzf_work.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
OBJ_Z = \ | |||
lapacke_zbbcsd.o \ | |||
lapacke_zbbcsd_work.o \ | |||
@@ -2393,35 +2400,52 @@ lapacke_zupgtr.o \ | |||
lapacke_zupgtr_work.o \ | |||
lapacke_zupmtr.o \ | |||
lapacke_zupmtr_work.o | |||
endif | |||
ifdef BUILD_DEPRECATED | |||
DEPRECATED = \ | |||
ifeq ($(BUILD_COMPLEX),1) | |||
DEPRECATEDC = \ | |||
lapacke_cggsvp.o \ | |||
lapacke_cggsvp_work.o \ | |||
lapacke_dggsvp.o \ | |||
lapacke_dggsvp_work.o \ | |||
lapacke_sggsvp.o \ | |||
lapacke_sggsvp_work.o \ | |||
lapacke_zggsvp.o \ | |||
lapacke_zggsvp_work.o \ | |||
lapacke_cggsvd.o \ | |||
lapacke_cggsvd_work.o \ | |||
lapacke_cgeqpf.o \ | |||
lapacke_cgeqpf_work.o | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DEPRECATEDD = \ | |||
lapacke_dggsvp.o \ | |||
lapacke_dggsvp_work.o \ | |||
lapacke_dggsvd.o \ | |||
lapacke_dggsvd_work.o \ | |||
lapacke_dgeqpf.o \ | |||
lapacke_dgeqpf_work.o | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
DEPRECATEDS = \ | |||
lapacke_sggsvp.o \ | |||
lapacke_sggsvp_work.o \ | |||
lapacke_sggsvd.o \ | |||
lapacke_sggsvd_work.o \ | |||
lapacke_sgeqpf.o \ | |||
lapacke_sgeqpf_work.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
DEPRECATEDZ = \ | |||
lapacke_zggsvp.o \ | |||
lapacke_zggsvp_work.o \ | |||
lapacke_zggsvd.o \ | |||
lapacke_zggsvd_work.o \ | |||
lapacke_cgeqpf.o \ | |||
lapacke_cgeqpf_work.o \ | |||
lapacke_dgeqpf.o \ | |||
lapacke_dgeqpf_work.o \ | |||
lapacke_sgeqpf.o \ | |||
lapacke_sgeqpf_work.o \ | |||
lapacke_zgeqpf.o \ | |||
lapacke_zgeqpf_work.o | |||
endif | |||
DEPRECATED = $(DEPRECATEDS) $(DEPRECATEDD) $(DEPRECATEDC) $(DEPRECATEDZ) | |||
endif | |||
ifdef USEXBLAS | |||
EXTENDED = \ | |||
lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \ | |||
@@ -2440,37 +2464,50 @@ endif | |||
ifdef LAPACKE_WITH_TMG | |||
# FILE PARTS OF TMGLIB | |||
MATGEN = \ | |||
ifeq ($(BUILD_COMPLEX),1) | |||
MATGENC = \ | |||
lapacke_clatms.o \ | |||
lapacke_clatms_work.o \ | |||
lapacke_dlatms.o \ | |||
lapacke_dlatms_work.o \ | |||
lapacke_slatms.o \ | |||
lapacke_slatms_work.o \ | |||
lapacke_zlatms.o \ | |||
lapacke_zlatms_work.o \ | |||
lapacke_clagge.o \ | |||
lapacke_clagge_work.o \ | |||
lapacke_claghe.o \ | |||
lapacke_claghe_work.o \ | |||
lapacke_clagsy.o \ | |||
lapacke_clagsy_work.o | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
MATGEND = \ | |||
lapacke_dlatms.o \ | |||
lapacke_dlatms_work.o \ | |||
lapacke_dlagge.o \ | |||
lapacke_dlagge_work.o \ | |||
lapacke_dlagsy.o \ | |||
lapacke_dlagsy_work.o | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
MATGENS = \ | |||
lapacke_slatms.o \ | |||
lapacke_slatms_work.o \ | |||
lapacke_slagge.o \ | |||
lapacke_slagge_work.o \ | |||
lapacke_slagsy.o \ | |||
lapacke_slagsy_work.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
MATGENZ = \ | |||
lapacke_zlatms.o \ | |||
lapacke_zlatms_work.o \ | |||
lapacke_zlagge.o \ | |||
lapacke_zlagge_work.o \ | |||
lapacke_claghe.o \ | |||
lapacke_claghe_work.o \ | |||
lapacke_zlaghe.o \ | |||
lapacke_zlaghe_work.o \ | |||
lapacke_clagsy.o \ | |||
lapacke_clagsy_work.o \ | |||
lapacke_dlagsy.o \ | |||
lapacke_dlagsy_work.o \ | |||
lapacke_slagsy.o \ | |||
lapacke_slagsy_work.o \ | |||
lapacke_zlagsy.o \ | |||
lapacke_zlagsy_work.o | |||
endif | |||
MATGEN = $(MATGENS) $(MATGEND) $(MATGENC) $(MATGENZ) | |||
endif | |||
.PHONY: all | |||
all: $(LAPACKELIB) | |||
@@ -66,7 +66,9 @@ ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \ | |||
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ | |||
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" | |||
SCLAUX = \ | |||
sbdsvdx.o sstevx.o sstein.o \ | |||
sbdsdc.o \ | |||
sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \ | |||
slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \ | |||
@@ -81,10 +83,14 @@ SCLAUX = \ | |||
slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \ | |||
slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \ | |||
ssteqr.o ssterf.o slaisnan.o sisnan.o \ | |||
slartgp.o slartgs.o \ | |||
slartgp.o slartgs.o scombssq.o \ | |||
../INSTALL/second_$(TIMER).o | |||
endif | |||
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
DZLAUX = \ | |||
dcombssq.o \ | |||
dbdsvdx.o dstevx.o dstein.o \ | |||
dbdsdc.o \ | |||
dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ | |||
dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ | |||
@@ -101,9 +107,12 @@ DZLAUX = \ | |||
dsteqr.o dsterf.o dlaisnan.o disnan.o \ | |||
dlartgp.o dlartgs.o \ | |||
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o | |||
endif | |||
#ifeq ($(BUILD_SINGLE),1) | |||
ifdef BUILD_SINGLE | |||
SLASRC_O = \ | |||
sbdsvdx.o spotrf2.o sgetrf2.o \ | |||
spotrf2.o sgetrf2.o \ | |||
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \ | |||
sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ | |||
sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ | |||
@@ -145,8 +154,7 @@ SLASRC_O = \ | |||
ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \ | |||
ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \ | |||
sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \ | |||
ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \ | |||
sstevx.o \ | |||
ssptrf.o ssptri.o ssptrs.o sstegr.o sstev.o sstevd.o sstevr.o \ | |||
ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \ | |||
ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \ | |||
ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \ | |||
@@ -180,9 +188,13 @@ SLASRC_O = \ | |||
ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ | |||
ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ | |||
ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ | |||
sgesvdq.o scombssq.o | |||
sgesvdq.o | |||
endif | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
DSLASRC_O = spotrs.o sgetrs.o spotrf.o sgetrf.o | |||
endif | |||
ifdef USEXBLAS | |||
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ | |||
@@ -194,6 +206,7 @@ SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ | |||
slascl2.o sla_wwaddw.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CLASRC_O = \ | |||
cpotrf2.o cgetrf2.o \ | |||
cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \ | |||
@@ -284,6 +297,7 @@ CLASRC_O = \ | |||
cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ | |||
chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ | |||
cgesvdq.o | |||
endif | |||
ifdef USEXBLAS | |||
CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | |||
@@ -299,11 +313,13 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | |||
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o | |||
endif | |||
ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o clag2z.o | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DLASRC_O = \ | |||
dpotrf2.o dgetrf2.o \ | |||
dbdsvdx.o \ | |||
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ | |||
dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ | |||
dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ | |||
@@ -345,8 +361,7 @@ DLASRC_O = \ | |||
dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ | |||
dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ | |||
dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \ | |||
dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ | |||
dstevx.o \ | |||
dsptrf.o dsptri.o dsptrs.o dstegr.o dstev.o dstevd.o dstevr.o \ | |||
dsycon.o dsyev.o dsyevd.o dsyevr.o \ | |||
dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ | |||
dsysv.o dsysvx.o \ | |||
@@ -381,7 +396,8 @@ DLASRC_O = \ | |||
dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ | |||
dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ | |||
dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ | |||
dgesvdq.o dcombssq.o | |||
dgesvdq.o | |||
endif | |||
ifdef USEXBLAS | |||
DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | |||
@@ -393,6 +409,7 @@ DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | |||
dlascl2.o dla_wwaddw.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZLASRC_O = \ | |||
zpotrf2.o zgetrf2.o \ | |||
zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \ | |||
@@ -471,7 +488,7 @@ ZLASRC_O = \ | |||
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ | |||
zunmtr.o zupgtr.o \ | |||
zupmtr.o izmax1.o dzsum1.o zstemr.o \ | |||
zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \ | |||
zcgesv.o zcposv.o zlag2c.o zlat2c.o \ | |||
zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \ | |||
ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \ | |||
zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \ | |||
@@ -488,6 +505,7 @@ ZLASRC_O = \ | |||
zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ | |||
zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ | |||
zgesvdq.o | |||
endif | |||
ifdef USEXBLAS | |||
ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | |||
@@ -501,18 +519,30 @@ ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | |||
zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o | |||
endif | |||
DEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CDEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ | |||
DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \ | |||
DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \ | |||
DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DDEPRECSRC = \ | |||
DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \ | |||
DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \ | |||
DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \ | |||
DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
SDEPRECSRC = \ | |||
DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \ | |||
DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \ | |||
DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \ | |||
DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZDEPRECSRC = \ | |||
DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \ | |||
DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \ | |||
DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o | |||
endif | |||
# filter out optimized codes from OpenBLAS | |||
ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o | |||
@@ -560,7 +590,7 @@ ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) | |||
endif | |||
ifdef BUILD_DEPRECATED | |||
DEPRECATED = $(DEPRECSRC) | |||
DEPRECATED = $(SDEPRECSRC) $(DDEPRECSRC) $(CDEPRECSRC) $(ZDEPRECSRC) | |||
endif | |||
.PHONY: all | |||
@@ -33,25 +33,37 @@ | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" | |||
SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ | |||
slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ | |||
slatm3.o slatm5.o slatm6.o slahilb.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ | |||
clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \ | |||
clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o | |||
endif | |||
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ | |||
dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ | |||
dlatm3.o dlatm5.o dlatm6.o dlahilb.o | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ | |||
zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ | |||
zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o | |||
endif | |||
.PHONY: all | |||
all: $(TMGLIB) | |||
@@ -97,5 +109,9 @@ cleanobj: | |||
cleanlib: | |||
rm -f $(TMGLIB) | |||
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) | |||
slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
endif | |||
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) | |||
dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
endif |
@@ -1,11 +1,19 @@ | |||
TOPDIR = ../.. | |||
include ../../Makefile.system | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS = sgetf2_k.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS = dgetf2_k.$(SUFFIX) | |||
endif | |||
QBLASOBJS = qgetf2_k.$(SUFFIX) | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS = cgetf2_k.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS = zgetf2_k.$(SUFFIX) | |||
endif | |||
XBLASOBJS = xgetf2_k.$(SUFFIX) | |||
sgetf2_k.$(SUFFIX) : getf2_k.c | |||
@@ -17,6 +17,19 @@ ZBLASOBJS += zgetrf_parallel.$(SUFFIX) | |||
XBLASOBJS += xgetrf_parallel.$(SUFFIX) | |||
endif | |||
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
endif | |||
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
ifeq ($(USE_OPENMP), 1) | |||
GETRF_SRC = getrf_parallel_omp.c | |||
else | |||
@@ -17,6 +17,19 @@ ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_pa | |||
XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX) | |||
endif | |||
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
endif | |||
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
sgetrs_N_single.$(SUFFIX) : getrs_single.c | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANS $< -o $(@F) | |||
@@ -1,11 +1,19 @@ | |||
TOPDIR = ../.. | |||
include ../../Makefile.system | |||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS = slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS = dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) | |||
endif | |||
QBLASOBJS = qlaswp_plus.$(SUFFIX) qlaswp_minus.$(SUFFIX) | |||
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS = claswp_plus.$(SUFFIX) claswp_minus.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS = zlaswp_plus.$(SUFFIX) zlaswp_minus.$(SUFFIX) | |||
endif | |||
XBLASOBJS = xlaswp_plus.$(SUFFIX) xlaswp_minus.$(SUFFIX) | |||
slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) \ | |||
@@ -1,11 +1,19 @@ | |||
TOPDIR = ../.. | |||
include ../../Makefile.system | |||
ifeq ($(BUILD_SINGLE),1) | |||
SBLASOBJS = slauu2_U.$(SUFFIX) slauu2_L.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS = dlauu2_U.$(SUFFIX) dlauu2_L.$(SUFFIX) | |||
endif | |||
QBLASOBJS = qlauu2_U.$(SUFFIX) qlauu2_L.$(SUFFIX) | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS = clauu2_U.$(SUFFIX) clauu2_L.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS = zlauu2_U.$(SUFFIX) zlauu2_L.$(SUFFIX) | |||
endif | |||
XBLASOBJS = xlauu2_U.$(SUFFIX) xlauu2_L.$(SUFFIX) | |||
slauu2_U.$(SUFFIX) : lauu2_U.c | |||
@@ -17,6 +17,19 @@ ZBLASOBJS += zlauum_U_parallel.$(SUFFIX) zlauum_L_parallel.$(SUFFIX) | |||
XBLASOBJS += xlauum_U_parallel.$(SUFFIX) xlauum_L_parallel.$(SUFFIX) | |||
endif | |||
ifneq ($(BUILD_SINGLE),1) | |||
SBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
slauum_U_single.$(SUFFIX) : lauum_U_single.c | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) | |||
@@ -8,6 +8,19 @@ CBLASOBJS = cpotf2_U.$(SUFFIX) cpotf2_L.$(SUFFIX) | |||
ZBLASOBJS = zpotf2_U.$(SUFFIX) zpotf2_L.$(SUFFIX) | |||
XBLASOBJS = xpotf2_U.$(SUFFIX) xpotf2_L.$(SUFFIX) | |||
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
endif | |||
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
spotf2_U.$(SUFFIX) : potf2_U.c | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) | |||
@@ -17,6 +17,20 @@ ZBLASOBJS += zpotrf_U_parallel.$(SUFFIX) zpotrf_L_parallel.$(SUFFIX) | |||
XBLASOBJS += xpotrf_U_parallel.$(SUFFIX) xpotrf_L_parallel.$(SUFFIX) | |||
endif | |||
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
SBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
endif | |||
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
CBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
spotrf_U_single.$(SUFFIX) : potrf_U_single.c | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) | |||
@@ -1,11 +1,19 @@ | |||
TOPDIR = ../.. | |||
include ../../Makefile.system | |||
ifeq ($(BUILD_SINGLE),1) | |||
SBLASOBJS = strti2_UU.$(SUFFIX) strti2_UN.$(SUFFIX) strti2_LU.$(SUFFIX) strti2_LN.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS = dtrti2_UU.$(SUFFIX) dtrti2_UN.$(SUFFIX) dtrti2_LU.$(SUFFIX) dtrti2_LN.$(SUFFIX) | |||
endif | |||
QBLASOBJS = qtrti2_UU.$(SUFFIX) qtrti2_UN.$(SUFFIX) qtrti2_LU.$(SUFFIX) qtrti2_LN.$(SUFFIX) | |||
ifeq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS = ctrti2_UU.$(SUFFIX) ctrti2_UN.$(SUFFIX) ctrti2_LU.$(SUFFIX) ctrti2_LN.$(SUFFIX) | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS = ztrti2_UU.$(SUFFIX) ztrti2_UN.$(SUFFIX) ztrti2_LU.$(SUFFIX) ztrti2_LN.$(SUFFIX) | |||
endif | |||
XBLASOBJS = xtrti2_UU.$(SUFFIX) xtrti2_UN.$(SUFFIX) xtrti2_LU.$(SUFFIX) xtrti2_LN.$(SUFFIX) | |||
strti2_UU.$(SUFFIX) : trti2_U.c | |||
@@ -23,6 +23,19 @@ ZBLASOBJS += ztrtri_UU_parallel.$(SUFFIX) ztrtri_UN_parallel.$(SUFFIX) ztrtri_LU | |||
XBLASOBJS += xtrtri_UU_parallel.$(SUFFIX) xtrtri_UN_parallel.$(SUFFIX) xtrtri_LU_parallel.$(SUFFIX) xtrtri_LN_parallel.$(SUFFIX) | |||
endif | |||
ifneq ($(BUILD_SINGLE),1) | |||
SBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_DOUBLE),1) | |||
DBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX),1) | |||
CBLASOBJS= | |||
endif | |||
ifneq ($(BUILD_COMPLEX16),1) | |||
ZBLASOBJS= | |||
endif | |||
strtri_UU_single.$(SUFFIX) : trtri_U_single.c | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUNIT $< -o $(@F) | |||
@@ -17,6 +17,19 @@ ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_ | |||
XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX) | |||
endif | |||
# Empty the trtrs object list of every precision whose BUILD_* flag is not 1,
# so nothing is compiled for types left out of the build.
ifneq "$(BUILD_SINGLE)" "1"
SBLASOBJS =
endif

ifneq "$(BUILD_DOUBLE)" "1"
DBLASOBJS =
endif

ifneq "$(BUILD_COMPLEX)" "1"
CBLASOBJS =
endif

ifneq "$(BUILD_COMPLEX16)" "1"
ZBLASOBJS =
endif
# Compile trtrs_single.c with COMPLEX, DOUBLE, UPLO, TRANS and DIAG all | |||
# undefined (presumably the single-precision real U/N/U variant - confirm). | |||
# The object is written to the file part of the target name, $(@F). | |||
strtrs_UNU_single.$(SUFFIX) : trtrs_single.c | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) | |||
@@ -4,7 +4,7 @@ include_directories(${PROJECT_BINARY_DIR}) | |||
enable_language(Fortran) | |||
# Single-precision test drivers are only added when that type is built.
# The names are appended exactly once; a repeated append would put each name
# in OpenBLAS_Tests twice.
if (BUILD_SINGLE)
  list(APPEND OpenBLAS_Tests sblat1 sblat2 sblat3)
endif()
if (BUILD_DOUBLE) | |||
list (APPEND OpenBLAS_Tests dblat1 dblat2 dblat3) | |||
@@ -17,7 +17,7 @@ if (BUILD_COMPLEX16) | |||
endif() | |||
# Build one executable per selected test driver from its Fortran source and
# link it against the OpenBLAS library target.  Each target name is declared
# once; declaring the same executable twice is a CMake error.
foreach(test_bin ${OpenBLAS_Tests})
  add_executable(${test_bin} ${test_bin}.f)
  target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME})
endforeach()
@@ -34,7 +34,19 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh | |||
"fi\n" | |||
) | |||
# Collect the type prefixes (s, d, c, z) of the precisions that were built;
# the test registration loop that follows iterates over this list.
# The list starts out explicitly empty so that it holds only the enabled
# types: seeding it with all four letters would defeat the gating and
# register every enabled type twice.
set(float_types "")
if (BUILD_SINGLE)
  list(APPEND float_types s)
endif()
if (BUILD_DOUBLE)
  list(APPEND float_types d)
endif()
if (BUILD_COMPLEX)
  list(APPEND float_types c)
endif()
if (BUILD_COMPLEX16)
  list(APPEND float_types z)
endif()
foreach(float_type ${float_types}) | |||
string(TOUPPER ${float_type} float_type_upper) | |||
add_test(NAME "${float_type}blas1" | |||
@@ -7,82 +7,241 @@ all :: | |||
else | |||
all :: level1 level2 level3 | |||
endif | |||
# level1 -- build and run the Level 1 BLAS test drivers of every enabled type.
#
# The prerequisite list is assembled one type at a time, so every combination
# of BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16 works.  The
# previous enumerated ifeq chain listed only 11 of the 15 possible
# combinations (e.g. single+double+complex without complex16 was missing), and
# an unconditional prerequisite line demanded all four drivers even when some
# of them had no build rule.
#
# Unless cross-compiling (CROSS), each driver runs single-threaded first and,
# for SMP builds, again with two threads; the thread count is passed through
# OMP_NUM_THREADS when USE_OPENMP is 1 and through OPENBLAS_NUM_THREADS
# otherwise.
LEVEL1_TESTS =
ifeq ($(BUILD_SINGLE),1)
LEVEL1_TESTS += sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
LEVEL1_TESTS += dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
LEVEL1_TESTS += cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
LEVEL1_TESTS += zblat1
endif

level1 : $(LEVEL1_TESTS)
ifndef CROSS
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1
endif
ifdef SMP
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./zblat1
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./zblat1
endif
endif
endif
endif
# level2 -- build and run the Level 2 BLAS test drivers of every enabled type.
#
# The prerequisite list is assembled one type at a time, so every combination
# of BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16 works.  The
# previous enumerated ifeq chain listed only 11 of the 15 possible
# combinations, and an unconditional prerequisite line demanded all four
# drivers even when some of them had no build rule.
#
# Unless cross-compiling (CROSS), each driver reads its .dat input and its
# ?BLAT2.SUMM summary is printed when it contains FATAL.  For SMP builds the
# run is repeated with two threads (OMP_NUM_THREADS when USE_OPENMP is 1,
# OPENBLAS_NUM_THREADS otherwise).
# NOTE(review): the grep lines end in "|| exit 0", so a FATAL summary is shown
# but does not fail the target - confirm this is intended.
LEVEL2_TESTS =
ifeq ($(BUILD_SINGLE),1)
LEVEL2_TESTS += sblat2
endif
ifeq ($(BUILD_DOUBLE),1)
LEVEL2_TESTS += dblat2
endif
ifeq ($(BUILD_COMPLEX),1)
LEVEL2_TESTS += cblat2
endif
ifeq ($(BUILD_COMPLEX16),1)
LEVEL2_TESTS += zblat2
endif

level2 : $(LEVEL2_TESTS)
ifndef CROSS
	rm -f ?BLAT2.SUMM
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat
	@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat
	@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat
	@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat
	@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
ifdef SMP
	rm -f ?BLAT2.SUMM
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
	@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat
	@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat
	@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat
	@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
	@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat
	@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./cblat2 < ./cblat2.dat
	@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat
	@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
endif
endif
endif
# level3 -- prerequisites of the Level 3 BLAS test run.
#
# The list is assembled one type at a time so every combination of
# BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16 works; the
# previous enumerated ifeq chain listed only 11 of the 15 combinations and was
# preceded by an ifeq/else pair with no matching endif.
# test_shgemm is listed when BUILD_HALF is 1 because the recipe runs
# ./test_shgemm in that configuration and therefore needs it built first.
LEVEL3_TESTS =
ifeq ($(BUILD_HALF),1)
LEVEL3_TESTS += test_shgemm
endif
ifeq ($(BUILD_SINGLE),1)
LEVEL3_TESTS += sblat3
endif
ifeq ($(BUILD_DOUBLE),1)
LEVEL3_TESTS += dblat3
endif
ifeq ($(BUILD_COMPLEX),1)
LEVEL3_TESTS += cblat3
endif
ifeq ($(BUILD_COMPLEX16),1)
LEVEL3_TESTS += zblat3
endif

level3 : $(LEVEL3_TESTS)
ifndef CROSS | |||
rm -f ?BLAT3.SUMM | |||
ifeq ($(BUILD_HALF),1) | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM | |||
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat | |||
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat | |||
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat | |||
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat | |||
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 | |||
endif | |||
ifdef SMP | |||
rm -f ?BLAT3.SUMM | |||
ifeq ($(USE_OPENMP), 1) | |||
@@ -90,30 +249,46 @@ ifeq ($(BUILD_HALF),1) | |||
OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM | |||
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat | |||
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat | |||
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat | |||
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat | |||
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 | |||
endif | |||
else | |||
ifeq ($(BUILD_HALF),1) | |||
OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM | |||
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat | |||
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat | |||
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
OPENBLAS_NUM_THREADS=2 ./cblat3 < ./cblat3.dat | |||
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat | |||
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 | |||
endif | |||
endif | |||
endif | |||
endif | |||
level3_3m : zblat3_3m cblat3_3m | |||
@@ -151,56 +326,71 @@ endif | |||
endif | |||
endif | |||
# Link rules for the BLAS test drivers.
#
# Each driver is linked from its own object plus ../$(LIBNAME) and is only
# defined when the matching BUILD_* flag is 1.  Every driver has exactly one
# rule, under the guard of its own type; stray duplicate definitions placed
# under another type's guard or outside any guard defeated the gating and
# caused "overriding recipe" warnings.
ifeq ($(BUILD_SINGLE),1)
sblat1 : sblat1.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_DOUBLE),1)
dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
else
# NOTE(review): empty placeholder rules kept from the original so that a
# target naming dblat2/dblat3 still resolves without double precision;
# dblat1 has no such placeholder - confirm whether these are still needed.
dblat2:
dblat3:
endif

qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

ifeq ($(BUILD_COMPLEX),1)
cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

cblat2 : cblat2.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_COMPLEX16),1)
zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)

zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
endif

# NOTE(review): a C source is compiled and linked through $(FC) here; confirm
# that every supported Fortran driver accepts C input.
ifeq ($(BUILD_HALF),1)
test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_COMPLEX),1)
cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_COMPLEX16),1)
zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ $^ $(EXTRALIB) $(CEXTRALIB)
endif
@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
#include "openblas_utest.h" | |||
#if defined(BUILD_SINGLE) && defined(BUILD_DOUBLE) | |||
CTEST(dsdot,dsdot_n_1) | |||
{ | |||
float x= 0.172555164F; | |||
@@ -47,17 +47,4 @@ CTEST(dsdot,dsdot_n_1) | |||
ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); | |||
} | |||
CTEST(dsdot,dsdot_n_2) | |||
{ | |||
float x[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F}; | |||
float y[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F}; | |||
blasint incx=1; | |||
blasint incy=1; | |||
blasint n=8; | |||
double res1=0.0f, res2= 2.0400000444054616; | |||
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy); | |||
ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); | |||
} | |||
#endif |
@@ -48,6 +48,7 @@ void* xmalloc(size_t n) | |||
} | |||
} | |||
#ifdef BUILD_DOUBLE | |||
void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) | |||
{ | |||
char trans1 = 'T'; | |||
@@ -59,9 +60,13 @@ void check_dgemm(double *a, double *b, double *result, double *expected, blasint | |||
ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS); | |||
} | |||
} | |||
#endif | |||
CTEST(fork, safety) | |||
{ | |||
#ifndef BUILD_DOUBLE | |||
exit(0); | |||
#else | |||
blasint n = 1000; | |||
int i; | |||
@@ -124,4 +129,5 @@ CTEST(fork, safety) | |||
ASSERT_EQUAL(wait_pid, fork_pid); | |||
ASSERT_EQUAL(0, WEXITSTATUS (child_status)); | |||
} | |||
#endif | |||
} |
@@ -529,16 +529,20 @@ CTEST(potrf, smoketest_trivial){ | |||
for (j = 0; j < n; ++j) { | |||
double err; | |||
#ifdef BUILD_SINGLE | |||
err = fabs(A1s[i+n*j] - Bs[i+n*j]); | |||
if (err > 1e-5) { | |||
CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
} | |||
#endif | |||
#ifdef BUILD_DOUBLE | |||
err = fabs(A1d[i+n*j] - Bd[i+n*j]); | |||
if (err > 1e-12) { | |||
CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
} | |||
#endif | |||
#ifdef BUILD_COMPLEX | |||
#ifdef OPENBLAS_COMPLEX_C99 | |||
err = cabsf(A1c[i+n*j] - Bc[i+n*j]); | |||
#else | |||
@@ -548,7 +552,9 @@ CTEST(potrf, smoketest_trivial){ | |||
if (err > 1e-5) { | |||
CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
} | |||
#endif | |||
#ifdef BUILD_COMPLEX16 | |||
#ifdef OPENBLAS_COMPLEX_C99 | |||
err = cabs(A1z[i+n*j] - Bz[i+n*j]); | |||
#else | |||
@@ -558,6 +564,7 @@ CTEST(potrf, smoketest_trivial){ | |||
if (err > 1e-12) { | |||
CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
} | |||
#endif | |||
} | |||
} | |||
} | |||