| @@ -9,7 +9,7 @@ ifndef TOPDIR | |||
| TOPDIR = . | |||
| endif | |||
| # If ARCH is not set, we use the host system's architecture for getarch compile options. | |||
| # If ARCH is not set, we use the host system's architecture for getarch compile options. | |||
| ifndef ARCH | |||
| HOSTARCH := $(shell uname -m) | |||
| else | |||
| @@ -73,6 +73,18 @@ endif | |||
| # | |||
| # Beginning of system configuration | |||
| # | |||
| # Default precision selection: when none of BUILD_SINGLE, BUILD_DOUBLE, | |||
| # BUILD_COMPLEX or BUILD_COMPLEX16 is exactly 1, enable all four. | |||
| # "override" makes the assignment take effect even if the variable was | |||
| # given another value on the make command line. | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| override BUILD_SINGLE=1 | |||
| override BUILD_DOUBLE=1 | |||
| override BUILD_COMPLEX=1 | |||
| override BUILD_COMPLEX16=1 | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| ifndef HOSTCC | |||
| HOSTCC = $(CC) | |||
| @@ -1224,16 +1236,16 @@ ifeq ($(BUILD_HALF), 1) | |||
| CCOMMON_OPT += -DBUILD_HALF | |||
| endif | |||
| ifeq ($(BUILD_SINGLE), 1) | |||
| CCOMMON_OPT += -DBUILD_SINGLE | |||
| CCOMMON_OPT += -DBUILD_SINGLE=1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE), 1) | |||
| CCOMMON_OPT += -DBUILD_DOUBLE | |||
| CCOMMON_OPT += -DBUILD_DOUBLE=1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX), 1) | |||
| CCOMMON_OPT += -DBUILD_COMPLEX | |||
| CCOMMON_OPT += -DBUILD_COMPLEX=1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16), 1) | |||
| CCOMMON_OPT += -DBUILD_COMPLEX16 | |||
| CCOMMON_OPT += -DBUILD_COMPLEX16=1 | |||
| endif | |||
| CCOMMON_OPT += -DVERSION=\"$(VERSION)\" | |||
| @@ -9,9 +9,11 @@ endif | |||
| endif | |||
| ifdef HAVE_SSE3 | |||
| ifndef DYNAMIC_ARCH | |||
| CCOMMON_OPT += -msse3 | |||
| FCOMMON_OPT += -msse3 | |||
| endif | |||
| endif | |||
| ifeq ($(CORE), SKYLAKEX) | |||
| ifndef DYNAMIC_ARCH | |||
| @@ -167,6 +167,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
| float (*snrm2_k) (BLASLONG, float *, BLASLONG); | |||
| float (*sasum_k) (BLASLONG, float *, BLASLONG); | |||
| #endif | |||
| #if BUILD_SINGLE | |||
| float (*ssum_k) (BLASLONG, float *, BLASLONG); | |||
| #endif | |||
| @@ -188,13 +189,15 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
| int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| #endif | |||
| #if BUILD_SINGLE | |||
| #if BUILD_SINGLE | |||
| int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||
| #endif | |||
| #if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) | |||
| #ifdef ARCH_X86_64 | |||
| void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); | |||
| @@ -210,6 +213,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||
| int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
| int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); | |||
| #endif | |||
| #if (BUILD_SINGLE) || (BUILD_DOUBLE) | |||
| int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); | |||
| int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); | |||
| @@ -304,12 +308,14 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||
| int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
| int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
| int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
| int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||
| #endif | |||
| #if (BUILD_DOUBLE) || (BUILD_COMPLEX16) | |||
| int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG); | |||
| int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
| @@ -319,6 +325,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||
| int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
| int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); | |||
| int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); | |||
| @@ -466,6 +473,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG); | |||
| #endif | |||
| #if (BUILD_COMPLEX) || (BUILD_COMPLEX16) | |||
| int cgemm_p, cgemm_q, cgemm_r; | |||
| int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn; | |||
| @@ -644,6 +652,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); | |||
| int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| int zgemm_p, zgemm_q, zgemm_r; | |||
| int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn; | |||
| @@ -982,6 +991,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
| void (*init)(void); | |||
| int snum_opt, dnum_opt, qnum_opt; | |||
| #if BUILD_SINGLE | |||
| int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); | |||
| #endif | |||
| @@ -995,14 +1005,14 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||
| int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); | |||
| #endif | |||
| #if BUILD_SINGLE | |||
| #if BUILD_SINGLE | |||
| int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
| int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
| int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
| int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| #if BUILD_DOUBLE | |||
| int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
| int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
| int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); | |||
| @@ -46,56 +46,155 @@ else | |||
| all :: all1 all2 all3 | |||
| endif | |||
| all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| all1targets += xscblat1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| all1targets += xdcblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| all1targets += xccblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| all1targets += xzcblat1 | |||
| endif | |||
| all1: $(all1targets) | |||
| ifndef CROSS | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./xscblat1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OMP_NUM_THREADS=2 ./xdcblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./xccblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./xzcblat1 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xscblat1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xdcblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xccblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xzcblat1 | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| all2targets += xscblat2 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| all2targets += xdcblat2 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| all2targets += xccblat2 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| all2targets += xzcblat2 | |||
| endif | |||
| all2: $(all2targets) | |||
| all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | |||
| ifndef CROSS | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./xscblat2 < sin2 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OMP_NUM_THREADS=2 ./xdcblat2 < din2 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./xccblat2 < cin2 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./xzcblat2 < zin2 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| all3targets += xscblat3 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| all3targets += xdcblat3 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| all3targets += xccblat3 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| all3targets += xzcblat3 | |||
| endif | |||
| all3: $(all3targets) | |||
| all3: xscblat3 xdcblat3 xccblat3 xzcblat3 | |||
| ifndef CROSS | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./xscblat3 < sin3 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OMP_NUM_THREADS=2 ./xdcblat3 < din3 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./xccblat3 < cin3 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./xzcblat3 < zin3 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3 | |||
| endif | |||
| endif | |||
| endif | |||
| # 3M-variant level-3 CBLAS tests; only the two complex precisions have them. | |||
| # Prerequisites are gathered per enabled precision (same scheme as | |||
| # all1/all2/all3) so a disabled precision is neither linked nor run. | |||
| # xccblat3_3m is the single-complex binary, so both the OpenMP and the | |||
| # non-OpenMP branch gate it on BUILD_COMPLEX (not BUILD_SINGLE). | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| all3_3mtargets += xzcblat3_3m | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| all3_3mtargets += xccblat3_3m | |||
| endif | |||
| all3_3m: $(all3_3mtargets) | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m | |||
| endif | |||
| endif | |||
| @@ -115,13 +214,19 @@ endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| # Single real | |||
| xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xscblat1 c_sblat1.o $(stestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| xscblat2: $(stestl2o) c_sblat2.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xscblat2 c_sblat2.o $(stestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| xscblat3: $(stestl3o) c_sblat3.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xscblat3 c_sblat3.o $(stestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| # Double real | |||
| xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| @@ -129,7 +234,10 @@ xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| # Single complex | |||
| xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| @@ -140,7 +248,10 @@ xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME) | |||
| xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| # Double complex | |||
| xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| @@ -152,6 +263,6 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME) | |||
| xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| include $(TOPDIR)/Makefile.tail | |||
| @@ -197,6 +197,19 @@ foreach (float_type ${FLOAT_TYPES}) | |||
| endif () | |||
| endforeach () | |||
| # Complex single requested without real single: when threading is enabled, | |||
| # still generate the gemv_thread_n / gemv_thread_t objects with the "SINGLE" | |||
| # type argument. | |||
| # NOTE(review): presumably the complex code path calls the real-precision | |||
| # threaded gemv driver - confirm against GenerateNamedObjects and callers. | |||
| if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| if (USE_THREAD) | |||
| GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "SINGLE") | |||
| endif () | |||
| endif () | |||
| # Same fallback for double: complex16 requested without real double. | |||
| if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| if (USE_THREAD) | |||
| GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "DOUBLE") | |||
| endif () | |||
| endif () | |||
| if (USE_THREAD) | |||
| GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) | |||
| endif () | |||
| @@ -417,19 +417,63 @@ XBLASOBJS += \ | |||
| endif | |||
| # Real single precision not requested: start from an empty object list and | |||
| # add back only the single-precision helpers the enabled precisions need. | |||
| # The BUILD_COMPLEX branch must not overwrite what the BUILD_DOUBLE branch | |||
| # added (the strsv objects), so it appends, and only when BUILD_DOUBLE has | |||
| # not already contributed the same sgemv_thread objects. | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS= | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| ifdef SMP | |||
| SBLASOBJS += \ | |||
| sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX) \ | |||
| strsv_NUU.$(SUFFIX) strsv_NUN.$(SUFFIX) strsv_NLU.$(SUFFIX) strsv_NLN.$(SUFFIX) \ | |||
| strsv_TUU.$(SUFFIX) strsv_TUN.$(SUFFIX) strsv_TLU.$(SUFFIX) strsv_TLN.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| ifdef SMP | |||
| SBLASOBJS += sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ifdef SMP | |||
| DBLASOBJS = dgemv_thread_n.$(SUFFIX) dgemv_thread_t.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS= | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| CBLASOBJS= \ | |||
| ctrsv_NUU.$(SUFFIX) ctrsv_NUN.$(SUFFIX) ctrsv_NLU.$(SUFFIX) ctrsv_NLN.$(SUFFIX) \ | |||
| ctrsv_TUU.$(SUFFIX) ctrsv_TUN.$(SUFFIX) ctrsv_TLU.$(SUFFIX) ctrsv_TLN.$(SUFFIX) \ | |||
| ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \ | |||
| ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| all :: | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
| $(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< | |||
| sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c | |||
| $(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
| $(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< | |||
| dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c | |||
| $(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
| $(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< | |||
| @@ -437,6 +481,8 @@ qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c | |||
| qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c | |||
| $(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c | |||
| $(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
| @@ -460,6 +506,9 @@ cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c | |||
| cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c | |||
| $(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c | |||
| $(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
| @@ -484,6 +533,7 @@ zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c | |||
| zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c | |||
| $(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c | |||
| $(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
| @@ -509,24 +559,34 @@ xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c | |||
| xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c | |||
| $(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< | |||
| sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< | |||
| dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< | |||
| qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
| @@ -550,6 +610,10 @@ cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c | |||
| cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
| @@ -574,6 +638,7 @@ zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c | |||
| zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
| endif | |||
| xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< | |||
| @@ -599,24 +664,32 @@ xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c | |||
| xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c | |||
| $(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< | |||
| # Single-precision real threaded GEMV objects (_n built with -UTRANSA, _t with -DTRANSA). | |||
| # The rules are emitted when any of BUILD_SINGLE, BUILD_DOUBLE or BUILD_COMPLEX | |||
| # expands to a non-empty string. | |||
| # NOTE(review): $(or ...) tests for non-empty, not for "1", so BUILD_x=0 also | |||
| # enables these rules, unlike the ifeq ($(BUILD_x),1) guards used for the | |||
| # complex rules in this file. | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | |||
| sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| endif | |||
| # Double-precision real threaded GEMV objects; emitted when BUILD_DOUBLE or | |||
| # BUILD_COMPLEX16 expands to a non-empty string (same non-empty caveat as above). | |||
| ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
| dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| endif | |||
| qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| @@ -640,6 +713,10 @@ cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common. | |||
| cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| @@ -664,6 +741,7 @@ zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common. | |||
| zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) | |||
| endif | |||
| xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||
| @@ -14,6 +14,24 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
| endif () | |||
| endforeach () | |||
| if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
| string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||
| GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE") | |||
| if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
| GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE") | |||
| endif() | |||
| endforeach() | |||
| endif() | |||
| if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
| string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||
| GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE") | |||
| if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
| GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE") | |||
| endif() | |||
| endforeach() | |||
| endif() | |||
| set(TRMM_TRSM_SOURCES | |||
| trmm_L.c | |||
| @@ -100,7 +118,24 @@ foreach (float_type ${FLOAT_TYPES}) | |||
| endif() | |||
| endif () | |||
| endforeach () | |||
| if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | |||
| string(TOLOWER ${gemm_define} gemm_define_LC) | |||
| if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
| GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "DOUBLE" ) | |||
| endif() | |||
| endforeach() | |||
| endif () | |||
| if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) | |||
| string(TOLOWER ${gemm_define} gemm_define_LC) | |||
| if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||
| GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "SINGLE" ) | |||
| endif() | |||
| endforeach() | |||
| endif () | |||
| # for gemm3m | |||
| if(USE_GEMM3M) | |||
| foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||
| @@ -287,6 +287,60 @@ HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \ | |||
| dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | |||
| endif | |||
| # Real single precision not requested: start from an empty object list and | |||
| # add back only the single-precision helpers the enabled precisions need. | |||
| # BUILD_DOUBLE contributes the strsm/ssyrk objects, BUILD_COMPLEX the sgemm | |||
| # objects. The BUILD_COMPLEX branch appends (+=) so that enabling both does | |||
| # not discard the objects added for BUILD_DOUBLE. | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS= | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| SBLASOBJS= \ | |||
| strsm_LNUU.$(SUFFIX) strsm_LNUN.$(SUFFIX) strsm_LNLU.$(SUFFIX) strsm_LNLN.$(SUFFIX) \ | |||
| strsm_LTUU.$(SUFFIX) strsm_LTUN.$(SUFFIX) strsm_LTLU.$(SUFFIX) strsm_LTLN.$(SUFFIX) \ | |||
| strsm_RNUU.$(SUFFIX) strsm_RNUN.$(SUFFIX) strsm_RNLU.$(SUFFIX) strsm_RNLN.$(SUFFIX) \ | |||
| strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \ | |||
| ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \ | |||
| ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX) | |||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||
| SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| SBLASOBJS += sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) | |||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||
| SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX) | |||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||
| DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS= | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| CBLASOBJS= \ | |||
| cherk_UN.$(SUFFIX) cherk_UC.$(SUFFIX) cherk_LN.$(SUFFIX) cherk_LC.$(SUFFIX) \ | |||
| cherk_kernel_UN.$(SUFFIX) cherk_kernel_UC.$(SUFFIX) \ | |||
| cherk_kernel_LN.$(SUFFIX) cherk_kernel_LC.$(SUFFIX) \ | |||
| ctrsm_LNUU.$(SUFFIX) ctrsm_LNUN.$(SUFFIX) ctrsm_LNLU.$(SUFFIX) ctrsm_LNLN.$(SUFFIX) \ | |||
| ctrsm_LTUU.$(SUFFIX) ctrsm_LTUN.$(SUFFIX) ctrsm_LTLU.$(SUFFIX) ctrsm_LTLN.$(SUFFIX) \ | |||
| ctrsm_LRUU.$(SUFFIX) ctrsm_LRUN.$(SUFFIX) ctrsm_LRLU.$(SUFFIX) ctrsm_LRLN.$(SUFFIX) \ | |||
| ctrsm_LCUU.$(SUFFIX) ctrsm_LCUN.$(SUFFIX) ctrsm_LCLU.$(SUFFIX) ctrsm_LCLN.$(SUFFIX) \ | |||
| ctrsm_RNUU.$(SUFFIX) ctrsm_RNUN.$(SUFFIX) ctrsm_RNLU.$(SUFFIX) ctrsm_RNLN.$(SUFFIX) \ | |||
| ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \ | |||
| ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \ | |||
| ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX) | |||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||
| CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX) | |||
| endif | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| all :: | |||
| shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
| @@ -56,12 +56,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||
| if (!(mode & BLAS_COMPLEX)) { | |||
| switch (mode & BLAS_PREC) { | |||
| #if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) | |||
| case BLAS_SINGLE: | |||
| mask = SGEMM_UNROLL_MN - 1; | |||
| break; | |||
| #endif | |||
| #if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) | |||
| case BLAS_DOUBLE: | |||
| mask = DGEMM_UNROLL_MN - 1; | |||
| break; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| case BLAS_XDOUBLE: | |||
| mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; | |||
| @@ -70,12 +74,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||
| } | |||
| } else { | |||
| switch (mode & BLAS_PREC) { | |||
| #ifdef BUILD_COMPLEX | |||
| case BLAS_SINGLE: | |||
| mask = CGEMM_UNROLL_MN - 1; | |||
| break; | |||
| #endif | |||
| #ifdef BUILD_COMPLEX16 | |||
| case BLAS_DOUBLE: | |||
| mask = ZGEMM_UNROLL_MN - 1; | |||
| break; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| case BLAS_XDOUBLE: | |||
| mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; | |||
| @@ -459,13 +459,16 @@ blas_queue_t *tscq; | |||
| } else | |||
| #endif | |||
| if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { | |||
| #ifdef BUILD_DOUBLE | |||
| sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #endif | |||
| } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | |||
| #ifdef BUILD_SINGLE | |||
| sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| } else { | |||
| #endif | |||
| } else { | |||
| /* Other types in future */ | |||
| } | |||
| } else { | |||
| @@ -476,11 +479,15 @@ blas_queue_t *tscq; | |||
| } else | |||
| #endif | |||
| if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | |||
| #ifdef BUILD_COMPLEX16 | |||
| sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #endif | |||
| } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | |||
| #ifdef BUILD_COMPLEX | |||
| sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #endif | |||
| } else { | |||
| /* Other types in future */ | |||
| } | |||
| @@ -315,12 +315,15 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ | |||
| } else | |||
| #endif | |||
| if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | |||
| #if defined ( BUILD_DOUBLE) || defined (BUILD_COMPLEX16) | |||
| sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #endif | |||
| } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){ | |||
| #if defined (BUILD_SINGLE) || defined (BUILD_COMPLEX) | |||
| sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #endif | |||
| } else { | |||
| /* Other types in future */ | |||
| } | |||
| @@ -332,15 +335,24 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ | |||
| } else | |||
| #endif | |||
| if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ | |||
| #ifdef BUILD_COMPLEX16 | |||
| sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #else | |||
| fprintf(stderr,"UNHANDLED COMPLEX16\n"); | |||
| #endif | |||
| } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { | |||
| #ifdef BUILD_COMPLEX | |||
| sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) | |||
| + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); | |||
| #else | |||
| fprintf(stderr,"UNHANDLED COMPLEX\n"); | |||
| #endif | |||
| } else { | |||
| /* Other types in future */ | |||
| } | |||
| } | |||
| if (!sb) fprintf(stderr,"SB not declared!!!\n"); | |||
| queue->sb=sb; | |||
| } | |||
| } | |||
| @@ -2201,11 +2201,17 @@ static void *alloc_mmap(void *address){ | |||
| #endif | |||
| #endif | |||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||
| start = (BLASULONG)map_address; | |||
| current = (SCALING - 1) * BUFFER_SIZE; | |||
| #ifdef BUILD_DOUBLE | |||
| allocsize = DGEMM_P * DGEMM_Q * sizeof(double); | |||
| #elif defined(BUILD_COMPLEX16) | |||
| allocsize = ZGEMM_P * ZGEMM_Q * sizeof(double); | |||
| #elif defined(BUILD_COMPLEX) | |||
| allocsize = CGEMM_P * CGEMM_Q * sizeof(double); | |||
| #else | |||
| allocsize = SGEMM_P * SGEMM_Q * sizeof(double); | |||
| #endif | |||
| start = (BLASULONG)map_address; | |||
| current = (SCALING - 1) * BUFFER_SIZE; | |||
| while(current > 0) { | |||
| *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||
| @@ -33,6 +33,18 @@ endif | |||
| ifndef BUILD_HALF | |||
| BUILD_HALF = 0 | |||
| endif | |||
| ifndef BUILD_SINGLE | |||
| BUILD_SINGLE = 0 | |||
| endif | |||
| ifndef BUILD_DOUBLE | |||
| BUILD_DOUBLE = 0 | |||
| endif | |||
| ifndef BUILD_COMPLEX | |||
| BUILD_COMPLEX = 0 | |||
| endif | |||
| ifndef BUILD_COMPLEX16 | |||
| BUILD_COMPLEX16 = 0 | |||
| endif | |||
| ifeq ($(OSNAME), WINNT) | |||
| ifeq ($(F_COMPILER), GFORTRAN) | |||
| @@ -108,10 +120,10 @@ dll : ../$(LIBDLLNAME) | |||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) | |||
| $(LIBPREFIX).def : gensymbol | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||
| perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) | |||
| libgoto_hpl.def : gensymbol | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) | |||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) | |||
| ifeq ($(OSNAME), Darwin) | |||
| INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib | |||
| @@ -246,23 +258,23 @@ static : ../$(LIBNAME) | |||
| rm -f goto.$(SUFFIX) | |||
| osx.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||
| perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) | |||
| aix.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||
| perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) | |||
| objcopy.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||
| perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) | |||
| objconv.def : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) | |||
| perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) | |||
| test : linktest.c | |||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | |||
| rm -f linktest | |||
| linktest.c : gensymbol ../Makefile.system ../getarch.c | |||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c | |||
| perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c | |||
| clean :: | |||
| @rm -f *.def *.dylib __.SYMDEF* *.renamed | |||
| @@ -83,8 +83,12 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) | |||
| GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||
| #sdsdot, dsdot | |||
| if (BUILD_SINGLE OR BUILD_DOUBLE) | |||
| GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||
| endif () | |||
| if (BUILD_DOUBLE) | |||
| GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") | |||
| endif () | |||
| # trmm is trsm with a compiler flag set | |||
| GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | |||
| @@ -167,4 +171,31 @@ if (NOT DEFINED NO_LAPACK) | |||
| GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) | |||
| endif () | |||
| if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("scal.c" "" "scal" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("copy.c" "" "copy" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("dot.c" "" "dot" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("rot.c" "" "rot" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "SINGLE") | |||
| GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "SINGLE") | |||
| endif () | |||
| if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| GenerateNamedObjects("scal.c" "" "scal" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("copy.c" "" "copy" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("dot.c" "" "dot" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("rot.c" "" "rot" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "DOUBLE") | |||
| GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "DOUBLE") | |||
| endif () | |||
| add_library(interface OBJECT ${OPENBLAS_SRC}) | |||
| @@ -329,7 +329,10 @@ CCBLAS3OBJS = \ | |||
| cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ | |||
| cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ | |||
| cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ | |||
| cblas_cgeadd.$(SUFFIX) cblas_xerbla.$(SUFFIX) | |||
| cblas_cgeadd.$(SUFFIX) | |||
| CXERBLAOBJ = \ | |||
| cblas_xerbla.$(SUFFIX) | |||
| @@ -391,6 +394,8 @@ ZBLAS2OBJS += $(CZBLAS2OBJS) | |||
| ZBLAS3OBJS += $(CZBLAS3OBJS) | |||
| SHEXTOBJS += $(CSHEXTOBJS) | |||
| CBAUXOBJS += $(CXERBLAOBJ) | |||
| endif | |||
| SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | |||
| @@ -434,13 +439,11 @@ QLAPACKOBJS = \ | |||
| # cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||
| # clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) | |||
| CLAPACKOBJS = \ | |||
| cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ | |||
| cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ | |||
| clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) | |||
| #ZLAPACKOBJS = \ | |||
| # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ | |||
| # zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ | |||
| @@ -469,8 +472,42 @@ ZBLASOBJS += $(ZLAPACKOBJS) | |||
| endif | |||
| FUNCOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS= | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| SBLASOBJS = dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \ | |||
| sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \ | |||
| ssyrk.$(SUFFIX) sgemv.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| SBLASOBJS = \ | |||
| sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \ | |||
| isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \ | |||
| sgemv.$(SUFFIX) sgemm.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| DBLASOBJS = \ | |||
| ddot.$(SUFFIX) drot.$(SUFFIX) dnrm2.$(SUFFIX) dswap.$(SUFFIX) \ | |||
| idamax.$(SUFFIX) daxpy.$(SUFFIX) dscal.$(SUFFIX) dcopy.$(SUFFIX) \ | |||
| dgemv.$(SUFFIX) dgemm.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS= | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| CBLASOBJS = cgetrs.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) cgetrf.$(SUFFIX) \ | |||
| cpotrf.$(SUFFIX) ctrsm.$(SUFFIX) cblas_cdotc_sub.$(SUFFIX) | |||
| endif | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| FUNCOBJS = $(SHEXTOBJS) $(CXERBLAOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) | |||
| $(info FUNCOBJS = {[$(FUNCOBJS)]} ) | |||
| ifdef EXPRECISION | |||
| FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
| endif | |||
| @@ -481,6 +518,7 @@ endif | |||
| FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) | |||
| include $(TOPDIR)/Makefile.tail | |||
| all :: libs | |||
| @@ -503,11 +541,14 @@ level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $( | |||
| level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||
| level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| aux : $(CBAUXOBJS) | |||
| $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
| $(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | |||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS | |||
| $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS | |||
| srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | |||
| $(CC) $(CFLAGS) -c $< -o $(@F) | |||
| @@ -2268,3 +2309,4 @@ cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c | |||
| cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| @@ -91,6 +91,59 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") | |||
| if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE") | |||
| if (DEFINED SMAXKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE") | |||
| endif () | |||
| if (DEFINED SMINKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE") | |||
| endif () | |||
| if (DEFINED ISMINKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE") | |||
| endif () | |||
| if (DEFINED ISMAXKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") | |||
| endif () | |||
| if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") | |||
| if (DEFINED DMAXKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") | |||
| endif () | |||
| if (DEFINED DMINKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") | |||
| endif () | |||
| if (DEFINED IDMINKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") | |||
| endif () | |||
| if (DEFINED IDMAXKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | |||
| endif () | |||
| # Makefile.L2 | |||
| GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) | |||
| GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | |||
| @@ -124,7 +177,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) | |||
| endif () | |||
| endforeach () | |||
| if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") | |||
| endif () | |||
| if (BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
| endif () | |||
| # Makefile.L3 | |||
| set(USE_TRMM false) | |||
| if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE)) | |||
| @@ -159,6 +219,38 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) | |||
| endforeach() | |||
| if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") | |||
| if (DGEMMINCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") | |||
| endif () | |||
| if (DGEMMITCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") | |||
| endif () | |||
| if (DGEMMONCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") | |||
| endif () | |||
| if (DGEMMOTCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") | |||
| endif () | |||
| if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") | |||
| if (SGEMMINCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| if (SGEMMITCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| if (SGEMMONCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| if (SGEMMOTCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE") | |||
| endif () | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| string(SUBSTRING ${float_type} 0 1 float_char) | |||
| @@ -499,7 +591,31 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| #geadd | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) | |||
| endforeach () | |||
| if (BUILD_DOUBLE AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE") | |||
| endif () | |||
| # Makefile.LA | |||
| if(NOT NO_LAPACK) | |||
| @@ -526,6 +642,28 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type}) | |||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type}) | |||
| endforeach() | |||
| if (BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| if (NOT DEFINED SNEG_TCOPY) | |||
| set(SNEG_TCOPY ../generic/neg_tcopy.c) | |||
| endif () | |||
| if (NOT DEFINED SLASWP_NCOPY) | |||
| set(SLASWP_NCOPY ../generic/laswp_ncopy.c) | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}_${SGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}_${SGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "SINGLE") | |||
| endif() | |||
| if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| if (NOT DEFINED DNEG_TCOPY) | |||
| set(DNEG_TCOPY ../generic/neg_tcopy.c) | |||
| endif () | |||
| if (NOT DEFINED DLASWP_NCOPY) | |||
| set(DLASWP_NCOPY ../generic/laswp_ncopy.c) | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "DOUBLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "DOUBLE") | |||
| endif() | |||
| endif() | |||
| if (${DYNAMIC_ARCH}) | |||
| @@ -557,8 +695,147 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||
| GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) | |||
| endforeach () | |||
| if (BUILD_COMPLEX AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| endif () | |||
| if (BUILD_DOUBLE AND NOT BUILD_SINGLE) | |||
| GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "SINGLE") | |||
| GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "SINGLE") | |||
| if (SGEMMINCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| if (SGEMMITCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| if (SGEMMONCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| if (SGEMMOTCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") | |||
| GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") | |||
| endif () | |||
| if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | |||
| GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") | |||
| GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") | |||
| endif () | |||
| if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX) | |||
| GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX") | |||
| if (DEFINED CMAXKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX") | |||
| endif () | |||
| if (DEFINED CMINKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX") | |||
| if (DEFINED ICMAXKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX") | |||
| endif () | |||
| if (DEFINED ICMINKERNEL) | |||
| GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX") | |||
| # Optional CGEMM copy kernels. Each of the four GenerateNamedObjects calls below runs only when its CGEMM*COPY variable evaluates true; the source path is built from KERNELDIR plus that variable, and the object name is taken from the matching CGEMM*COPYOBJ variable. | |||
| # NOTE(review): GenerateNamedObjects is defined outside this view, so the meaning of the positional false/""/true arguments is not documented here - confirm against its definition. | |||
| if (CGEMMINCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX") | |||
| endif () | |||
| if (CGEMMITCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX") | |||
| endif () | |||
| if (CGEMMONCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX") | |||
| endif () | |||
| if (CGEMMOTCOPY) | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX") | |||
| endif () | |||
| GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX") | |||
| endif () | |||
| endif () | |||
| # Create an OBJECT library named kernel${TSUFFIX} from the sources listed in ${OPENBLAS_SRC}, then set its COMPILE_FLAGS property to the contents of KERNEL_DEFINITIONS. | |||
| add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) | |||
| set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") | |||
| @@ -573,7 +850,7 @@ if (${DYNAMIC_ARCH}) | |||
| set(BUILD_KERNEL 1) | |||
| set(KDIR "") | |||
| set(TSUFFIX "_${TARGET_CORE}") | |||
| set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") | |||
| set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") | |||
| build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") | |||
| set(ADD_COMMONOBJS 0) | |||
| endforeach() | |||
| @@ -41,6 +41,9 @@ ifdef NO_AVX2 | |||
| endif | |||
| ifdef TARGET_CORE | |||
| # Append -msse3 to CFLAGS when TARGET_CORE is one of the cores named in the list below: $(filter ...) yields TARGET_CORE only if it appears in that list, so the ifeq comparison succeeds exactly in that case. | |||
| ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) | |||
| override CFLAGS += -msse3 | |||
| endif | |||
| ifeq ($(TARGET_CORE), COOPERLAKE) | |||
| override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | |||
| ifeq ($(GCCVERSIONGTEQ10), 1) | |||
| @@ -186,31 +186,46 @@ ifndef XHEMV_M_KERNEL | |||
| XHEMV_M_KERNEL = ../generic/zhemv_k.c | |||
| endif | |||
| # sgemv_n/sgemv_t objects are added to SBLASOBJS when any of BUILD_SINGLE, BUILD_DOUBLE or BUILD_COMPLEX is non-empty. | |||
| # NOTE(review): the two object lines inside this conditional both name sgemv_n/sgemv_t, and the first also names ssymv_U/ssymv_L; they look like the before/after sides of a diff whose +/- markers were lost - confirm which one is intended. | |||
| ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" | |||
| SBLASOBJS += \ | |||
| sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ | |||
| sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # ssymv_U/ssymv_L and sger_k are added to SBLASOBJS only when BUILD_SINGLE is exactly 1. | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS += \ | |||
| ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ | |||
| sger_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # dgemv_n/dgemv_t, dsymv_U/dsymv_L and dger_k are added to DBLASOBJS only when BUILD_DOUBLE is exactly 1. | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS += \ | |||
| dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \ | |||
| dger_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # The q-prefixed objects are added to QBLASOBJS unconditionally (no BUILD_* guard surrounds them). | |||
| QBLASOBJS += \ | |||
| qgemv_n$(TSUFFIX).$(SUFFIX) qgemv_t$(TSUFFIX).$(SUFFIX) qsymv_U$(TSUFFIX).$(SUFFIX) qsymv_L$(TSUFFIX).$(SUFFIX) \ | |||
| qger_k$(TSUFFIX).$(SUFFIX) | |||
| # BUILD_COMPLEX=1 adds the c-prefixed gemv, symv, hemv and ger objects to CBLASOBJS, and also appends sgemv_n/sgemv_t to SBLASOBJS. | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| SBLASOBJS += \ | |||
| sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) | |||
| CBLASOBJS += \ | |||
| cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
| cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) \ | |||
| csymv_U$(TSUFFIX).$(SUFFIX) csymv_L$(TSUFFIX).$(SUFFIX) \ | |||
| chemv_U$(TSUFFIX).$(SUFFIX) chemv_L$(TSUFFIX).$(SUFFIX) chemv_V$(TSUFFIX).$(SUFFIX) chemv_M$(TSUFFIX).$(SUFFIX) \ | |||
| cgeru_k$(TSUFFIX).$(SUFFIX) cgerc_k$(TSUFFIX).$(SUFFIX) cgerv_k$(TSUFFIX).$(SUFFIX) cgerd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # BUILD_COMPLEX16=1 adds the z-prefixed gemv, symv, hemv and ger objects to ZBLASOBJS, and additionally appends the eight cgemv_* objects to CBLASOBJS and dgemv_n/dgemv_t to DBLASOBJS. | |||
| # NOTE(review): presumably the extra c*/d* objects are needed by the double-complex build - confirm against the callers. | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| CBLASOBJS += \ | |||
| cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
| cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) | |||
| DBLASOBJS += \ | |||
| dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) | |||
| ZBLASOBJS += \ | |||
| zgemv_n$(TSUFFIX).$(SUFFIX) zgemv_t$(TSUFFIX).$(SUFFIX) zgemv_r$(TSUFFIX).$(SUFFIX) zgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
| zgemv_o$(TSUFFIX).$(SUFFIX) zgemv_u$(TSUFFIX).$(SUFFIX) zgemv_s$(TSUFFIX).$(SUFFIX) zgemv_d$(TSUFFIX).$(SUFFIX) \ | |||
| zsymv_U$(TSUFFIX).$(SUFFIX) zsymv_L$(TSUFFIX).$(SUFFIX) \ | |||
| zhemv_U$(TSUFFIX).$(SUFFIX) zhemv_L$(TSUFFIX).$(SUFFIX) zhemv_V$(TSUFFIX).$(SUFFIX) zhemv_M$(TSUFFIX).$(SUFFIX) \ | |||
| zgeru_k$(TSUFFIX).$(SUFFIX) zgerc_k$(TSUFFIX).$(SUFFIX) zgerv_k$(TSUFFIX).$(SUFFIX) zgerd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| XBLASOBJS += \ | |||
| xgemv_n$(TSUFFIX).$(SUFFIX) xgemv_t$(TSUFFIX).$(SUFFIX) xgemv_r$(TSUFFIX).$(SUFFIX) xgemv_c$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -219,17 +234,21 @@ XBLASOBJS += \ | |||
| xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \ | |||
| xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) | |||
| # Rules building sgemv_n and sgemv_t from SGEMVNKERNEL / SGEMVTKERNEL with -UDOUBLE -UCOMPLEX; the _n rule passes -UTRANS and the _t rule passes -DTRANS. The rules exist when any of BUILD_SINGLE, BUILD_DOUBLE or BUILD_COMPLEX is non-empty. | |||
| ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" | |||
| $(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@ | |||
| $(KDIR)sgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@ | |||
| endif | |||
| # Same pair of rules for dgemv_n / dgemv_t, compiled with -DDOUBLE -UCOMPLEX; present when BUILD_DOUBLE or BUILD_COMPLEX16 is non-empty. | |||
| ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
| $(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@ | |||
| $(KDIR)dgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@ | |||
| endif | |||
| $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@ | |||
| @@ -237,6 +256,8 @@ $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)qgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVTKERNEL) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@ | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| $(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ | |||
| @@ -260,6 +281,10 @@ $(KDIR)cgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE | |||
| $(KDIR)cgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| $(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ | |||
| @@ -284,6 +309,7 @@ $(KDIR)zgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE | |||
| $(KDIR)zgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ | |||
| endif | |||
| $(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ | |||
| @@ -309,17 +335,25 @@ $(KDIR)xgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE | |||
| $(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVTKERNEL) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ | |||
| # Rules building ssymv_U / ssymv_L from SSYMV_U_KERNEL / SSYMV_L_KERNEL with -UCOMPLEX -UDOUBLE; the _U rule passes -ULOWER and the _L rule passes -DLOWER. Present only when BUILD_SINGLE is exactly 1. | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| $(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@ | |||
| $(KDIR)ssymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_L_KERNEL) $(SSYMV_L_PARAM) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@ | |||
| endif | |||
| # Same pair of rules for dsymv_U / dsymv_L, compiled with -UCOMPLEX -DDOUBLE; present only when BUILD_DOUBLE is exactly 1. | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| $(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@ | |||
| $(KDIR)dsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_L_KERNEL) $(DSYMV_L_PARAM) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@ | |||
| endif | |||
| $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@ | |||
| @@ -327,17 +361,23 @@ $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)qsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_L_KERNEL) | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@ | |||
| # Rules building csymv_U / csymv_L from CSYMV_U_KERNEL / CSYMV_L_KERNEL with -DCOMPLEX -UDOUBLE; the _U rule passes -ULOWER and the _L rule passes -DLOWER. Present only when BUILD_COMPLEX is exactly 1. | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| $(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@ | |||
| $(KDIR)csymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_L_KERNEL) $(CSYMV_L_PARAM) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@ | |||
| endif | |||
| # Same pair of rules for zsymv_U / zsymv_L, compiled with -DCOMPLEX -DDOUBLE; present only when BUILD_COMPLEX16 is exactly 1. | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| $(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@ | |||
| $(KDIR)zsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_L_KERNEL) $(ZSYMV_L_PARAM) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@ | |||
| endif | |||
| $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@ | |||
| @@ -345,15 +385,23 @@ $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)xsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_L_KERNEL) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@ | |||
| # Rule building sger_k from SGERKERNEL with -UDOUBLE; present only when BUILD_SINGLE is exactly 1. | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| $(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE $< -o $@ | |||
| endif | |||
| # Rule building dger_k from DGERKERNEL with -DDOUBLE; present only when BUILD_DOUBLE is exactly 1. | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| $(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE $< -o $@ | |||
| endif | |||
| # Rule building qger_k from QGERKERNEL with -DXDOUBLE; it is not wrapped in any BUILD_* conditional. | |||
| $(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@ | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| $(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@ | |||
| @@ -365,6 +413,9 @@ $(KDIR)cgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)cgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERCKERNEL) $(CGERPARAM) | |||
| $(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@ | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| $(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@ | |||
| @@ -377,6 +428,7 @@ $(KDIR)zgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)zgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERCKERNEL) $(ZGERPARAM) | |||
| $(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@ | |||
| endif | |||
| $(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@ | |||
| @@ -390,6 +442,8 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM) | |||
| $(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@ | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| $(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@ | |||
| @@ -401,6 +455,9 @@ $(KDIR)chemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_V$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)chemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_M_KERNEL) $(CHEMV_L_PARAM) ../symcopy.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| $(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@ | |||
| @@ -413,7 +470,7 @@ $(KDIR)zhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)zhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_M_KERNEL) $(ZHEMV_L_PARAM) ../symcopy.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ | |||
| endif | |||
| $(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL) | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@ | |||
| @@ -426,3 +483,4 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER | |||
| $(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h | |||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ | |||
| @@ -100,8 +100,10 @@ SHKERNELOBJS += \ | |||
| $(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | |||
| SKERNELOBJS += \ | |||
| sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
| sgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ | |||
| $(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) | |||
| @@ -110,28 +112,36 @@ SKERNELOBJS += \ | |||
| sgemm_direct$(TSUFFIX).$(SUFFIX) \ | |||
| sgemm_direct_performant$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| endif | |||
| # DKERNELOBJS (dgemm_beta, dgemm_kernel and the DGEMM copy objects) are added when BUILD_DOUBLE or BUILD_COMPLEX16 is non-empty. | |||
| ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
| DKERNELOBJS += \ | |||
| dgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| dgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
| $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ | |||
| $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) | |||
| endif | |||
| # QKERNELOBJS (qgemm_kernel and the QGEMM copy objects) are added unconditionally. | |||
| QKERNELOBJS += \ | |||
| qgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||
| $(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \ | |||
| $(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ) | |||
| # CKERNELOBJS (the four cgemm_kernel_{n,r,l,b} objects and the CGEMM copy objects) are added when BUILD_COMPLEX or BUILD_COMPLEX16 is non-empty. | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CKERNELOBJS += \ | |||
| cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ | |||
| cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ | |||
| $(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \ | |||
| $(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ) | |||
| endif | |||
| # ZKERNELOBJS (the four zgemm_kernel_{n,r,l,b} objects and the ZGEMM copy objects) are added only when BUILD_COMPLEX16 is exactly 1. | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZKERNELOBJS += \ | |||
| zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ | |||
| zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ | |||
| $(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \ | |||
| $(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ) | |||
| endif | |||
| XKERNELOBJS += \ | |||
| xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -153,38 +163,48 @@ ifeq ($(BUILD_HALF),1) | |||
| SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # SBLASOBJS gains sgemm_beta plus the strmm_kernel_* and strsm_kernel_* objects when BUILD_SINGLE or BUILD_DOUBLE is non-empty. | |||
| # NOTE(review): in this list and in the DBLASOBJS/QBLASOBJS lists below, the final *trsm_kernel_RN/RT line appears twice (once ending in a backslash, once without); this looks like the before/after sides of a diff whose +/- markers were lost - confirm which line is intended. | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS += \ | |||
| sgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # DBLASOBJS gains dgemm_beta plus the dtrmm_kernel_* and dtrsm_kernel_* objects only when BUILD_DOUBLE is exactly 1. | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS += \ | |||
| dgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # QBLASOBJS gains qgemm_beta plus the qtrmm_kernel_* and qtrsm_kernel_* objects unconditionally. | |||
| QBLASOBJS += \ | |||
| qgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) | |||
| # CBLASOBJS gains cgemm_beta plus the eight ctrmm_kernel_* objects only when BUILD_COMPLEX is exactly 1. | |||
| # NOTE(review): the last ctrmm_kernel_RR/RC line (and the last ctrsm_kernel_RR/RC line further down) appears twice, once ending in a backslash and once without; this looks like the before/after sides of a diff whose +/- markers were lost - confirm which line is intended. | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS += \ | |||
| cgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # When BUILD_COMPLEX or BUILD_COMPLEX16 is non-empty, CBLASOBJS gains cgemm_beta (listed here as well as in the BUILD_COMPLEX block above) plus the eight ctrsm_kernel_* objects. | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS += \ | |||
| cgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS += \ | |||
| zgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -194,7 +214,8 @@ ZBLASOBJS += \ | |||
| ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
| ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
| ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| XBLASOBJS += \ | |||
| xgemm_beta$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -205,7 +226,7 @@ XBLASOBJS += \ | |||
| xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ | |||
| xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ | |||
| xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
| xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
| xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(USE_GEMM3M), 1) | |||
| @@ -215,6 +236,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS += \ | |||
| strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -223,7 +245,10 @@ SBLASOBJS += \ | |||
| strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS += \ | |||
| strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -231,10 +256,15 @@ SBLASOBJS += \ | |||
| strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # The four ssymm copy objects (iutcopy, iltcopy, outcopy, oltcopy) are added to SBLASOBJS only when BUILD_SINGLE is exactly 1. | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS += \ | |||
| ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS += \ | |||
| dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -254,6 +284,7 @@ DBLASOBJS += \ | |||
| dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| QBLASOBJS += \ | |||
| qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -273,8 +304,9 @@ QBLASOBJS += \ | |||
| qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS += \ | |||
| ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -284,6 +316,13 @@ CBLASOBJS += \ | |||
| ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS += \ | |||
| ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -291,12 +330,10 @@ CBLASOBJS += \ | |||
| ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ | |||
| csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS += \ | |||
| ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -318,6 +355,7 @@ ZBLASOBJS += \ | |||
| zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
| zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| XBLASOBJS += \ | |||
| xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -343,6 +381,7 @@ XBLASOBJS += \ | |||
| ifeq ($(USE_GEMM3M), 1) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS += \ | |||
| cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
| cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -362,7 +401,9 @@ CBLASOBJS += \ | |||
| chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ | |||
| chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ | |||
| chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS += \ | |||
| zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
| zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -382,6 +423,7 @@ ZBLASOBJS += \ | |||
| zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ | |||
| zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ | |||
| zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| XBLASOBJS += \ | |||
| xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -406,20 +448,25 @@ XBLASOBJS += \ | |||
| endif | |||
| ###### BLAS extensions ##### | |||
| # Single-precision extension objects: the somatcopy_k_* and simatcopy_k_* variants (cn, rn, ct, rt) and sgeadd_k are added to SBLASOBJS only when BUILD_SINGLE is exactly 1. | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS += \ | |||
| somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| sgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| # Double-precision counterparts (domatcopy_k_*, dimatcopy_k_*, dgeadd_k) are added to DBLASOBJS only when BUILD_DOUBLE is exactly 1. | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS += \ | |||
| domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| dgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS += \ | |||
| comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -430,7 +477,9 @@ CBLASOBJS += \ | |||
| cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ | |||
| cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | |||
| cgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS += \ | |||
| zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ | |||
| zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ | |||
| @@ -441,6 +490,7 @@ ZBLASOBJS += \ | |||
| zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ | |||
| zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ | |||
| zgeadd_k$(TSUFFIX).$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_HALF), 1) | |||
| SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||
| @@ -114,6 +114,7 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| #endif | |||
| #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | |||
| 0, 0, 0, | |||
| SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | |||
| #ifdef SGEMM_DEFAULT_UNROLL_MN | |||
| @@ -121,7 +122,7 @@ gotoblas_t TABLE_NAME = { | |||
| #else | |||
| MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), | |||
| #endif | |||
| #endif | |||
| #ifdef HAVE_EXCLUSIVE_CACHE | |||
| 1, | |||
| @@ -129,19 +130,38 @@ gotoblas_t TABLE_NAME = { | |||
| 0, | |||
| #endif | |||
| #if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
| samax_kTS, samin_kTS, smax_kTS, smin_kTS, | |||
| isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | |||
| snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS, | |||
| dsdot_kTS, | |||
| srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||
| sgemv_nTS, sgemv_tTS, sger_kTS, | |||
| snrm2_kTS, sasum_kTS, | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| ssum_kTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
| scopy_kTS, sdot_kTS, | |||
| // dsdot_kTS, | |||
| srot_kTS, saxpy_kTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | |||
| sscal_kTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
| sswap_kTS, | |||
| sgemv_nTS, sgemv_tTS, | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| sger_kTS, | |||
| ssymv_LTS, ssymv_UTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | |||
| #ifdef ARCH_X86_64 | |||
| sgemm_directTS, | |||
| sgemm_direct_performantTS, | |||
| #endif | |||
| sgemm_kernelTS, sgemm_betaTS, | |||
| #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
| sgemm_incopyTS, sgemm_itcopyTS, | |||
| @@ -149,6 +169,9 @@ gotoblas_t TABLE_NAME = { | |||
| sgemm_oncopyTS, sgemm_otcopyTS, | |||
| #endif | |||
| sgemm_oncopyTS, sgemm_otcopyTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) | |||
| strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS, | |||
| #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
| strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS, | |||
| @@ -159,6 +182,8 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS, | |||
| strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS, | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS, | |||
| #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N | |||
| strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS, | |||
| @@ -175,13 +200,16 @@ gotoblas_t TABLE_NAME = { | |||
| ssymm_outcopyTS, ssymm_oltcopyTS, | |||
| #endif | |||
| ssymm_outcopyTS, ssymm_oltcopyTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) | |||
| #ifndef NO_LAPACK | |||
| sneg_tcopyTS, slaswp_ncopyTS, | |||
| #else | |||
| NULL,NULL, | |||
| #endif | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| 0, 0, 0, | |||
| DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, | |||
| #ifdef DGEMM_DEFAULT_UNROLL_MN | |||
| @@ -189,14 +217,36 @@ gotoblas_t TABLE_NAME = { | |||
| #else | |||
| MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
| #endif | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, | |||
| idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, | |||
| dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS, | |||
| drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS, | |||
| dgemv_nTS, dgemv_tTS, dger_kTS, | |||
| dnrm2_kTS, dasum_kTS, | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) | |||
| dsum_kTS, | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| dcopy_kTS, ddot_kTS, | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) | |||
| dsdot_kTS, | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| drot_kTS, | |||
| daxpy_kTS, | |||
| dscal_kTS, | |||
| dswap_kTS, | |||
| dgemv_nTS, dgemv_tTS, | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) | |||
| dger_kTS, | |||
| dsymv_LTS, dsymv_UTS, | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| dgemm_kernelTS, dgemm_betaTS, | |||
| #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N | |||
| dgemm_incopyTS, dgemm_itcopyTS, | |||
| @@ -204,6 +254,9 @@ gotoblas_t TABLE_NAME = { | |||
| dgemm_oncopyTS, dgemm_otcopyTS, | |||
| #endif | |||
| dgemm_oncopyTS, dgemm_otcopyTS, | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) | |||
| dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS, | |||
| #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N | |||
| dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS, | |||
| @@ -237,6 +290,8 @@ gotoblas_t TABLE_NAME = { | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| 0, 0, 0, | |||
| @@ -291,6 +346,7 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| #if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
| 0, 0, 0, | |||
| CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, | |||
| #ifdef CGEMM_DEFAULT_UNROLL_MN | |||
| @@ -298,21 +354,34 @@ gotoblas_t TABLE_NAME = { | |||
| #else | |||
| MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), | |||
| #endif | |||
| camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, | |||
| cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS, | |||
| cdotu_kTS, cdotc_kTS, csrot_kTS, | |||
| caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX) | |||
| cnrm2_kTS, casum_kTS, csum_kTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
| ccopy_kTS, cdotu_kTS, cdotc_kTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX) | |||
| csrot_kTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
| caxpy_kTS, | |||
| caxpyc_kTS, | |||
| cscal_kTS, | |||
| cswap_kTS, | |||
| cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS, | |||
| cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX) | |||
| cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS, | |||
| csymv_LTS, csymv_UTS, | |||
| chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
| cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS, | |||
| cgemm_betaTS, | |||
| #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N | |||
| cgemm_incopyTS, cgemm_itcopyTS, | |||
| #else | |||
| @@ -332,6 +401,8 @@ gotoblas_t TABLE_NAME = { | |||
| #endif | |||
| ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS, | |||
| ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS, | |||
| #endif | |||
| #if (BUILD_COMPLEX) | |||
| ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS, | |||
| ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS, | |||
| @@ -361,7 +432,7 @@ gotoblas_t TABLE_NAME = { | |||
| 0, 0, 0, | |||
| #if defined(USE_GEMM3M) | |||
| #if (USE_GEMM3M) | |||
| #ifdef CGEMM3M_DEFAULT_UNROLL_M | |||
| CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), | |||
| #else | |||
| @@ -419,13 +490,20 @@ gotoblas_t TABLE_NAME = { | |||
| NULL, NULL, | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| #if (BUILD_COMPLEX || BUILD_COMPLEX16) | |||
| #ifndef NO_LAPACK | |||
| cneg_tcopyTS, claswp_ncopyTS, | |||
| cneg_tcopyTS, | |||
| claswp_ncopyTS, | |||
| #else | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| #if BUILD_COMPLEX16 == 1 | |||
| 0, 0, 0, | |||
| ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, | |||
| #ifdef ZGEMM_DEFAULT_UNROLL_MN | |||
| @@ -495,7 +573,7 @@ gotoblas_t TABLE_NAME = { | |||
| zhemm_outcopyTS, zhemm_oltcopyTS, | |||
| 0, 0, 0, | |||
| #if defined(USE_GEMM3M) | |||
| #if (USE_GEMM3M) | |||
| #ifdef ZGEMM3M_DEFAULT_UNROLL_M | |||
| ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), | |||
| #else | |||
| @@ -560,6 +638,8 @@ gotoblas_t TABLE_NAME = { | |||
| NULL, NULL, | |||
| #endif | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| 0, 0, 0, | |||
| @@ -626,7 +706,7 @@ gotoblas_t TABLE_NAME = { | |||
| xhemm_outcopyTS, xhemm_oltcopyTS, | |||
| 0, 0, 0, | |||
| #if defined(USE_GEMM3M) | |||
| #if (USE_GEMM3M) | |||
| QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), | |||
| xgemm3m_kernelTS, | |||
| @@ -691,52 +771,112 @@ gotoblas_t TABLE_NAME = { | |||
| init_parameter, | |||
| SNUMOPT, DNUMOPT, QNUMOPT, | |||
| #if BUILD_SINGLE == 1 | |||
| saxpby_kTS, | |||
| #endif | |||
| #if BUILD_DOUBLE == 1 | |||
| daxpby_kTS, | |||
| #endif | |||
| #if BUILD_COMPLEX == 1 | |||
| caxpby_kTS, | |||
| #endif | |||
| #if BUILD_COMPLEX16== 1 | |||
| zaxpby_kTS, | |||
| #endif | |||
| saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS, | |||
| #if BUILD_SINGLE == 1 | |||
| somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS, | |||
| #endif | |||
| #if BUILD_DOUBLE== 1 | |||
| domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS, | |||
| #endif | |||
| #if BUILD_COMPLEX == 1 | |||
| comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS, | |||
| comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS, | |||
| #endif | |||
| #if BUILD_COMPLEX16 == 1 | |||
| zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, | |||
| zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, | |||
| #endif | |||
| #if BUILD_DOUBLE== 1 | |||
| dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, | |||
| #endif | |||
| #if BUILD_COMPLEX== 1 | |||
| cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, | |||
| cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, | |||
| zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, | |||
| #endif | |||
| sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS | |||
| #if BUILD_SINGLE == 1 | |||
| sgeadd_kTS, | |||
| #endif | |||
| #if BUILD_DOUBLE==1 | |||
| dgeadd_kTS, | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| cgeadd_kTS, | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| zgeadd_kTS | |||
| #endif | |||
| }; | |||
| #if defined(ARCH_ARM64) | |||
| #if (ARCH_ARM64) | |||
| static void init_parameter(void) { | |||
| #if defined(BUILD_HALF) | |||
| #if (BUILD_HALF) | |||
| TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE == 1 | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #if defined(BUILD_HALF) | |||
| #if (BUILD_HALF) | |||
| TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_DOUBLE== 1 | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_COMPLEX== 1 | |||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if defined(BUILD_HALF) | |||
| #if (BUILD_HALF) | |||
| TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
| #endif | |||
| #if BUILD_DOUBLE==1 | |||
| TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| @@ -747,7 +887,7 @@ static void init_parameter(void) { | |||
| TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; | |||
| #endif | |||
| #if defined(USE_GEMM3M) | |||
| #if (USE_GEMM3M) | |||
| #ifdef CGEMM3M_DEFAULT_P | |||
| TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||
| #else | |||
| @@ -792,8 +932,8 @@ static void init_parameter(void) { | |||
| #endif | |||
| } | |||
| #else // defined(ARCH_ARM64) | |||
| #if defined(ARCH_POWER) | |||
| #else // (ARCH_ARM64) | |||
| #if (ARCH_POWER) | |||
| static void init_parameter(void) { | |||
| #ifdef BUILD_HALF | |||
| @@ -823,7 +963,7 @@ static void init_parameter(void) { | |||
| } | |||
| #else //POWER | |||
| #if defined(ARCH_ZARCH) | |||
| #if (ARCH_ZARCH) | |||
| static void init_parameter(void) { | |||
| #ifdef BUILD_HALF | |||
| TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||
| @@ -989,22 +1129,34 @@ static void init_parameter(void) { | |||
| TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||
| TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_COMPLEX == 1 | |||
| TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if BUILD_COMPLEX == 1 | |||
| #ifdef CGEMM3M_DEFAULT_Q | |||
| TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; | |||
| #else | |||
| TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q; | |||
| #endif | |||
| #endif | |||
| #if BUILD_COMPLEX16 == 1 | |||
| #ifdef ZGEMM3M_DEFAULT_Q | |||
| TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; | |||
| #else | |||
| TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q; | |||
| #endif | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; | |||
| @@ -1012,16 +1164,24 @@ static void init_parameter(void) { | |||
| TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q; | |||
| #endif | |||
| #if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) | |||
| #if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON) | |||
| #ifdef DEBUG | |||
| fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 64 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_DOUBLE == 1 | |||
| TABLE_NAME.dgemm_p = 32 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 32 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = 16 * (l2 >> 7); | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 16 * (l2 >> 7); | |||
| TABLE_NAME.xgemm_p = 8 * (l2 >> 7); | |||
| @@ -1034,10 +1194,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Northwood\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 96 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_DOUBLE == 1 | |||
| TABLE_NAME.dgemm_p = 48 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 48 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = 24 * (l2 >> 7); | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 24 * (l2 >> 7); | |||
| TABLE_NAME.xgemm_p = 12 * (l2 >> 7); | |||
| @@ -1050,10 +1218,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Atom\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 256; | |||
| #endif | |||
| #if BUILD_DOUBLE ==1 | |||
| TABLE_NAME.dgemm_p = 128; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 128; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = 64; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 64; | |||
| TABLE_NAME.xgemm_p = 32; | |||
| @@ -1066,10 +1242,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Prescott\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 56 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_DOUBLE ==1 | |||
| TABLE_NAME.dgemm_p = 28 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 28 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX16 == 1 | |||
| TABLE_NAME.zgemm_p = 14 * (l2 >> 7); | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 14 * (l2 >> 7); | |||
| TABLE_NAME.xgemm_p = 7 * (l2 >> 7); | |||
| @@ -1082,10 +1266,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Core2\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; | |||
| #endif | |||
| #if BUILD_DOUBLE==1 | |||
| TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; | |||
| TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; | |||
| @@ -1098,10 +1290,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Penryn\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; | |||
| #endif | |||
| #if BUILD_DOUBLE == 1 | |||
| TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; | |||
| TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; | |||
| @@ -1114,10 +1314,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Dunnington\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; | |||
| #endif | |||
| #if BUILD_DOUBLE ==1 | |||
| TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; | |||
| TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; | |||
| @@ -1131,10 +1339,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Nehalem\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1147,10 +1363,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Sandybridge\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1163,26 +1387,42 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Haswell\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| #endif | |||
| #endif | |||
| #if defined (SKYLAKEX) || defined (COOPERLAKE) | |||
| #if defined(SKYLAKEX) || defined(COOPERLAKE) | |||
| #ifdef DEBUG | |||
| fprintf(stderr, "SkylakeX\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1196,10 +1436,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Opteron\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7); | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7); | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7); | |||
| TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7); | |||
| @@ -1212,10 +1460,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Barcelona\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1228,10 +1484,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Bobcate\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1244,10 +1508,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Bulldozer\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1260,10 +1532,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Excavator\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1277,10 +1557,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Piledriver\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1293,10 +1581,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Steamroller\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1309,10 +1605,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "Zen\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_DOUBLE | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if BUILD_COMPLEX16 | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; | |||
| TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; | |||
| @@ -1326,11 +1630,18 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "NANO\n"); | |||
| #endif | |||
| #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
| TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_DOUBLE==1) | |||
| TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_COMPLEX==1) | |||
| TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
| #endif | |||
| #if (BUILD_COMPLEX16==1) | |||
| TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| @@ -1340,41 +1651,55 @@ static void init_parameter(void) { | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| #ifdef CGEMM3M_DEFAULT_P | |||
| TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; | |||
| #else | |||
| TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; | |||
| #endif | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| #ifdef ZGEMM3M_DEFAULT_P | |||
| TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; | |||
| #else | |||
| TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; | |||
| #endif | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; | |||
| #endif | |||
| #if BUILD_SINGLE == 1 | |||
| TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; | |||
| #endif | |||
| #if BUILD_DOUBLE== 1 | |||
| TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M; | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M; | |||
| #endif | |||
| #if BUILD_COMPLEX==1 | |||
| #ifdef CGEMM3M_DEFAULT_UNROLL_M | |||
| TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M; | |||
| #else | |||
| TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; | |||
| #endif | |||
| #endif | |||
| #if BUILD_COMPLEX16==1 | |||
| #ifdef ZGEMM3M_DEFAULT_UNROLL_M | |||
| TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M; | |||
| #else | |||
| TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; | |||
| #endif | |||
| #endif | |||
| #ifdef QUAD_PRECISION | |||
| TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M; | |||
| @@ -1386,15 +1711,19 @@ static void init_parameter(void) { | |||
| fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); | |||
| #endif | |||
| #if BUILD_SINGLE==1 | |||
| TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15); | |||
| #endif | |||
| #if BUILD_DOUBLE==1 | |||
| TABLE_NAME.dgemm_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15); | |||
| #endif | |||
| #ifdef EXPRECISION | |||
| TABLE_NAME.qgemm_r = (((BUFFER_SIZE - | |||
| @@ -1403,26 +1732,33 @@ static void init_parameter(void) { | |||
| ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15); | |||
| #endif | |||
| #if BUILD_COMPLEX ==1 | |||
| TABLE_NAME.cgemm_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15); | |||
| #endif | |||
| #if BUILD_COMPLEX16 ==1 | |||
| TABLE_NAME.zgemm_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); | |||
| #endif | |||
| #if BUILD_COMPLEX == 1 | |||
| TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15); | |||
| #endif | |||
| #if BUILD_COMPLEX16 == 1 | |||
| TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE - | |||
| ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA | |||
| + TABLE_NAME.align) & ~TABLE_NAME.align) | |||
| ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15); | |||
| #endif | |||
| @@ -1444,4 +1780,4 @@ static void init_parameter(void) { | |||
| } | |||
| #endif //POWER | |||
| #endif //ZARCH | |||
| #endif //defined(ARCH_ARM64) | |||
| #endif //(ARCH_ARM64) | |||
| @@ -259,8 +259,12 @@ SNRM2KERNEL = nrm2_sse.S | |||
| endif | |||
| ifndef DNRM2KERNEL | |||
| ifeq ($(OSNAME),WINNT) | |||
| DNRM2KERNEL = ../arm/nrm2.c | |||
| else | |||
| DNRM2KERNEL = nrm2.S | |||
| endif | |||
| endif | |||
| ifndef QNRM2KERNEL | |||
| QNRM2KERNEL = nrm2.S | |||
| @@ -271,8 +275,12 @@ CNRM2KERNEL = znrm2_sse.S | |||
| endif | |||
| ifndef ZNRM2KERNEL | |||
| ifeq ($(OSNAME),WINNT) | |||
| ZNRM2KERNEL = ../arm/znrm2.c | |||
| else | |||
| ZNRM2KERNEL = znrm2.S | |||
| endif | |||
| endif | |||
| ifndef XNRM2KERNEL | |||
| XNRM2KERNEL = znrm2.S | |||
| @@ -46,6 +46,7 @@ OBJ = \ | |||
| lapacke_ilaver.o \ | |||
| lapacke_nancheck.o | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OBJ_C = \ | |||
| lapacke_cbbcsd.o \ | |||
| lapacke_cbbcsd_work.o \ | |||
| @@ -653,7 +654,9 @@ lapacke_cupgtr.o \ | |||
| lapacke_cupgtr_work.o \ | |||
| lapacke_cupmtr.o \ | |||
| lapacke_cupmtr_work.o | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OBJ_D = \ | |||
| lapacke_dbbcsd.o \ | |||
| lapacke_dbbcsd_work.o \ | |||
| @@ -1218,8 +1221,12 @@ lapacke_dtrttf_work.o \ | |||
| lapacke_dtrttp.o \ | |||
| lapacke_dtrttp_work.o \ | |||
| lapacke_dtzrzf.o \ | |||
| lapacke_dtzrzf_work.o | |||
| lapacke_dtzrzf_work.o \ | |||
| lapacke_slag2d.o \ | |||
| lapacke_slag2d_work.o | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OBJ_S = \ | |||
| lapacke_sbbcsd.o \ | |||
| lapacke_sbbcsd_work.o \ | |||
| @@ -1395,8 +1402,6 @@ lapacke_slacn2.o \ | |||
| lapacke_slacn2_work.o \ | |||
| lapacke_slacpy.o \ | |||
| lapacke_slacpy_work.o \ | |||
| lapacke_slag2d.o \ | |||
| lapacke_slag2d_work.o \ | |||
| lapacke_slamch.o \ | |||
| lapacke_slamch_work.o \ | |||
| lapacke_slange.o \ | |||
| @@ -1781,7 +1786,9 @@ lapacke_strttp.o \ | |||
| lapacke_strttp_work.o \ | |||
| lapacke_stzrzf.o \ | |||
| lapacke_stzrzf_work.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OBJ_Z = \ | |||
| lapacke_zbbcsd.o \ | |||
| lapacke_zbbcsd_work.o \ | |||
| @@ -2393,35 +2400,52 @@ lapacke_zupgtr.o \ | |||
| lapacke_zupgtr_work.o \ | |||
| lapacke_zupmtr.o \ | |||
| lapacke_zupmtr_work.o | |||
| endif | |||
| ifdef BUILD_DEPRECATED | |||
| DEPRECATED = \ | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| DEPRECATEDC = \ | |||
| lapacke_cggsvp.o \ | |||
| lapacke_cggsvp_work.o \ | |||
| lapacke_dggsvp.o \ | |||
| lapacke_dggsvp_work.o \ | |||
| lapacke_sggsvp.o \ | |||
| lapacke_sggsvp_work.o \ | |||
| lapacke_zggsvp.o \ | |||
| lapacke_zggsvp_work.o \ | |||
| lapacke_cggsvd.o \ | |||
| lapacke_cggsvd_work.o \ | |||
| lapacke_cgeqpf.o \ | |||
| lapacke_cgeqpf_work.o | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DEPRECATEDD = \ | |||
| lapacke_dggsvp.o \ | |||
| lapacke_dggsvp_work.o \ | |||
| lapacke_dggsvd.o \ | |||
| lapacke_dggsvd_work.o \ | |||
| lapacke_dgeqpf.o \ | |||
| lapacke_dgeqpf_work.o | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| DEPRECATEDS = \ | |||
| lapacke_sggsvp.o \ | |||
| lapacke_sggsvp_work.o \ | |||
| lapacke_sggsvd.o \ | |||
| lapacke_sggsvd_work.o \ | |||
| lapacke_sgeqpf.o \ | |||
| lapacke_sgeqpf_work.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| DEPRECATEDZ = \ | |||
| lapacke_zggsvp.o \ | |||
| lapacke_zggsvp_work.o \ | |||
| lapacke_zggsvd.o \ | |||
| lapacke_zggsvd_work.o \ | |||
| lapacke_cgeqpf.o \ | |||
| lapacke_cgeqpf_work.o \ | |||
| lapacke_dgeqpf.o \ | |||
| lapacke_dgeqpf_work.o \ | |||
| lapacke_sgeqpf.o \ | |||
| lapacke_sgeqpf_work.o \ | |||
| lapacke_zgeqpf.o \ | |||
| lapacke_zgeqpf_work.o | |||
| endif | |||
| DEPRECATED = $(DEPRECATEDS) $(DEPRECATEDD) $(DEPRECATEDC) $(DEPRECATEDZ) | |||
| endif | |||
| ifdef USEXBLAS | |||
| EXTENDED = \ | |||
| lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \ | |||
| @@ -2440,37 +2464,50 @@ endif | |||
| ifdef LAPACKE_WITH_TMG | |||
| # FILE PARTS OF TMGLIB | |||
| MATGEN = \ | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| MATGENC = \ | |||
| lapacke_clatms.o \ | |||
| lapacke_clatms_work.o \ | |||
| lapacke_dlatms.o \ | |||
| lapacke_dlatms_work.o \ | |||
| lapacke_slatms.o \ | |||
| lapacke_slatms_work.o \ | |||
| lapacke_zlatms.o \ | |||
| lapacke_zlatms_work.o \ | |||
| lapacke_clagge.o \ | |||
| lapacke_clagge_work.o \ | |||
| lapacke_claghe.o \ | |||
| lapacke_claghe_work.o \ | |||
| lapacke_clagsy.o \ | |||
| lapacke_clagsy_work.o | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| MATGEND = \ | |||
| lapacke_dlatms.o \ | |||
| lapacke_dlatms_work.o \ | |||
| lapacke_dlagge.o \ | |||
| lapacke_dlagge_work.o \ | |||
| lapacke_dlagsy.o \ | |||
| lapacke_dlagsy_work.o | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| MATGENS = \ | |||
| lapacke_slatms.o \ | |||
| lapacke_slatms_work.o \ | |||
| lapacke_slagge.o \ | |||
| lapacke_slagge_work.o \ | |||
| lapacke_slagsy.o \ | |||
| lapacke_slagsy_work.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| MATGENZ = \ | |||
| lapacke_zlatms.o \ | |||
| lapacke_zlatms_work.o \ | |||
| lapacke_zlagge.o \ | |||
| lapacke_zlagge_work.o \ | |||
| lapacke_claghe.o \ | |||
| lapacke_claghe_work.o \ | |||
| lapacke_zlaghe.o \ | |||
| lapacke_zlaghe_work.o \ | |||
| lapacke_clagsy.o \ | |||
| lapacke_clagsy_work.o \ | |||
| lapacke_dlagsy.o \ | |||
| lapacke_dlagsy_work.o \ | |||
| lapacke_slagsy.o \ | |||
| lapacke_slagsy_work.o \ | |||
| lapacke_zlagsy.o \ | |||
| lapacke_zlagsy_work.o | |||
| endif | |||
| MATGEN = $(MATGENS) $(MATGEND) $(MATGENC) $(MATGENZ) | |||
| endif | |||
| .PHONY: all | |||
| all: $(LAPACKELIB) | |||
| @@ -66,7 +66,9 @@ ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \ | |||
| ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ | |||
| ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" | |||
| SCLAUX = \ | |||
| sbdsvdx.o sstevx.o sstein.o \ | |||
| sbdsdc.o \ | |||
| sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \ | |||
| slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \ | |||
| @@ -81,10 +83,14 @@ SCLAUX = \ | |||
| slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \ | |||
| slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \ | |||
| ssteqr.o ssterf.o slaisnan.o sisnan.o \ | |||
| slartgp.o slartgs.o \ | |||
| slartgp.o slartgs.o scombssq.o \ | |||
| ../INSTALL/second_$(TIMER).o | |||
| endif | |||
| ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
| DZLAUX = \ | |||
| dcombssq.o \ | |||
| dbdsvdx.o dstevx.o dstein.o \ | |||
| dbdsdc.o \ | |||
| dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ | |||
| dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ | |||
| @@ -101,9 +107,12 @@ DZLAUX = \ | |||
| dsteqr.o dsterf.o dlaisnan.o disnan.o \ | |||
| dlartgp.o dlartgs.o \ | |||
| ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o | |||
| endif | |||
| #ifeq ($(BUILD_SINGLE),1) | |||
| ifdef BUILD_SINGLE | |||
| SLASRC_O = \ | |||
| sbdsvdx.o spotrf2.o sgetrf2.o \ | |||
| spotrf2.o sgetrf2.o \ | |||
| sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \ | |||
| sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ | |||
| sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ | |||
| @@ -145,8 +154,7 @@ SLASRC_O = \ | |||
| ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \ | |||
| ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \ | |||
| sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \ | |||
| ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \ | |||
| sstevx.o \ | |||
| ssptrf.o ssptri.o ssptrs.o sstegr.o sstev.o sstevd.o sstevr.o \ | |||
| ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \ | |||
| ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \ | |||
| ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \ | |||
| @@ -180,9 +188,13 @@ SLASRC_O = \ | |||
| ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ | |||
| ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ | |||
| ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ | |||
| sgesvdq.o scombssq.o | |||
| sgesvdq.o | |||
| endif | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| DSLASRC_O = spotrs.o sgetrs.o spotrf.o sgetrf.o | |||
| endif | |||
| ifdef USEXBLAS | |||
| SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ | |||
| @@ -194,6 +206,7 @@ SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ | |||
| slascl2.o sla_wwaddw.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CLASRC_O = \ | |||
| cpotrf2.o cgetrf2.o \ | |||
| cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \ | |||
| @@ -284,6 +297,7 @@ CLASRC_O = \ | |||
| cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ | |||
| chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ | |||
| cgesvdq.o | |||
| endif | |||
| ifdef USEXBLAS | |||
| CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | |||
| @@ -299,11 +313,13 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ | |||
| cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o | |||
| endif | |||
| ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o clag2z.o | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DLASRC_O = \ | |||
| dpotrf2.o dgetrf2.o \ | |||
| dbdsvdx.o \ | |||
| dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ | |||
| dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ | |||
| dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ | |||
| @@ -345,8 +361,7 @@ DLASRC_O = \ | |||
| dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ | |||
| dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ | |||
| dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \ | |||
| dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ | |||
| dstevx.o \ | |||
| dsptrf.o dsptri.o dsptrs.o dstegr.o dstev.o dstevd.o dstevr.o \ | |||
| dsycon.o dsyev.o dsyevd.o dsyevr.o \ | |||
| dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ | |||
| dsysv.o dsysvx.o \ | |||
| @@ -381,7 +396,8 @@ DLASRC_O = \ | |||
| dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ | |||
| dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ | |||
| dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ | |||
| dgesvdq.o dcombssq.o | |||
| dgesvdq.o | |||
| endif | |||
| ifdef USEXBLAS | |||
| DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | |||
| @@ -393,6 +409,7 @@ DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ | |||
| dlascl2.o dla_wwaddw.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZLASRC_O = \ | |||
| zpotrf2.o zgetrf2.o \ | |||
| zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \ | |||
| @@ -471,7 +488,7 @@ ZLASRC_O = \ | |||
| zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ | |||
| zunmtr.o zupgtr.o \ | |||
| zupmtr.o izmax1.o dzsum1.o zstemr.o \ | |||
| zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \ | |||
| zcgesv.o zcposv.o zlag2c.o zlat2c.o \ | |||
| zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \ | |||
| ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \ | |||
| zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \ | |||
| @@ -488,6 +505,7 @@ ZLASRC_O = \ | |||
| zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ | |||
| zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ | |||
| zgesvdq.o | |||
| endif | |||
| ifdef USEXBLAS | |||
| ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | |||
| @@ -501,18 +519,30 @@ ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ | |||
| zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o | |||
| endif | |||
| DEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CDEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ | |||
| DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \ | |||
| DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \ | |||
| DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DDEPRECSRC = \ | |||
| DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \ | |||
| DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \ | |||
| DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \ | |||
| DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SDEPRECSRC = \ | |||
| DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \ | |||
| DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \ | |||
| DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \ | |||
| DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZDEPRECSRC = \ | |||
| DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \ | |||
| DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \ | |||
| DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o | |||
| endif | |||
| # filter out optimized codes from OpenBLAS | |||
| ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o | |||
| @@ -560,7 +590,7 @@ ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) | |||
| endif | |||
| ifdef BUILD_DEPRECATED | |||
| DEPRECATED = $(DEPRECSRC) | |||
| DEPRECATED = $(SDEPRECSRC) $(DDEPRECSRC) $(CDEPRECSRC) $(ZDEPRECSRC) | |||
| endif | |||
| .PHONY: all | |||
| @@ -33,25 +33,37 @@ | |||
| TOPSRCDIR = ../.. | |||
| include $(TOPSRCDIR)/make.inc | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" | |||
| SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ | |||
| slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ | |||
| slatm3.o slatm5.o slatm6.o slahilb.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ | |||
| clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \ | |||
| clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o | |||
| endif | |||
| ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" | |||
| DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ | |||
| dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ | |||
| dlatm3.o dlatm5.o dlatm6.o dlahilb.o | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ | |||
| zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ | |||
| zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o | |||
| endif | |||
| .PHONY: all | |||
| all: $(TMGLIB) | |||
| @@ -97,5 +109,9 @@ cleanobj: | |||
| cleanlib: | |||
| rm -f $(TMGLIB) | |||
| ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) | |||
| slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
| endif | |||
| ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) | |||
| dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
| endif | |||
| @@ -1,11 +1,19 @@ | |||
| TOPDIR = ../.. | |||
| include ../../Makefile.system | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS = sgetf2_k.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS = dgetf2_k.$(SUFFIX) | |||
| endif | |||
| QBLASOBJS = qgetf2_k.$(SUFFIX) | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS = cgetf2_k.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS = zgetf2_k.$(SUFFIX) | |||
| endif | |||
| XBLASOBJS = xgetf2_k.$(SUFFIX) | |||
| sgetf2_k.$(SUFFIX) : getf2_k.c | |||
| @@ -17,6 +17,19 @@ ZBLASOBJS += zgetrf_parallel.$(SUFFIX) | |||
| XBLASOBJS += xgetrf_parallel.$(SUFFIX) | |||
| endif | |||
| ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| ifeq ($(USE_OPENMP), 1) | |||
| GETRF_SRC = getrf_parallel_omp.c | |||
| else | |||
| @@ -17,6 +17,19 @@ ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_pa | |||
| XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX) | |||
| endif | |||
| ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| sgetrs_N_single.$(SUFFIX) : getrs_single.c | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANS $< -o $(@F) | |||
| @@ -1,11 +1,19 @@ | |||
| TOPDIR = ../.. | |||
| include ../../Makefile.system | |||
| ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS = slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS = dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) | |||
| endif | |||
| QBLASOBJS = qlaswp_plus.$(SUFFIX) qlaswp_minus.$(SUFFIX) | |||
| ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS = claswp_plus.$(SUFFIX) claswp_minus.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS = zlaswp_plus.$(SUFFIX) zlaswp_minus.$(SUFFIX) | |||
| endif | |||
| XBLASOBJS = xlaswp_plus.$(SUFFIX) xlaswp_minus.$(SUFFIX) | |||
| slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) \ | |||
| @@ -1,11 +1,19 @@ | |||
| TOPDIR = ../.. | |||
| include ../../Makefile.system | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS = slauu2_U.$(SUFFIX) slauu2_L.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS = dlauu2_U.$(SUFFIX) dlauu2_L.$(SUFFIX) | |||
| endif | |||
| QBLASOBJS = qlauu2_U.$(SUFFIX) qlauu2_L.$(SUFFIX) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS = clauu2_U.$(SUFFIX) clauu2_L.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS = zlauu2_U.$(SUFFIX) zlauu2_L.$(SUFFIX) | |||
| endif | |||
| XBLASOBJS = xlauu2_U.$(SUFFIX) xlauu2_L.$(SUFFIX) | |||
| slauu2_U.$(SUFFIX) : lauu2_U.c | |||
| @@ -17,6 +17,19 @@ ZBLASOBJS += zlauum_U_parallel.$(SUFFIX) zlauum_L_parallel.$(SUFFIX) | |||
| XBLASOBJS += xlauum_U_parallel.$(SUFFIX) xlauum_L_parallel.$(SUFFIX) | |||
| endif | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| slauum_U_single.$(SUFFIX) : lauum_U_single.c | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) | |||
| @@ -8,6 +8,19 @@ CBLASOBJS = cpotf2_U.$(SUFFIX) cpotf2_L.$(SUFFIX) | |||
| ZBLASOBJS = zpotf2_U.$(SUFFIX) zpotf2_L.$(SUFFIX) | |||
| XBLASOBJS = xpotf2_U.$(SUFFIX) xpotf2_L.$(SUFFIX) | |||
| ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| spotf2_U.$(SUFFIX) : potf2_U.c | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) | |||
| @@ -17,6 +17,20 @@ ZBLASOBJS += zpotrf_U_parallel.$(SUFFIX) zpotrf_L_parallel.$(SUFFIX) | |||
| XBLASOBJS += xpotrf_U_parallel.$(SUFFIX) xpotrf_L_parallel.$(SUFFIX) | |||
| endif | |||
| ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| spotrf_U_single.$(SUFFIX) : potrf_U_single.c | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) | |||
| @@ -1,11 +1,19 @@ | |||
| TOPDIR = ../.. | |||
| include ../../Makefile.system | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS = strti2_UU.$(SUFFIX) strti2_UN.$(SUFFIX) strti2_LU.$(SUFFIX) strti2_LN.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS = dtrti2_UU.$(SUFFIX) dtrti2_UN.$(SUFFIX) dtrti2_LU.$(SUFFIX) dtrti2_LN.$(SUFFIX) | |||
| endif | |||
| QBLASOBJS = qtrti2_UU.$(SUFFIX) qtrti2_UN.$(SUFFIX) qtrti2_LU.$(SUFFIX) qtrti2_LN.$(SUFFIX) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS = ctrti2_UU.$(SUFFIX) ctrti2_UN.$(SUFFIX) ctrti2_LU.$(SUFFIX) ctrti2_LN.$(SUFFIX) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS = ztrti2_UU.$(SUFFIX) ztrti2_UN.$(SUFFIX) ztrti2_LU.$(SUFFIX) ztrti2_LN.$(SUFFIX) | |||
| endif | |||
| XBLASOBJS = xtrti2_UU.$(SUFFIX) xtrti2_UN.$(SUFFIX) xtrti2_LU.$(SUFFIX) xtrti2_LN.$(SUFFIX) | |||
| strti2_UU.$(SUFFIX) : trti2_U.c | |||
| @@ -23,6 +23,19 @@ ZBLASOBJS += ztrtri_UU_parallel.$(SUFFIX) ztrtri_UN_parallel.$(SUFFIX) ztrtri_LU | |||
| XBLASOBJS += xtrtri_UU_parallel.$(SUFFIX) xtrtri_UN_parallel.$(SUFFIX) xtrtri_LU_parallel.$(SUFFIX) xtrtri_LN_parallel.$(SUFFIX) | |||
| endif | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| strtri_UU_single.$(SUFFIX) : trtri_U_single.c | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUNIT $< -o $(@F) | |||
| @@ -17,6 +17,19 @@ ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_ | |||
| XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX) | |||
| endif | |||
| ifneq ($(BUILD_SINGLE),1) | |||
| SBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_DOUBLE),1) | |||
| DBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX),1) | |||
| CBLASOBJS= | |||
| endif | |||
| ifneq ($(BUILD_COMPLEX16),1) | |||
| ZBLASOBJS= | |||
| endif | |||
| strtrs_UNU_single.$(SUFFIX) : trtrs_single.c | |||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) | |||
| @@ -4,7 +4,7 @@ include_directories(${PROJECT_BINARY_DIR}) | |||
| enable_language(Fortran) | |||
| if (BUILD_SINGLE) | |||
| list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3) | |||
| list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3) | |||
| endif() | |||
| if (BUILD_DOUBLE) | |||
| list (APPEND OpenBLAS_Tests dblat1 dblat2 dblat3) | |||
| @@ -17,7 +17,7 @@ if (BUILD_COMPLEX16) | |||
| endif() | |||
| foreach(test_bin ${OpenBLAS_Tests}) | |||
| add_executable(${test_bin} ${test_bin}.f) | |||
| add_executable(${test_bin} ${test_bin}.f) | |||
| target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}) | |||
| endforeach() | |||
| @@ -34,7 +34,19 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh | |||
| "fi\n" | |||
| ) | |||
| set(float_types s d c z) | |||
| #set(float_types s d c z) | |||
| if (BUILD_SINGLE) | |||
| list (APPEND float_types s) | |||
| endif() | |||
| if (BUILD_DOUBLE) | |||
| list (APPEND float_types d) | |||
| endif() | |||
| if (BUILD_COMPLEX) | |||
| list (APPEND float_types c) | |||
| endif() | |||
| if (BUILD_COMPLEX16) | |||
| list (APPEND float_types z) | |||
| endif() | |||
| foreach(float_type ${float_types}) | |||
| string(TOUPPER ${float_type} float_type_upper) | |||
| add_test(NAME "${float_type}blas1" | |||
| @@ -7,82 +7,241 @@ all :: | |||
| else | |||
| all :: level1 level2 level3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1) | |||
| level1: sblat1 dblat1 cblat1 zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1) | |||
| level1: dblat1 cblat1 zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1) | |||
| level1: sblat1 cblat1 zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1) | |||
| level1: cblat1 zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x) | |||
| level1: cblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1) | |||
| level1: zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1) | |||
| level1: sblat1 zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1) | |||
| level1: sblat1 dblat1 zblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx) | |||
| level1: sblat1 dblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx) | |||
| level1: sblat1 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx) | |||
| level1: dblat1 | |||
| endif | |||
| level1 : sblat1 dblat1 cblat1 zblat1 | |||
| ifndef CROSS | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1 | |||
| endif | |||
| ifdef SMP | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./sblat1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OMP_NUM_THREADS=2 ./dblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./cblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./zblat1 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./sblat1 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./dblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./cblat1 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./zblat1 | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| #level2: sblat2 dblat2 cblat2 zblat2 | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1) | |||
| level2: sblat2 dblat2 cblat2 zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1) | |||
| level2: dblat2 cblat2 zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1) | |||
| level2: sblat2 cblat2 zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1) | |||
| level2: cblat2 zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x) | |||
| level2: cblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1) | |||
| level2: zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1) | |||
| level2: sblat2 zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1) | |||
| level2: sblat2 dblat2 zblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx) | |||
| level2: sblat2 dblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx) | |||
| level2: sblat2 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx) | |||
| level2: dblat2 | |||
| endif | |||
| level2 : sblat2 dblat2 cblat2 zblat2 | |||
| ifndef CROSS | |||
| rm -f ?BLAT2.SUMM | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat | |||
| @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat | |||
| @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat | |||
| @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat | |||
| @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifdef SMP | |||
| rm -f ?BLAT2.SUMM | |||
| ifeq ($(USE_OPENMP), 1) | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat | |||
| @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat | |||
| @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat | |||
| @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat | |||
| @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat | |||
| @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat | |||
| @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./cblat2 < ./cblat2.dat | |||
| @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat | |||
| @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_HALF),1) | |||
| level3 : test_shgemm sblat3 dblat3 cblat3 zblat3 | |||
| else | |||
| level3 : sblat3 dblat3 cblat3 zblat3 | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1) | |||
| level3: sblat3 dblat3 cblat3 zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1) | |||
| level3: dblat3 cblat3 zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1) | |||
| level3: sblat3 cblat3 zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1) | |||
| level3: cblat3 zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x) | |||
| level3: cblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1) | |||
| level3: zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1) | |||
| level3: sblat3 zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1) | |||
| level3: sblat3 dblat3 zblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx) | |||
| level3: sblat3 dblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx) | |||
| level3: sblat3 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx) | |||
| level3: dblat3 | |||
| endif | |||
| #ifeq ($(BUILD_HALF),1) | |||
| #level3 : test_shgemm sblat3 dblat3 cblat3 zblat3 | |||
| #else | |||
| #level3 : sblat3 dblat3 cblat3 zblat3 | |||
| #endif | |||
| ifndef CROSS | |||
| rm -f ?BLAT3.SUMM | |||
| ifeq ($(BUILD_HALF),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM | |||
| @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat | |||
| @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat | |||
| @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat | |||
| @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat | |||
| @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifdef SMP | |||
| rm -f ?BLAT3.SUMM | |||
| ifeq ($(USE_OPENMP), 1) | |||
| @@ -90,30 +249,46 @@ ifeq ($(BUILD_HALF),1) | |||
| OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM | |||
| @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat | |||
| @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat | |||
| @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat | |||
| @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat | |||
| @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 | |||
| endif | |||
| else | |||
| ifeq ($(BUILD_HALF),1) | |||
| OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM | |||
| @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat | |||
| @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat | |||
| @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| OPENBLAS_NUM_THREADS=2 ./cblat3 < ./cblat3.dat | |||
| @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat | |||
| @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 | |||
| endif | |||
| endif | |||
| endif | |||
| endif | |||
| level3_3m : zblat3_3m cblat3_3m | |||
| @@ -151,56 +326,71 @@ endif | |||
| endif | |||
| endif | |||
| ifeq ($(BUILD_SINGLE),1) | |||
| sblat1 : sblat1.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o sblat1 sblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_DOUBLE),1) | |||
| dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o dblat1 dblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| else | |||
| dblat2: | |||
| dblat3: | |||
| endif | |||
| qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o qblat1 qblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o cblat1 cblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| cblat2 : cblat2.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o cblat2 cblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o zblat2 zblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_HALF),1) | |||
| test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| ifeq ($(BUILD_COMPLEX),1) | |||
| cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o cblat3_3m cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| ifeq ($(BUILD_COMPLEX16),1) | |||
| zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME) | |||
| $(FC) $(FLDFLAGS) -o zblat3_3m zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) | |||
| endif | |||
| @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| #include "openblas_utest.h" | |||
| #if defined(BUILD_SINGLE) && defined(BUILD_DOUBLE) | |||
| CTEST(dsdot,dsdot_n_1) | |||
| { | |||
| float x= 0.172555164F; | |||
| @@ -47,17 +47,4 @@ CTEST(dsdot,dsdot_n_1) | |||
| ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); | |||
| } | |||
| CTEST(dsdot,dsdot_n_2) | |||
| { | |||
| float x[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F}; | |||
| float y[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F}; | |||
| blasint incx=1; | |||
| blasint incy=1; | |||
| blasint n=8; | |||
| double res1=0.0f, res2= 2.0400000444054616; | |||
| res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy); | |||
| ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); | |||
| } | |||
| #endif | |||
| @@ -48,6 +48,7 @@ void* xmalloc(size_t n) | |||
| } | |||
| } | |||
| #ifdef BUILD_DOUBLE | |||
| void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) | |||
| { | |||
| char trans1 = 'T'; | |||
| @@ -59,9 +60,13 @@ void check_dgemm(double *a, double *b, double *result, double *expected, blasint | |||
| ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS); | |||
| } | |||
| } | |||
| #endif | |||
| CTEST(fork, safety) | |||
| { | |||
| #ifndef BUILD_DOUBLE | |||
| exit(0); | |||
| #else | |||
| blasint n = 1000; | |||
| int i; | |||
| @@ -124,4 +129,5 @@ CTEST(fork, safety) | |||
| ASSERT_EQUAL(wait_pid, fork_pid); | |||
| ASSERT_EQUAL(0, WEXITSTATUS (child_status)); | |||
| } | |||
| #endif | |||
| } | |||
| @@ -529,16 +529,20 @@ CTEST(potrf, smoketest_trivial){ | |||
| for (j = 0; j < n; ++j) { | |||
| double err; | |||
| #ifdef BUILD_SINGLE | |||
| err = fabs(A1s[i+n*j] - Bs[i+n*j]); | |||
| if (err > 1e-5) { | |||
| CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_DOUBLE | |||
| err = fabs(A1d[i+n*j] - Bd[i+n*j]); | |||
| if (err > 1e-12) { | |||
| CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_COMPLEX | |||
| #ifdef OPENBLAS_COMPLEX_C99 | |||
| err = cabsf(A1c[i+n*j] - Bc[i+n*j]); | |||
| #else | |||
| @@ -548,7 +552,9 @@ CTEST(potrf, smoketest_trivial){ | |||
| if (err > 1e-5) { | |||
| CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
| } | |||
| #endif | |||
| #ifdef BUILD_COMPLEX16 | |||
| #ifdef OPENBLAS_COMPLEX_C99 | |||
| err = cabs(A1z[i+n*j] - Bz[i+n*j]); | |||
| #else | |||
| @@ -558,6 +564,7 @@ CTEST(potrf, smoketest_trivial){ | |||
| if (err > 1e-12) { | |||
| CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| } | |||