Browse Source

Merge pull request #2852 from martin-frbg/issue2588-cmake

Support building only a subset of variable types
tags/v0.3.11^2
Martin Kroeker GitHub 5 years ago
parent
commit
ec638a82bf
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
41 changed files with 2779 additions and 954 deletions
  1. +12
    -0
      Makefile
  2. +17
    -5
      Makefile.system
  3. +2
    -2
      Makefile.tail
  4. +17
    -11
      cmake/lapack.cmake
  5. +98
    -10
      common_param.h
  6. +115
    -4
      ctest/Makefile
  7. +13
    -0
      driver/level2/CMakeLists.txt
  8. +80
    -2
      driver/level2/Makefile
  9. +36
    -1
      driver/level3/CMakeLists.txt
  10. +54
    -0
      driver/level3/Makefile
  11. +8
    -0
      driver/level3/syrk_thread.c
  12. +9
    -2
      driver/others/blas_server.c
  13. +13
    -1
      driver/others/blas_server_omp.c
  14. +11
    -5
      driver/others/memory.c
  15. +19
    -7
      exports/Makefile
  16. +907
    -739
      exports/gensymbol
  17. +31
    -0
      interface/CMakeLists.txt
  18. +48
    -6
      interface/Makefile
  19. +281
    -4
      kernel/CMakeLists.txt
  20. +64
    -6
      kernel/Makefile.L2
  21. +67
    -17
      kernel/Makefile.L3
  22. +375
    -39
      kernel/setparam-ref.c
  23. +68
    -31
      lapack-netlib/LAPACKE/src/Makefile
  24. +46
    -16
      lapack-netlib/SRC/Makefile
  25. +16
    -0
      lapack-netlib/TESTING/MATGEN/Makefile
  26. +8
    -0
      lapack/getf2/Makefile
  27. +13
    -0
      lapack/getrf/Makefile
  28. +13
    -0
      lapack/getrs/Makefile
  29. +8
    -0
      lapack/laswp/Makefile
  30. +8
    -0
      lapack/lauu2/Makefile
  31. +13
    -0
      lapack/lauum/Makefile
  32. +13
    -0
      lapack/potf2/Makefile
  33. +14
    -0
      lapack/potrf/Makefile
  34. +8
    -0
      lapack/trti2/Makefile
  35. +13
    -0
      lapack/trtri/Makefile
  36. +13
    -0
      lapack/trtrs/Makefile
  37. +15
    -3
      test/CMakeLists.txt
  38. +217
    -27
      test/Makefile
  39. +2
    -15
      utest/test_dsdot.c
  40. +6
    -0
      utest/test_fork.c
  41. +8
    -1
      utest/test_potrs.c

+ 12
- 0
Makefile View File

@@ -304,6 +304,18 @@ else
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_SINGLE), 1)
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_DOUBLE), 1)
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX), 1)
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX16), 1)
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc


+ 17
- 5
Makefile.system View File

@@ -9,7 +9,7 @@ ifndef TOPDIR
TOPDIR = .
endif

# If ARCH is not set, we use the host system's architecture for getarch compile options.
# If ARCH is not set, we use the host system's architecture for getarch compile options.
ifndef ARCH
HOSTARCH := $(shell uname -m)
else
@@ -73,6 +73,18 @@ endif
#
# Beginning of system configuration
#
# Default selection of variable types: if none of BUILD_SINGLE, BUILD_DOUBLE,
# BUILD_COMPLEX and BUILD_COMPLEX16 is set to 1, switch all four on.
# As soon as any one of them is 1, the others are left exactly as given.
# "override" lets these assignments take effect even when the variables were
# passed (with a value other than 1) on the make command line.
ifneq ($(BUILD_SINGLE),1)
ifneq ($(BUILD_DOUBLE),1)
ifneq ($(BUILD_COMPLEX),1)
ifneq ($(BUILD_COMPLEX16),1)
override BUILD_SINGLE=1
override BUILD_DOUBLE=1
override BUILD_COMPLEX=1
override BUILD_COMPLEX16=1
endif
endif
endif
endif

ifndef HOSTCC
HOSTCC = $(CC)
@@ -1224,16 +1236,16 @@ ifeq ($(BUILD_HALF), 1)
CCOMMON_OPT += -DBUILD_HALF
endif
ifeq ($(BUILD_SINGLE), 1)
CCOMMON_OPT += -DBUILD_SINGLE
CCOMMON_OPT += -DBUILD_SINGLE=1
endif
ifeq ($(BUILD_DOUBLE), 1)
CCOMMON_OPT += -DBUILD_DOUBLE
CCOMMON_OPT += -DBUILD_DOUBLE=1
endif
ifeq ($(BUILD_COMPLEX), 1)
CCOMMON_OPT += -DBUILD_COMPLEX
CCOMMON_OPT += -DBUILD_COMPLEX=1
endif
ifeq ($(BUILD_COMPLEX16), 1)
CCOMMON_OPT += -DBUILD_COMPLEX16
CCOMMON_OPT += -DBUILD_COMPLEX16=1
endif

CCOMMON_OPT += -DVERSION=\"$(VERSION)\"


+ 2
- 2
Makefile.tail View File

@@ -11,8 +11,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))

HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))

BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)

ifdef EXPRECISION
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)


+ 17
- 11
cmake/lapack.cmake View File

@@ -1,11 +1,12 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f dlaset.f
../INSTALL/ilaver.f xerbla_array.f
../INSTALL/slamch.f)
set(SCLAUX
scombssq.f sbdsvdx.f sstevx.f sstein.f
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
@@ -25,6 +26,7 @@ set(SCLAUX
set(DZLAUX
dbdsdc.f
dbdsvdx.f dstevx.f dstein.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
@@ -35,14 +37,14 @@ set(DZLAUX
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
set(SLASRC
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgehd2.f sgehrd.f sgelq2.f sgelqf.f
@@ -83,8 +85,8 @@ set(SLASRC
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstev.f sstevd.f sstevr.f
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f
@@ -116,7 +118,7 @@ set(SLASRC
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f
sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
@@ -229,7 +231,7 @@ set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f)
set(DLASRC
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgehd2.f dgehrd.f dgelq2.f dgelqf.f
@@ -270,8 +272,8 @@ set(DLASRC
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstev.f dstevd.f dstevr.f
dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f
@@ -474,12 +476,16 @@ endif()
if(BUILD_COMPLEX)
set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
message(STATUS "Building Complex Precision")
message(STATUS "Building Single Precision Complex")
endif()
if(BUILD_COMPLEX16)
set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
message(STATUS "Building Double Complex Precision")
# for zlange/zlanhe
if (NOT BUILD_DOUBLE)
set (LA_REL_SRC ${LA_REL_SRC} dcombssq.f)
endif ()
message(STATUS "Building Double Precision Complex")
endif()
# add lapack-netlib folder to the sources


+ 98
- 10
common_param.h View File

@@ -146,26 +146,34 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
int (*shlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);

#endif

#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
int sgemm_p, sgemm_q, sgemm_r;
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
#endif

int exclusive_cache;

#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
float (*samax_k) (BLASLONG, float *, BLASLONG);
float (*samin_k) (BLASLONG, float *, BLASLONG);
float (*smax_k) (BLASLONG, float *, BLASLONG);
float (*smin_k) (BLASLONG, float *, BLASLONG);

BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);

float (*snrm2_k) (BLASLONG, float *, BLASLONG);
float (*sasum_k) (BLASLONG, float *, BLASLONG);
#endif
#ifdef BUILD_SINGLE
float (*ssum_k) (BLASLONG, float *, BLASLONG);
#endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);

int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);

@@ -175,11 +183,15 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);

int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#ifdef BUILD_SINGLE
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);

int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif

#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#ifdef ARCH_X86_64
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
@@ -193,7 +205,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);

#endif
#ifdef BUILD_SINGLE
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@@ -245,10 +258,14 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);

int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif

#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int dgemm_p, dgemm_q, dgemm_r;
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
#endif

#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*damax_k) (BLASLONG, double *, BLASLONG);
double (*damin_k) (BLASLONG, double *, BLASLONG);
double (*dmax_k) (BLASLONG, double *, BLASLONG);
@@ -257,25 +274,37 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
#endif

#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
double (*dasum_k) (BLASLONG, double *, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
double (*dsum_k) (BLASLONG, double *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
#endif
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);

int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);

int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
#ifdef BUILD_DOUBLE
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);

int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);

#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);

@@ -283,7 +312,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);

#endif
#ifdef BUILD_DOUBLE
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
@@ -335,7 +365,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);

int (*dneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
#endif
#ifdef EXPRECISION

int qgemm_p, qgemm_q, qgemm_r;
@@ -430,6 +460,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);

#endif

#ifdef BUILD_COMPLEX
int cgemm_p, cgemm_q, cgemm_r;
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;

@@ -593,7 +624,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);

int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif

#ifdef BUILD_COMPLEX16
int zgemm_p, zgemm_q, zgemm_r;
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;

@@ -757,6 +790,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);

int (*zneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
#endif

#ifdef EXPRECISION

@@ -930,22 +964,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
void (*init)(void);

int snum_opt, dnum_opt, qnum_opt;
#ifdef BUILD_SINGLE
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
#endif

#ifdef BUILD_SINGLE
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
#endif

#ifdef BUILD_DOUBLE
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
#endif

#ifdef BUILD_COMPLEX
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
@@ -955,7 +1001,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
#endif

#ifdef BUILD_COMPLEX16
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
@@ -965,17 +1013,23 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
#endif

#ifdef BUILD_SINGLE
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
#endif

#ifdef BUILD_DOUBLE
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
#endif

#ifdef BUILD_COMPLEX
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
@@ -985,7 +1039,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
#endif

#ifdef BUILD_COMPLEX16
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
@@ -995,12 +1051,20 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
#endif

#ifdef BUILD_SINGLE
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);

#endif
} gotoblas_t;

extern gotoblas_t *gotoblas;
@@ -1021,19 +1085,23 @@ extern gotoblas_t *gotoblas;
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
#endif

#if defined (BUILD_SINGLE)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif

#if defined (BUILD_DOUBLE)
#define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R gotoblas -> dgemm_r
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif

#define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_Q gotoblas -> qgemm_q
@@ -1042,19 +1110,39 @@ extern gotoblas_t *gotoblas;
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn

#ifdef BUILD_COMPLEX
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#ifndef BUILD_SINGLE
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R 1024
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
#endif

#ifdef BUILD_COMPLEX16
#define ZGEMM_P gotoblas -> zgemm_p
#define ZGEMM_Q gotoblas -> zgemm_q
#define ZGEMM_R gotoblas -> zgemm_r
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
#ifndef BUILD_DOUBLE
#define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R 1024
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif
#endif

#define XGEMM_P gotoblas -> xgemm_p
#define XGEMM_Q gotoblas -> xgemm_q


+ 115
- 4
ctest/Makefile View File

@@ -46,56 +46,155 @@ else
all :: all1 all2 all3
endif

all1: xscblat1 xdcblat1 xccblat1 xzcblat1
ifeq ($(BUILD_SINGLE),1)
all1targets += xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
all1targets += xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
all1targets += xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
all1targets += xzcblat1
endif

all1: $(all1targets)

ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./xzcblat1
endif
else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./xzcblat1
endif
endif
endif

ifeq ($(BUILD_SINGLE),1)
all2targets += xscblat2
endif
ifeq ($(BUILD_DOUBLE),1)
all2targets += xdcblat2
endif
ifeq ($(BUILD_COMPLEX),1)
all2targets += xccblat2
endif
ifeq ($(BUILD_COMPLEX16),1)
all2targets += xzcblat2
endif

all2: $(all2targets)

all2: xscblat2 xdcblat2 xccblat2 xzcblat2
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./xscblat2 < sin2
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./xdcblat2 < din2
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./xccblat2 < cin2
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./xzcblat2 < zin2
endif
else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
endif
endif
endif


ifeq ($(BUILD_SINGLE),1)
all3targets += xscblat3
endif
ifeq ($(BUILD_DOUBLE),1)
all3targets += xdcblat3
endif
ifeq ($(BUILD_COMPLEX),1)
all3targets += xccblat3
endif
ifeq ($(BUILD_COMPLEX16),1)
all3targets += xzcblat3
endif

all3: $(all3targets)

all3: xscblat3 xdcblat3 xccblat3 xzcblat3
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./xscblat3 < sin3
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./xdcblat3 < din3
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./xccblat3 < cin3
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./xzcblat3 < zin3
endif
else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
endif
endif
endif

# Run the 3M variants of the level-3 CBLAS tests with two threads.
# xccblat3_3m is the single-precision complex test, so it is guarded by
# BUILD_COMPLEX in both the OpenMP and the non-OpenMP branch; xzcblat3_3m is
# the double-precision complex test and is guarded by BUILD_COMPLEX16.
all3_3m: xzcblat3_3m xccblat3_3m
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
else
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
endif
@@ -115,13 +214,19 @@ endif
endif
endif

ifeq ($(BUILD_SINGLE),1)
# Single real
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xscblat1 c_sblat1.o $(stestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xscblat2: $(stestl2o) c_sblat2.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xscblat2 c_sblat2.o $(stestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xscblat3: $(stestl3o) c_sblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xscblat3 c_sblat3.o $(stestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_DOUBLE),1)
# Double real
xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@@ -129,7 +234,10 @@ xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif


ifeq ($(BUILD_COMPLEX),1)
# Single complex
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@@ -140,7 +248,10 @@ xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)

xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif


ifeq ($(BUILD_COMPLEX16),1)
# Double complex
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@@ -152,6 +263,6 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)

xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif

include $(TOPDIR)/Makefile.tail

+ 13
- 0
driver/level2/CMakeLists.txt View File

@@ -197,6 +197,19 @@ foreach (float_type ${FLOAT_TYPES})
endif ()
endforeach ()

# Threaded builds only: when a complex type is enabled without its real
# counterpart, still request the gemv_thread_n / gemv_thread_t objects for
# that real type (passed as the trailing "SINGLE" / "DOUBLE" argument).
if (USE_THREAD)
  # single-precision complex enabled, single-precision real disabled
  if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE")
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "SINGLE")
  endif ()

  # double-precision complex enabled, double-precision real disabled
  if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "DOUBLE")
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "DOUBLE")
  endif ()
endif ()

if (USE_THREAD)
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
endif ()


+ 80
- 2
driver/level2/Makefile View File

@@ -417,19 +417,63 @@ XBLASOBJS += \

endif

# Prune the per-type object lists for variable types that were not requested.
# Each list is first emptied and then refilled only with the objects named
# below for the combinations of types that are still being built.

# Single precision real not requested.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
# With BUILD_DOUBLE in an SMP build, keep the threaded sgemv and strsv objects.
# NOTE(review): presumably these are still referenced by other enabled code - confirm.
ifeq ($(BUILD_DOUBLE),1)
ifdef SMP
SBLASOBJS += \
sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX) \
strsv_NUU.$(SUFFIX) strsv_NUN.$(SUFFIX) strsv_NLU.$(SUFFIX) strsv_NLN.$(SUFFIX) \
strsv_TUU.$(SUFFIX) strsv_TUN.$(SUFFIX) strsv_TLU.$(SUFFIX) strsv_TLN.$(SUFFIX)
endif
endif
# With BUILD_COMPLEX in an SMP build, keep only the threaded sgemv objects.
# NOTE(review): this uses "=" rather than "+=", so when BUILD_DOUBLE and
# BUILD_COMPLEX are both 1 the strsv objects added above are discarded - confirm
# that this is intended.
ifeq ($(BUILD_COMPLEX),1)
ifdef SMP
SBLASOBJS = sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX)
endif
endif
endif
# Double precision real not requested: keep only the threaded dgemv objects,
# and only for SMP builds that have BUILD_COMPLEX16 enabled.
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
ifdef SMP
DBLASOBJS = dgemv_thread_n.$(SUFFIX) dgemv_thread_t.$(SUFFIX)
endif
endif
endif
# Single precision complex not requested: with BUILD_COMPLEX16 the ctrsv
# objects are kept (note that this case is not conditional on SMP).
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS= \
ctrsv_NUU.$(SUFFIX) ctrsv_NUN.$(SUFFIX) ctrsv_NLU.$(SUFFIX) ctrsv_NLN.$(SUFFIX) \
ctrsv_TUU.$(SUFFIX) ctrsv_TUN.$(SUFFIX) ctrsv_TLU.$(SUFFIX) ctrsv_TLN.$(SUFFIX) \
ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \
ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX)
endif
endif
# Double precision complex not requested: nothing is kept.
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

all ::

ifeq ($(BUILD_SINGLE),1)

sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<

sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
endif

ifeq ($(BUILD_DOUBLE),1)

dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<

dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
endif

qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
@@ -437,6 +481,8 @@ qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c
qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<

ifeq ($(BUILD_COMPLEX),1)

cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<

@@ -460,6 +506,9 @@ cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c

cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif

ifeq ($(BUILD_COMPLEX16),1)

zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@@ -484,6 +533,7 @@ zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c

zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif

xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@@ -509,24 +559,34 @@ xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c
xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<


# Rules for the sgbmv_thread_n / sgbmv_thread_t objects, defined only when
# BUILD_SINGLE is 1. Both compile gbmv_thread.c; -DTRANSA selects the _t variant.
ifeq ($(BUILD_SINGLE),1)

sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<

sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
endif


# Rules for the dgbmv_thread_n / dgbmv_thread_t objects, defined only when
# BUILD_DOUBLE is 1. Both compile gbmv_thread.c; -DTRANSA selects the _t variant.
ifeq ($(BUILD_DOUBLE),1)

dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<

dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
endif
qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<

qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<


ifeq ($(BUILD_COMPLEX),1)

cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<

@@ -550,6 +610,10 @@ cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c

cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif


ifeq ($(BUILD_COMPLEX16),1)

zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@@ -574,6 +638,7 @@ zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c

zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif

xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@@ -599,24 +664,32 @@ xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<


# Rules for the sgemv_thread_n / sgemv_thread_t objects, defined when any of
# BUILD_SINGLE, BUILD_DOUBLE or BUILD_COMPLEX expands to a non-empty string.
# NOTE(review): $(or ...) returns the first non-empty argument, so a value of 0
# also satisfies this test, unlike the "ifeq (...,1)" guards used elsewhere - confirm intended.
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif


# Rules for the dgemv_thread_n / dgemv_thread_t objects, defined when BUILD_DOUBLE
# or BUILD_COMPLEX16 expands to a non-empty string.
# NOTE(review): $(or ...) returns the first non-empty argument, so a value of 0
# also satisfies this test - confirm intended.
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif
qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)


ifeq ($(BUILD_COMPLEX),1)

cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

@@ -640,6 +713,10 @@ cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.

cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
endif


ifeq ($(BUILD_COMPLEX16),1)

zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
@@ -664,6 +741,7 @@ zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.

zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
endif

xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)


+ 36
- 1
driver/level3/CMakeLists.txt View File

@@ -14,6 +14,24 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES})
endif ()
endforeach ()

# Complex16 enabled without real double: generate the gemm objects for every
# entry of GEMM_DEFINES with the "DOUBLE" type argument, plus the THREADED_LEVEL3
# variants when USE_THREAD is set and USE_SIMPLE_THREADED_LEVEL3 is not.
if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
foreach (GEMM_DEFINE ${GEMM_DEFINES})
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE")
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE")
endif()
endforeach()
endif()
# Complex enabled without real single: generate the gemm objects for every
# entry of GEMM_DEFINES with the "SINGLE" type argument, plus the THREADED_LEVEL3
# variants when USE_THREAD is set and USE_SIMPLE_THREADED_LEVEL3 is not.
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE)
foreach (GEMM_DEFINE ${GEMM_DEFINES})
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE")
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE")
endif()
endforeach()
endif()

set(TRMM_TRSM_SOURCES
trmm_L.c
@@ -100,7 +118,24 @@ foreach (float_type ${FLOAT_TYPES})
endif()
endif ()
endforeach ()

# Complex16 enabled without real double: for each entry of GEMM_COMPLEX_DEFINES,
# generate the THREADED_LEVEL3 gemm object with the "DOUBLE" type argument
# (only when USE_THREAD is set and USE_SIMPLE_THREADED_LEVEL3 is not).
# NOTE(review): complex defines are paired with the real "DOUBLE" type here - confirm intended.
if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
string(TOLOWER ${gemm_define} gemm_define_LC)
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "DOUBLE" )
endif()
endforeach()
endif ()
# Complex enabled without real single: for each entry of GEMM_COMPLEX_DEFINES,
# generate the THREADED_LEVEL3 gemm object with the "SINGLE" type argument
# (only when USE_THREAD is set and USE_SIMPLE_THREADED_LEVEL3 is not).
# NOTE(review): complex defines are paired with the real "SINGLE" type here - confirm intended.
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE)
foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
string(TOLOWER ${gemm_define} gemm_define_LC)
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "SINGLE" )
endif()
endforeach()
endif ()

# for gemm3m
if(USE_GEMM3M)
foreach (GEMM_DEFINE ${GEMM_DEFINES})


+ 54
- 0
driver/level3/Makefile View File

@@ -287,6 +287,60 @@ HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \
dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
endif

# Single precision was not requested: reset SBLASOBJS and add back only the
# single-precision objects listed for the other enabled types.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS= \
strsm_LNUU.$(SUFFIX) strsm_LNUN.$(SUFFIX) strsm_LNLU.$(SUFFIX) strsm_LNLN.$(SUFFIX) \
strsm_LTUU.$(SUFFIX) strsm_LTUN.$(SUFFIX) strsm_LTLU.$(SUFFIX) strsm_LTLN.$(SUFFIX) \
strsm_RNUU.$(SUFFIX) strsm_RNUN.$(SUFFIX) strsm_RNLU.$(SUFFIX) strsm_RNLN.$(SUFFIX) \
strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \
ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \
ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX)
endif
endif
ifeq ($(BUILD_COMPLEX),1)
# Append instead of assigning: a plain "=" here discarded the objects selected
# by the BUILD_DOUBLE branch above whenever both DOUBLE and COMPLEX are enabled.
# With COMPLEX alone the list is empty at this point, so the result is unchanged.
SBLASOBJS += sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
endif
endif
endif
# Double precision was not requested: reset DBLASOBJS, then, if complex16 is
# enabled, list only the dgemm objects (and the threaded dgemm objects unless
# USE_SIMPLE_THREADED_LEVEL3 is defined).
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
endif
endif
endif
# Single complex was not requested: reset CBLASOBJS, then, if complex16 is
# enabled, list only the cherk and ctrsm objects (and the threaded cherk objects
# unless USE_SIMPLE_THREADED_LEVEL3 is defined).
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS= \
cherk_UN.$(SUFFIX) cherk_UC.$(SUFFIX) cherk_LN.$(SUFFIX) cherk_LC.$(SUFFIX) \
cherk_kernel_UN.$(SUFFIX) cherk_kernel_UC.$(SUFFIX) \
cherk_kernel_LN.$(SUFFIX) cherk_kernel_LC.$(SUFFIX) \
ctrsm_LNUU.$(SUFFIX) ctrsm_LNUN.$(SUFFIX) ctrsm_LNLU.$(SUFFIX) ctrsm_LNLN.$(SUFFIX) \
ctrsm_LTUU.$(SUFFIX) ctrsm_LTUN.$(SUFFIX) ctrsm_LTLU.$(SUFFIX) ctrsm_LTLN.$(SUFFIX) \
ctrsm_LRUU.$(SUFFIX) ctrsm_LRUN.$(SUFFIX) ctrsm_LRLU.$(SUFFIX) ctrsm_LRLN.$(SUFFIX) \
ctrsm_LCUU.$(SUFFIX) ctrsm_LCUN.$(SUFFIX) ctrsm_LCLU.$(SUFFIX) ctrsm_LCLN.$(SUFFIX) \
ctrsm_RNUU.$(SUFFIX) ctrsm_RNUN.$(SUFFIX) ctrsm_RNLU.$(SUFFIX) ctrsm_RNLN.$(SUFFIX) \
ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \
ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \
ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX)
endif
endif
endif
# Clear the double-complex object list unless BUILD_COMPLEX16 is exactly 1.
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

all ::

shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h


+ 8
- 0
driver/level3/syrk_thread.c View File

@@ -56,12 +56,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
if (!(mode & BLAS_COMPLEX)) {

switch (mode & BLAS_PREC) {
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
case BLAS_SINGLE:
mask = SGEMM_UNROLL_MN - 1;
break;
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
case BLAS_DOUBLE:
mask = DGEMM_UNROLL_MN - 1;
break;
#endif
#ifdef EXPRECISION
case BLAS_XDOUBLE:
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1;
@@ -70,12 +74,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
}
} else {
switch (mode & BLAS_PREC) {
#ifdef BUILD_COMPLEX
case BLAS_SINGLE:
mask = CGEMM_UNROLL_MN - 1;
break;
#endif
#ifdef BUILD_COMPLEX16
case BLAS_DOUBLE:
mask = ZGEMM_UNROLL_MN - 1;
break;
#endif
#ifdef EXPRECISION
case BLAS_XDOUBLE:
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1;


+ 9
- 2
driver/others/blas_server.c View File

@@ -459,13 +459,16 @@ blas_queue_t *tscq;
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
#ifdef BUILD_DOUBLE
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_SINGLE
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else {
#endif
} else {
/* Other types in future */
}
} else {
@@ -476,11 +479,15 @@ blas_queue_t *tscq;
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_COMPLEX16
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_COMPLEX
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else {
/* Other types in future */
}


+ 13
- 1
driver/others/blas_server_omp.c View File

@@ -315,12 +315,15 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#if defined ( BUILD_DOUBLE) || defined (BUILD_COMPLEX16)
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){
#if defined (BUILD_SINGLE) || defined (BUILD_COMPLEX)
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else {
/* Other types in future */
}
@@ -332,15 +335,24 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_COMPLEX16
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#else
fprintf(stderr,"UNHANDLED COMPLEX16\n");
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_COMPLEX
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#else
fprintf(stderr,"UNHANDLED COMPLEX\n");
#endif
} else {
/* Other types in future */
}
}
if (!sb) fprintf(stderr,"SB not declared!!!\n");
queue->sb=sb;
}
}


+ 11
- 5
driver/others/memory.c View File

@@ -2201,11 +2201,17 @@ static void *alloc_mmap(void *address){
#endif
#endif


allocsize = DGEMM_P * DGEMM_Q * sizeof(double);

start = (BLASULONG)map_address;
current = (SCALING - 1) * BUFFER_SIZE;
#ifdef BUILD_DOUBLE
allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
#elif defined(BUILD_COMPLEX16)
allocsize = ZGEMM_P * ZGEMM_Q * sizeof(double);
#elif defined(BUILD_COMPLEX)
allocsize = CGEMM_P * CGEMM_Q * sizeof(double);
#else
allocsize = SGEMM_P * SGEMM_Q * sizeof(double);
#endif
start = (BLASULONG)map_address;
current = (SCALING - 1) * BUFFER_SIZE;

while(current > 0) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE;


+ 19
- 7
exports/Makefile View File

@@ -33,6 +33,18 @@ endif
ifndef BUILD_HALF
BUILD_HALF = 0
endif
# Default each per-type build switch to 0 when it is unset or empty, so a value
# is always available for the gensymbol command lines in this Makefile.
ifndef BUILD_SINGLE
BUILD_SINGLE = 0
endif
ifndef BUILD_DOUBLE
BUILD_DOUBLE = 0
endif
ifndef BUILD_COMPLEX
BUILD_COMPLEX = 0
endif
ifndef BUILD_COMPLEX16
BUILD_COMPLEX16 = 0
endif

ifeq ($(OSNAME), WINNT)
ifeq ($(F_COMPILER), GFORTRAN)
@@ -108,10 +120,10 @@ dll : ../$(LIBDLLNAME)
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)

$(LIBPREFIX).def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)

libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)

ifeq ($(OSNAME), Darwin)
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
@@ -246,23 +258,23 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX)

osx.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)

aix.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)

objcopy.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)

objconv.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)

test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest

linktest.c : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c

clean ::
@rm -f *.def *.dylib __.SYMDEF* *.renamed


+ 907
- 739
exports/gensymbol
File diff suppressed because it is too large
View File


+ 31
- 0
interface/CMakeLists.txt View File

@@ -83,8 +83,12 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})

#sdsdot, dsdot
# sdsdot is generated when either real single or real double is enabled.
if (BUILD_SINGLE OR BUILD_DOUBLE)
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
endif ()
# dsdot is generated only when real double is enabled; note that the type
# argument passed to GenerateNamedObjects is still "SINGLE".
if (BUILD_DOUBLE)
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
endif ()

# trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
@@ -167,4 +171,31 @@ if (NOT DEFINED NO_LAPACK)
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
endif ()

# Complex enabled without real single: generate this fixed set of interface
# objects with the "SINGLE" type argument.
if ( BUILD_COMPLEX AND NOT BUILD_SINGLE)
GenerateNamedObjects("scal.c" "" "scal" 0 "" "" false "SINGLE")
GenerateNamedObjects("copy.c" "" "copy" 0 "" "" false "SINGLE")
GenerateNamedObjects("dot.c" "" "dot" 0 "" "" false "SINGLE")
GenerateNamedObjects("rot.c" "" "rot" 0 "" "" false "SINGLE")
GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "SINGLE")
GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "SINGLE")
GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "SINGLE")
GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "SINGLE")
GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "SINGLE")
GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "SINGLE")
GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "SINGLE")
endif ()
# Complex16 enabled without real double: generate this fixed set of interface
# objects with the "DOUBLE" type argument.
if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("scal.c" "" "scal" 0 "" "" false "DOUBLE")
GenerateNamedObjects("copy.c" "" "copy" 0 "" "" false "DOUBLE")
GenerateNamedObjects("dot.c" "" "dot" 0 "" "" false "DOUBLE")
GenerateNamedObjects("rot.c" "" "rot" 0 "" "" false "DOUBLE")
GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "DOUBLE")
GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "DOUBLE")
GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "DOUBLE")
GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "DOUBLE")
GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "DOUBLE")
GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "DOUBLE")
GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "DOUBLE")
endif ()

add_library(interface OBJECT ${OPENBLAS_SRC})

+ 48
- 6
interface/Makefile View File

@@ -329,7 +329,10 @@ CCBLAS3OBJS = \
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
cblas_cgeadd.$(SUFFIX) cblas_xerbla.$(SUFFIX)
cblas_cgeadd.$(SUFFIX)
CXERBLAOBJ = \
cblas_xerbla.$(SUFFIX)



@@ -391,6 +394,8 @@ ZBLAS2OBJS += $(CZBLAS2OBJS)
ZBLAS3OBJS += $(CZBLAS3OBJS)

SHEXTOBJS += $(CSHEXTOBJS)

CBAUXOBJS += $(CXERBLAOBJ)
endif

SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
@@ -434,13 +439,11 @@ QLAPACKOBJS = \
# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)


CLAPACKOBJS = \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX)


#ZLAPACKOBJS = \
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
@@ -469,8 +472,42 @@ ZBLASOBJS += $(ZLAPACKOBJS)

endif

FUNCOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
# Single precision was not requested: reset SBLASOBJS and add back only the
# single-precision interface objects listed for the other enabled types.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS = dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \
sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \
ssyrk.$(SUFFIX) sgemv.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX),1)
# Append instead of assigning: a plain "=" here discarded the objects selected
# by the BUILD_DOUBLE branch above whenever both DOUBLE and COMPLEX are enabled.
# With COMPLEX alone the list is empty at this point, so the result is unchanged.
# sgemv appears in both lists, so it is listed twice when both are enabled.
SBLASOBJS += \
sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \
isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \
sgemv.$(SUFFIX) sgemm.$(SUFFIX)
endif
endif
# Double precision was not requested: reset DBLASOBJS, then, if complex16 is
# enabled, list only this fixed set of double-precision interface objects.
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = \
ddot.$(SUFFIX) drot.$(SUFFIX) dnrm2.$(SUFFIX) dswap.$(SUFFIX) \
idamax.$(SUFFIX) daxpy.$(SUFFIX) dscal.$(SUFFIX) dcopy.$(SUFFIX) \
dgemv.$(SUFFIX) dgemm.$(SUFFIX)
endif
endif
# Single complex was not requested: reset CBLASOBJS, then, if complex16 is
# enabled, list only this fixed set of single-complex interface objects.
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS = cgetrs.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) cgetrf.$(SUFFIX) \
cpotrf.$(SUFFIX) ctrsm.$(SUFFIX) cblas_cdotc_sub.$(SUFFIX)
endif
endif
# Clear the double-complex object list unless BUILD_COMPLEX16 is exactly 1.
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

# All interface objects for the enabled types. cblas_xerbla is pulled in through
# CBAUXOBJS (which receives CXERBLAOBJ when CBLAS is built); the previous
# reference to the undefined name CXERBLAOBJS expanded to nothing, so
# cblas_xerbla was missing from this list.
FUNCOBJS = $(SHEXTOBJS) $(CBAUXOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
$(info FUNCOBJS = {[$(FUNCOBJS)]} )
ifdef EXPRECISION
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
endif
@@ -481,6 +518,7 @@ endif

FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)


include $(TOPDIR)/Makefile.tail

all :: libs
@@ -503,11 +541,14 @@ level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

aux : $(CBAUXOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

$(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS

srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
$(CC) $(CFLAGS) -c $< -o $(@F)
@@ -2268,3 +2309,4 @@ cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c

cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)


+ 281
- 4
kernel/CMakeLists.txt View File

@@ -91,6 +91,59 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")

# Real single disabled but complex or double enabled: generate the
# single-precision level-1 kernels with the "SINGLE" type argument.
# The max/min/imax/imin kernels are generated only when their *KERNEL variable is defined.
if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE")
if (DEFINED SMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE")
endif ()
if (DEFINED SMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE")
endif ()
if (DEFINED ISMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE")
endif ()
if (DEFINED ISMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE")
endif ()
# Complex16 enabled without real double: generate the double-precision level-1
# kernels with the "DOUBLE" type argument.
# The max/min/imax/imin kernels are generated only when their *KERNEL variable is defined.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE")
if (DEFINED DMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED DMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")
endif ()

# Makefile.L2
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
@@ -124,7 +177,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
endif ()
endforeach ()

# Complex16 enabled without real double: generate the DGEMV n/t kernels
# with the "DOUBLE" type argument.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE")
endif ()
# Complex enabled without real single: generate the SGEMV n/t kernels
# with the "SINGLE" type argument.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE")
endif ()
# Makefile.L3
set(USE_TRMM false)
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE))
@@ -159,6 +219,38 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
endforeach()
# Complex16 enabled without real double: generate the DGEMM kernel, whichever of
# its in/it/on/ot copy kernels are set, and the DGEMM beta kernel.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE")
if (DGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
endif ()
# Real single disabled but double or complex enabled: generate the SGEMM kernel,
# whichever of its in/it/on/ot copy kernels are set, and the SGEMM beta kernel.
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE")
if (SGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE")
endif ()
if (SGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE")
endif ()
if (SGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE")
endif ()
if (SGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE")
endif ()

foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char)
@@ -499,7 +591,31 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
#geadd
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()

# Real double enabled without real single: generate the single-precision TRSM
# kernels (LN/LT/RN/RT) and the generic trsm copy routines. Inner ("i") copies
# use the SGEMM_UNROLL_M source file, outer ("o") copies the SGEMM_UNROLL_N one.
if (BUILD_DOUBLE AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false "SINGLE")

GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false "SINGLE")

GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false "SINGLE")

GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE")
endif ()

# Makefile.LA
if(NOT NO_LAPACK)
@@ -526,6 +642,28 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type})
endforeach()
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
  # COMPLEX is enabled but SINGLE is not: generate the single-precision
  # neg_tcopy / laswp_ncopy objects explicitly.
  # Kernel source names fall back to the generic implementations unless
  # they were already defined.
  if (NOT DEFINED SLASWP_NCOPY)
    set(SLASWP_NCOPY ../generic/laswp_ncopy.c)
  endif ()
  if (NOT DEFINED SNEG_TCOPY)
    set(SNEG_TCOPY ../generic/neg_tcopy.c)
  endif ()

  GenerateNamedObjects(
    "${KERNELDIR}/${SNEG_TCOPY}_${SGEMM_UNROLL_M}"
    ""
    "neg_tcopy"
    false "" "" false
    "SINGLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${SLASWP_NCOPY}_${SGEMM_UNROLL_N}"
    ""
    "laswp_ncopy"
    false "" "" false
    "SINGLE")
endif ()
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  # COMPLEX16 is enabled but DOUBLE is not: generate the double-precision
  # neg_tcopy / laswp_ncopy objects explicitly.
  # Kernel source names fall back to the generic implementations unless
  # they were already defined.
  if (NOT DEFINED DLASWP_NCOPY)
    set(DLASWP_NCOPY ../generic/laswp_ncopy.c)
  endif ()
  if (NOT DEFINED DNEG_TCOPY)
    set(DNEG_TCOPY ../generic/neg_tcopy.c)
  endif ()

  GenerateNamedObjects(
    "${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}"
    ""
    "neg_tcopy"
    false "" "" false
    "DOUBLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}"
    ""
    "laswp_ncopy"
    false "" "" false
    "DOUBLE")
endif ()
endif()

if (${DYNAMIC_ARCH})
@@ -557,8 +695,147 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
endforeach ()


if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
  # COMPLEX without SINGLE: add the single-precision gemv kernels and the
  # generic neg_tcopy / laswp_ncopy helpers.  Only the two generic helpers
  # receive ${TSUFFIX} as the append argument; the gemv kernels pass "".
  GenerateNamedObjects(
    "${KERNELDIR}/${SGEMVNKERNEL}"
    "" "gemv_n"
    false "" "" false "SINGLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${SGEMVTKERNEL}"
    "TRANS" "gemv_t"
    false "" "" false "SINGLE")
  GenerateNamedObjects(
    "generic/neg_tcopy_${SGEMM_UNROLL_M}.c"
    "" "neg_tcopy"
    false "" ${TSUFFIX} false "SINGLE")
  GenerateNamedObjects(
    "generic/laswp_ncopy_${SGEMM_UNROLL_N}.c"
    "" "laswp_ncopy"
    false "" ${TSUFFIX} false "SINGLE")
endif ()
if (BUILD_DOUBLE AND NOT BUILD_SINGLE)
  # DOUBLE without SINGLE: generate the single-precision objects listed
  # below.  Objects are emitted in a fixed order: LAPACK copy helpers,
  # TRSM kernels, TRSM copy routines, optional GEMM copy routines, GEMV.

  # --- LAPACK copy helpers -------------------------------------------
  GenerateNamedObjects(
    "generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy"
    false "" ${TSUFFIX} false "SINGLE")
  GenerateNamedObjects(
    "generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy"
    false "" ${TSUFFIX} false "SINGLE")

  # --- TRSM kernels (the *N variants additionally define UPPER) -------
  GenerateNamedObjects(
    "${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN"
    false "" ${TSUFFIX} false "SINGLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT"
    false "" ${TSUFFIX} false "SINGLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN"
    false "" ${TSUFFIX} false "SINGLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT"
    false "" ${TSUFFIX} false "SINGLE")

  # --- TRSM copy routines ----------------------------------------------
  # For each generic source family (uncopy, lncopy, utcopy, ltcopy) four
  # objects are produced, in this order:
  #   trsm_i<tri>ucopy  (UNROLL_M,        ... UNIT)
  #   trsm_i<tri>ncopy  (UNROLL_M)
  #   trsm_o<tri>ucopy  (UNROLL_N, OUTER  ... UNIT)
  #   trsm_o<tri>ncopy  (UNROLL_N, OUTER)
  # The "ln"/"lt" families additionally define LOWER.
  set(strsm_side_un "")
  set(strsm_side_ln "LOWER")
  set(strsm_side_ut "")
  set(strsm_side_lt "LOWER")
  foreach (tri un ln ut lt)
    # Define lists keep the order OUTER, LOWER, UNIT.
    set(inner_unit ${strsm_side_${tri}} UNIT)
    set(inner_nonunit "${strsm_side_${tri}}")
    set(outer_unit OUTER ${strsm_side_${tri}} UNIT)
    set(outer_nonunit OUTER ${strsm_side_${tri}})

    GenerateNamedObjects(
      "generic/trsm_${tri}copy_${SGEMM_UNROLL_M}.c" "${inner_unit}" "trsm_i${tri}ucopy"
      false "" ${TSUFFIX} false "SINGLE")
    GenerateNamedObjects(
      "generic/trsm_${tri}copy_${SGEMM_UNROLL_M}.c" "${inner_nonunit}" "trsm_i${tri}ncopy"
      false "" ${TSUFFIX} false "SINGLE")
    GenerateNamedObjects(
      "generic/trsm_${tri}copy_${SGEMM_UNROLL_N}.c" "${outer_unit}" "trsm_o${tri}ucopy"
      false "" ${TSUFFIX} false "SINGLE")
    GenerateNamedObjects(
      "generic/trsm_${tri}copy_${SGEMM_UNROLL_N}.c" "${outer_nonunit}" "trsm_o${tri}ncopy"
      false "" ${TSUFFIX} false "SINGLE")
  endforeach ()

  # --- GEMM copy routines (each only when its kernel variable is set) --
  foreach (copy_variant INCOPY ITCOPY ONCOPY OTCOPY)
    if (SGEMM${copy_variant})
      GenerateNamedObjects(
        "${KERNELDIR}/${SGEMM${copy_variant}}" "SINGLE" "${SGEMM${copy_variant}OBJ}"
        false "" "" true "SINGLE")
    endif ()
  endforeach ()

  # --- GEMV kernels ----------------------------------------------------
  GenerateNamedObjects(
    "${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n"
    false "" "" false "SINGLE")
  GenerateNamedObjects(
    "${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t"
    false "" "" false "SINGLE")
endif ()
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  # COMPLEX16 without DOUBLE: add the double-precision generic
  # neg_tcopy / laswp_ncopy helpers, sized by DGEMM_UNROLL_M / _N.
  GenerateNamedObjects(
    "generic/neg_tcopy_${DGEMM_UNROLL_M}.c"
    "" "neg_tcopy"
    false "" ${TSUFFIX} false "DOUBLE")
  GenerateNamedObjects(
    "generic/laswp_ncopy_${DGEMM_UNROLL_N}.c"
    "" "laswp_ncopy"
    false "" ${TSUFFIX} false "DOUBLE")
endif ()
# COMPLEX16 without COMPLEX: generate the objects below with float type
# "COMPLEX" (the last argument of every call), using the C-prefixed kernel
# variables.
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX)
# amax/amin are always generated; max/min only when a kernel is defined.
GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX")
if (DEFINED CMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX")
endif ()
if (DEFINED CMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX")
endif ()
# NOTE(review): the "i*..." names contain a literal '*'; presumably
# GenerateNamedObjects substitutes the type letter there -- confirm.
GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX")
if (DEFINED ICMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX")
endif ()
if (DEFINED ICMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX")
endif ()
# Vector kernels: asum, axpy, copy, nrm2, rot, scal, swap, axpby, sum.
GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX")
# The same axpy / dot sources are built twice: plain and with CONJ.
GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX")
# Eight gemv objects from two sources (N and T kernels), one per
# combination of the CONJ and XCONJ defines; the T kernel adds TRANSA.
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX")
# TRSM kernels: each of the four sources is built with CONJ (LR/LC/RR/RC)
# and without it (LN/LT/RN/RT).
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX")
# One GEMM kernel source, built four times with NN / CN / NC / CC.
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX")
# GEMM copy routines are generated only when the corresponding kernel
# variable is set; these calls pass true as the seventh argument.
if (CGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX")
endif ()

if (CGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX")
endif ()

if (CGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX")
endif ()

if (CGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX")
endif ()
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX")
# TRSM copy routines from the generic ztrsm_* sources, sized by
# CGEMM_UNROLL_M (trsm_i*) or CGEMM_UNROLL_N with OUTER (trsm_o*).
# These calls pass ${TSUFFIX} as the sixth argument.
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX")

GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX")

GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX")

GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX")
# NOTE(review): these two COMPLEX objects are sized with DGEMM_UNROLL_M/N,
# while every other COMPLEX entry in this block uses CGEMM_UNROLL_M/N.
# Confirm whether CGEMM_UNROLL_* was intended here.
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX")
endif ()
endif ()

add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC})
set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}")
@@ -573,7 +850,7 @@ if (${DYNAMIC_ARCH})
set(BUILD_KERNEL 1)
set(KDIR "")
set(TSUFFIX "_${TARGET_CORE}")
set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}")
set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}")
build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}")
set(ADD_COMMONOBJS 0)
endforeach()


+ 64
- 6
kernel/Makefile.L2 View File

@@ -186,31 +186,46 @@ ifndef XHEMV_M_KERNEL
XHEMV_M_KERNEL = ../generic/zhemv_k.c
endif

ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
SBLASOBJS += \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \
sger_k$(TSUFFIX).$(SUFFIX)

endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \
dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \
dger_k$(TSUFFIX).$(SUFFIX)
endif
QBLASOBJS += \
qgemv_n$(TSUFFIX).$(SUFFIX) qgemv_t$(TSUFFIX).$(SUFFIX) qsymv_U$(TSUFFIX).$(SUFFIX) qsymv_L$(TSUFFIX).$(SUFFIX) \
qger_k$(TSUFFIX).$(SUFFIX)

ifeq ($(BUILD_COMPLEX),1)
SBLASOBJS += \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
CBLASOBJS += \
cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \
cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) \
csymv_U$(TSUFFIX).$(SUFFIX) csymv_L$(TSUFFIX).$(SUFFIX) \
chemv_U$(TSUFFIX).$(SUFFIX) chemv_L$(TSUFFIX).$(SUFFIX) chemv_V$(TSUFFIX).$(SUFFIX) chemv_M$(TSUFFIX).$(SUFFIX) \
cgeru_k$(TSUFFIX).$(SUFFIX) cgerc_k$(TSUFFIX).$(SUFFIX) cgerv_k$(TSUFFIX).$(SUFFIX) cgerd_k$(TSUFFIX).$(SUFFIX)

endif
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS += \
cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \
cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX)
DBLASOBJS += \
dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX)
ZBLASOBJS += \
zgemv_n$(TSUFFIX).$(SUFFIX) zgemv_t$(TSUFFIX).$(SUFFIX) zgemv_r$(TSUFFIX).$(SUFFIX) zgemv_c$(TSUFFIX).$(SUFFIX) \
zgemv_o$(TSUFFIX).$(SUFFIX) zgemv_u$(TSUFFIX).$(SUFFIX) zgemv_s$(TSUFFIX).$(SUFFIX) zgemv_d$(TSUFFIX).$(SUFFIX) \
zsymv_U$(TSUFFIX).$(SUFFIX) zsymv_L$(TSUFFIX).$(SUFFIX) \
zhemv_U$(TSUFFIX).$(SUFFIX) zhemv_L$(TSUFFIX).$(SUFFIX) zhemv_V$(TSUFFIX).$(SUFFIX) zhemv_M$(TSUFFIX).$(SUFFIX) \
zgeru_k$(TSUFFIX).$(SUFFIX) zgerc_k$(TSUFFIX).$(SUFFIX) zgerv_k$(TSUFFIX).$(SUFFIX) zgerd_k$(TSUFFIX).$(SUFFIX)
endif

XBLASOBJS += \
xgemv_n$(TSUFFIX).$(SUFFIX) xgemv_t$(TSUFFIX).$(SUFFIX) xgemv_r$(TSUFFIX).$(SUFFIX) xgemv_c$(TSUFFIX).$(SUFFIX) \
@@ -219,17 +234,21 @@ XBLASOBJS += \
xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)

# Compile rules for the single-precision real GEMV kernels: sgemv_n is built
# with -UTRANS, sgemv_t with -DTRANS (both -UDOUBLE -UCOMPLEX).
# The rules are enabled when any of BUILD_SINGLE, BUILD_DOUBLE or
# BUILD_COMPLEX is non-empty, not only for BUILD_SINGLE.
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@

$(KDIR)sgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@
endif

# Compile rules for the double-precision real GEMV kernels: dgemv_n is built
# with -UTRANS, dgemv_t with -DTRANS (both -DDOUBLE -UCOMPLEX).
# The rules are enabled when BUILD_DOUBLE or BUILD_COMPLEX16 is non-empty.
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
$(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@

$(KDIR)dgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@
endif

$(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@
@@ -237,6 +256,8 @@ $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)qgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVTKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@


ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
$(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@

@@ -260,6 +281,10 @@ $(KDIR)cgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE

$(KDIR)cgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
endif


ifeq ($(BUILD_COMPLEX16),1)

$(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@@ -284,6 +309,7 @@ $(KDIR)zgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE

$(KDIR)zgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
endif

$(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@@ -309,17 +335,25 @@ $(KDIR)xgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE
$(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVTKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@


# Compile rules for the single-precision SYMV kernels, only when
# BUILD_SINGLE is 1: ssymv_U is built with -ULOWER, ssymv_L with -DLOWER.
ifeq ($(BUILD_SINGLE),1)

$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@

$(KDIR)ssymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_L_KERNEL) $(SSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@
endif


# Compile rules for the double-precision SYMV kernels, only when
# BUILD_DOUBLE is 1: dsymv_U is built with -ULOWER, dsymv_L with -DLOWER.
ifeq ($(BUILD_DOUBLE),1)

$(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@

$(KDIR)dsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_L_KERNEL) $(DSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@
endif

$(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@
@@ -327,17 +361,23 @@ $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)qsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_L_KERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@

# Compile rules for the single-precision complex SYMV kernels, only when
# BUILD_COMPLEX is 1: csymv_U is built with -ULOWER, csymv_L with -DLOWER.
ifeq ($(BUILD_COMPLEX),1)

$(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@

$(KDIR)csymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_L_KERNEL) $(CSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@
endif

# Compile rules for the double-precision complex SYMV kernels, only when
# BUILD_COMPLEX16 is 1: zsymv_U is built with -ULOWER, zsymv_L with -DLOWER.
ifeq ($(BUILD_COMPLEX16),1)

$(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@

$(KDIR)zsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_L_KERNEL) $(ZSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@
endif

$(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@
@@ -345,15 +385,23 @@ $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_L_KERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@

# Compile rule for the single-precision GER kernel (-UDOUBLE), only when
# BUILD_SINGLE is 1.
ifeq ($(BUILD_SINGLE),1)

$(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM)
$(CC) -c $(CFLAGS) -UDOUBLE $< -o $@
endif

# Compile rule for the double-precision GER kernel (-DDOUBLE), only when
# BUILD_DOUBLE is 1.
ifeq ($(BUILD_DOUBLE),1)

$(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM)
$(CC) -c $(CFLAGS) -DDOUBLE $< -o $@
endif

$(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM)
$(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@

ifeq ($(BUILD_COMPLEX),1)

$(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM)
$(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@

@@ -365,6 +413,9 @@ $(KDIR)cgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER

$(KDIR)cgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERCKERNEL) $(CGERPARAM)
$(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@
endif

ifeq ($(BUILD_COMPLEX16),1)

$(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM)
$(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@
@@ -377,6 +428,7 @@ $(KDIR)zgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER

$(KDIR)zgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERCKERNEL) $(ZGERPARAM)
$(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@
endif

$(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM)
$(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@
@@ -390,6 +442,8 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM)
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@

ifeq ($(BUILD_COMPLEX),1)

$(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@

@@ -401,6 +455,9 @@ $(KDIR)chemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_V$(TSUFFIX).$(PSUFFIX) : $(KER

$(KDIR)chemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_M_KERNEL) $(CHEMV_L_PARAM) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
endif

ifeq ($(BUILD_COMPLEX16),1)

$(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@
@@ -413,7 +470,7 @@ $(KDIR)zhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER

$(KDIR)zhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_M_KERNEL) $(ZHEMV_L_PARAM) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
endif
$(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@

@@ -426,3 +483,4 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@



+ 67
- 17
kernel/Makefile.L3 View File

@@ -100,8 +100,10 @@ SHKERNELOBJS += \
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
endif

ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
SKERNELOBJS += \
sgemm_kernel$(TSUFFIX).$(SUFFIX) \
sgemm_beta$(TSUFFIX).$(SUFFIX) \
$(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \
$(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ)

@@ -110,28 +112,36 @@ SKERNELOBJS += \
sgemm_direct$(TSUFFIX).$(SUFFIX) \
sgemm_direct_performant$(TSUFFIX).$(SUFFIX)
endif
endif

# Double-precision GEMM kernel objects (beta, kernel and the four copy
# routine objects) are added to DKERNELOBJS when BUILD_DOUBLE or
# BUILD_COMPLEX16 is non-empty.
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DKERNELOBJS += \
dgemm_beta$(TSUFFIX).$(SUFFIX) \
dgemm_kernel$(TSUFFIX).$(SUFFIX) \
$(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \
$(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ)
endif

QKERNELOBJS += \
qgemm_kernel$(TSUFFIX).$(SUFFIX) \
$(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \
$(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ)

# Single-precision complex GEMM kernel objects (the n/r/l/b kernel variants
# and the four copy routine objects) are added to CKERNELOBJS when
# BUILD_COMPLEX or BUILD_COMPLEX16 is non-empty.
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CKERNELOBJS += \
cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \
cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \
$(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \
$(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ)
endif

# Double-precision complex GEMM kernel objects (the n/r/l/b kernel variants
# and the four copy routine objects) are added to ZKERNELOBJS only when
# BUILD_COMPLEX16 is 1.
ifeq ($(BUILD_COMPLEX16),1)
ZKERNELOBJS += \
zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \
zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \
$(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \
$(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ)
endif

XKERNELOBJS += \
xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \
@@ -153,38 +163,48 @@ ifeq ($(BUILD_HALF),1)
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
endif

ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS += \
sgemm_beta$(TSUFFIX).$(SUFFIX) \
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \
dgemm_beta$(TSUFFIX).$(SUFFIX) \
dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX)
endif

QBLASOBJS += \
qgemm_beta$(TSUFFIX).$(SUFFIX) \
qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX)

ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \
cgemm_beta$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS += \
cgemm_beta$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \
zgemm_beta$(TSUFFIX).$(SUFFIX) \
ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
@@ -194,7 +214,8 @@ ZBLASOBJS += \
ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX)
endif

XBLASOBJS += \
xgemm_beta$(TSUFFIX).$(SUFFIX) \
@@ -205,7 +226,7 @@ XBLASOBJS += \
xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX)

ifeq ($(USE_GEMM3M), 1)

@@ -215,6 +236,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX)

endif

ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \
strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@@ -223,7 +245,10 @@ SBLASOBJS += \
strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \
strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \
strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS += \
strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \
strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \
strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \
@@ -231,10 +256,15 @@ SBLASOBJS += \
strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \
strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \
strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \
ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \
dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@@ -254,6 +284,7 @@ DBLASOBJS += \
dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \
dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \
dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX)
endif

QBLASOBJS += \
qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
@@ -273,8 +304,9 @@ QBLASOBJS += \
qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \
qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \
qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \
qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) \
qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX)

ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \
ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@@ -284,6 +316,13 @@ CBLASOBJS += \
ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \
ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \
ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \
csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \
csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \
chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \
chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS += \
ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \
@@ -291,12 +330,10 @@ CBLASOBJS += \
ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) \
csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \
csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \
chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \
chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX)
ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \
ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@@ -318,6 +355,7 @@ ZBLASOBJS += \
zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \
zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX)
endif

XBLASOBJS += \
xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
@@ -343,6 +381,7 @@ XBLASOBJS += \

ifeq ($(USE_GEMM3M), 1)

ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \
@@ -362,7 +401,9 @@ CBLASOBJS += \
chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \
chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \
chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \
zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \
@@ -382,6 +423,7 @@ ZBLASOBJS += \
zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \
zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \
zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX)
endif

XBLASOBJS += \
xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
@@ -406,20 +448,25 @@ XBLASOBJS += \
endif

###### BLAS extensions #####

# Single-precision real objects for the BLAS extensions: the somatcopy,
# simatcopy and sgeadd kernels. Added only when BUILD_SINGLE is 1.
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX)
SBLASOBJS += somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX)
SBLASOBJS += simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX)
SBLASOBJS += simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX)
SBLASOBJS += sgeadd_k$(TSUFFIX).$(SUFFIX)

endif
# Double-precision real objects for the BLAS extensions: the domatcopy,
# dimatcopy and dgeadd kernels. Added only when BUILD_DOUBLE is 1.
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX)
DBLASOBJS += domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX)
DBLASOBJS += dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX)
DBLASOBJS += dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX)
DBLASOBJS += dgeadd_k$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \
comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
@@ -430,7 +477,9 @@ CBLASOBJS += \
cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
cgeadd_k$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \
zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
@@ -441,6 +490,7 @@ ZBLASOBJS += \
zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
zgeadd_k$(TSUFFIX).$(SUFFIX)
endif

ifeq ($(BUILD_HALF), 1)
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))


+ 375
- 39
kernel/setparam-ref.c View File

@@ -114,6 +114,7 @@ gotoblas_t TABLE_NAME = {
#endif
#endif

#if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
0, 0, 0,
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
#ifdef SGEMM_DEFAULT_UNROLL_MN
@@ -121,7 +122,7 @@ gotoblas_t TABLE_NAME = {
#else
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif
#endif

#ifdef HAVE_EXCLUSIVE_CACHE
1,
@@ -129,19 +130,38 @@ gotoblas_t TABLE_NAME = {
0,
#endif

#if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS,
dsdot_kTS,
srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
sgemv_nTS, sgemv_tTS, sger_kTS,
snrm2_kTS, sasum_kTS,
#endif
#if BUILD_SINGLE == 1
ssum_kTS,
#endif

#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
scopy_kTS, sdot_kTS,
// dsdot_kTS,
srot_kTS, saxpy_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
sscal_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
sswap_kTS,
sgemv_nTS, sgemv_tTS,
#endif
#if BUILD_SINGLE == 1
sger_kTS,
ssymv_LTS, ssymv_UTS,
#endif

#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#ifdef ARCH_X86_64
sgemm_directTS,
sgemm_direct_performantTS,
#endif
sgemm_kernelTS, sgemm_betaTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
sgemm_incopyTS, sgemm_itcopyTS,
@@ -149,6 +169,9 @@ gotoblas_t TABLE_NAME = {
sgemm_oncopyTS, sgemm_otcopyTS,
#endif
sgemm_oncopyTS, sgemm_otcopyTS,
#endif

#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
@@ -159,6 +182,8 @@ gotoblas_t TABLE_NAME = {
#endif
strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
#endif
#if BUILD_SINGLE == 1
strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
@@ -175,13 +200,16 @@ gotoblas_t TABLE_NAME = {
ssymm_outcopyTS, ssymm_oltcopyTS,
#endif
ssymm_outcopyTS, ssymm_oltcopyTS,

#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
#ifndef NO_LAPACK
sneg_tcopyTS, slaswp_ncopyTS,
#else
NULL,NULL,
#endif
#endif

#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
0, 0, 0,
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN
@@ -189,14 +217,36 @@ gotoblas_t TABLE_NAME = {
#else
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif
#endif


#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS,
drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS,
dgemv_nTS, dgemv_tTS, dger_kTS,
dnrm2_kTS, dasum_kTS,
#endif
#if (BUILD_DOUBLE==1)
dsum_kTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
dcopy_kTS, ddot_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
dsdot_kTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
drot_kTS,
daxpy_kTS,
dscal_kTS,
dswap_kTS,
dgemv_nTS, dgemv_tTS,
#endif
#if (BUILD_DOUBLE==1)
dger_kTS,
dsymv_LTS, dsymv_UTS,
#endif

#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
dgemm_kernelTS, dgemm_betaTS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
dgemm_incopyTS, dgemm_itcopyTS,
@@ -204,6 +254,9 @@ gotoblas_t TABLE_NAME = {
dgemm_oncopyTS, dgemm_otcopyTS,
#endif
dgemm_oncopyTS, dgemm_otcopyTS,
#endif

#if (BUILD_DOUBLE==1)
dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
@@ -237,6 +290,8 @@ gotoblas_t TABLE_NAME = {
NULL, NULL,
#endif

#endif

#ifdef EXPRECISION

0, 0, 0,
@@ -291,6 +346,7 @@ gotoblas_t TABLE_NAME = {

#endif

#if (BUILD_COMPLEX || BUILD_COMPLEX16)
0, 0, 0,
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN
@@ -298,21 +354,34 @@ gotoblas_t TABLE_NAME = {
#else
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif

camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS,
cdotu_kTS, cdotc_kTS, csrot_kTS,
caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS,
#endif
#if (BUILD_COMPLEX)
cnrm2_kTS, casum_kTS, csum_kTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
ccopy_kTS, cdotu_kTS, cdotc_kTS,
#endif
#if (BUILD_COMPLEX)
csrot_kTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
caxpy_kTS,
caxpyc_kTS,
cscal_kTS,
cswap_kTS,

cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
#endif
#if (BUILD_COMPLEX)
cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
csymv_LTS, csymv_UTS,
chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,

#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
cgemm_betaTS,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
cgemm_incopyTS, cgemm_itcopyTS,
#else
@@ -332,6 +401,8 @@ gotoblas_t TABLE_NAME = {
#endif
ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
#endif
#if (BUILD_COMPLEX)

ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
@@ -361,7 +432,7 @@ gotoblas_t TABLE_NAME = {

0, 0, 0,

#if defined(USE_GEMM3M)
#if (USE_GEMM3M)
#ifdef CGEMM3M_DEFAULT_UNROLL_M
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else
@@ -419,13 +490,20 @@ gotoblas_t TABLE_NAME = {
NULL, NULL,
NULL, NULL,
#endif
#endif

#if (BUILD_COMPLEX || BUILD_COMPLEX16)
#ifndef NO_LAPACK
cneg_tcopyTS, claswp_ncopyTS,
cneg_tcopyTS,
claswp_ncopyTS,
#else
NULL, NULL,
#endif

#endif

#if BUILD_COMPLEX16 == 1
0, 0, 0,
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
#ifdef ZGEMM_DEFAULT_UNROLL_MN
@@ -495,7 +573,7 @@ gotoblas_t TABLE_NAME = {
zhemm_outcopyTS, zhemm_oltcopyTS,

0, 0, 0,
#if defined(USE_GEMM3M)
#if (USE_GEMM3M)
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else
@@ -560,6 +638,8 @@ gotoblas_t TABLE_NAME = {
NULL, NULL,
#endif

#endif

#ifdef EXPRECISION

0, 0, 0,
@@ -626,7 +706,7 @@ gotoblas_t TABLE_NAME = {
xhemm_outcopyTS, xhemm_oltcopyTS,

0, 0, 0,
#if defined(USE_GEMM3M)
#if (USE_GEMM3M)
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

xgemm3m_kernelTS,
@@ -691,52 +771,112 @@ gotoblas_t TABLE_NAME = {
init_parameter,

SNUMOPT, DNUMOPT, QNUMOPT,
#if BUILD_SINGLE == 1
saxpby_kTS,
#endif
#if BUILD_DOUBLE == 1
daxpby_kTS,
#endif
#if BUILD_COMPLEX == 1
caxpby_kTS,
#endif
#if BUILD_COMPLEX16== 1
zaxpby_kTS,
#endif

saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS,

#if BUILD_SINGLE == 1
somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
#endif
#if BUILD_DOUBLE== 1
domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
#endif
#if BUILD_COMPLEX == 1
comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
#endif
#if BUILD_COMPLEX16 == 1
zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
#endif

#if BUILD_SINGLE == 1
simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
#endif
#if BUILD_DOUBLE== 1
dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
#endif
#if BUILD_COMPLEX== 1
cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
#endif
#if BUILD_COMPLEX16==1
zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
#endif

sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS

#if BUILD_SINGLE == 1
sgeadd_kTS,
#endif
#if BUILD_DOUBLE==1
dgeadd_kTS,
#endif
#if BUILD_COMPLEX==1
cgeadd_kTS,
#endif
#if BUILD_COMPLEX16==1
zgeadd_kTS
#endif
};

#if defined(ARCH_ARM64)
#if (ARCH_ARM64)
static void init_parameter(void) {
#if defined(BUILD_HALF)
#if (BUILD_HALF)
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif

#if defined(BUILD_HALF)
#if (BUILD_HALF)
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
#endif
#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
#endif
#if BUILD_DOUBLE== 1
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX== 1
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
#endif

#if defined(BUILD_HALF)
#if (BUILD_HALF)
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
#endif
#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
#endif
#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
#endif

#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
@@ -747,7 +887,7 @@ static void init_parameter(void) {
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
#endif

#if defined(USE_GEMM3M)
#if (USE_GEMM3M)
#ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
@@ -792,8 +932,8 @@ static void init_parameter(void) {
#endif

}
#else // defined(ARCH_ARM64)
#if defined(ARCH_POWER)
#else // (ARCH_ARM64)
#if (ARCH_POWER)
static void init_parameter(void) {

#ifdef BUILD_HALF
@@ -823,7 +963,7 @@ static void init_parameter(void) {
}
#else //POWER

#if defined(ARCH_ZARCH)
#if (ARCH_ZARCH)
static void init_parameter(void) {
#ifdef BUILD_HALF
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
@@ -989,22 +1129,34 @@ static void init_parameter(void) {
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX == 1
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
#endif

#if BUILD_COMPLEX == 1
#ifdef CGEMM3M_DEFAULT_Q
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif
#endif

#if BUILD_COMPLEX16 == 1
#ifdef ZGEMM3M_DEFAULT_Q
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif
#endif

#ifdef EXPRECISION
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
@@ -1012,16 +1164,24 @@ static void init_parameter(void) {
TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif

#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
#if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON)

#ifdef DEBUG
fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
@@ -1034,10 +1194,18 @@ static void init_parameter(void) {
fprintf(stderr, "Northwood\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
@@ -1050,10 +1218,18 @@ static void init_parameter(void) {
fprintf(stderr, "Atom\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 256;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 128;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 128;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 64;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 64;
TABLE_NAME.xgemm_p = 32;
@@ -1066,10 +1242,18 @@ static void init_parameter(void) {
fprintf(stderr, "Prescott\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16 == 1
TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
@@ -1082,10 +1266,18 @@ static void init_parameter(void) {
fprintf(stderr, "Core2\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
@@ -1098,10 +1290,18 @@ static void init_parameter(void) {
fprintf(stderr, "Penryn\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
@@ -1114,10 +1314,18 @@ static void init_parameter(void) {
fprintf(stderr, "Dunnington\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
@@ -1131,10 +1339,18 @@ static void init_parameter(void) {
fprintf(stderr, "Nehalem\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1147,10 +1363,18 @@ static void init_parameter(void) {
fprintf(stderr, "Sandybridge\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1163,26 +1387,42 @@ static void init_parameter(void) {
fprintf(stderr, "Haswell\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#if defined (SKYLAKEX) || defined (COOPERLAKE)
#if defined(SKYLAKEX) || defined(COOPERLAKE)

#ifdef DEBUG
fprintf(stderr, "SkylakeX\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1196,10 +1436,18 @@ static void init_parameter(void) {
fprintf(stderr, "Opteron\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
@@ -1212,10 +1460,18 @@ static void init_parameter(void) {
fprintf(stderr, "Barcelona\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1228,10 +1484,18 @@ static void init_parameter(void) {
fprintf(stderr, "Bobcate\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1244,10 +1508,18 @@ static void init_parameter(void) {
fprintf(stderr, "Bulldozer\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1260,10 +1532,18 @@ static void init_parameter(void) {
fprintf(stderr, "Excavator\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1277,10 +1557,18 @@ static void init_parameter(void) {
fprintf(stderr, "Piledriver\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1293,10 +1581,18 @@ static void init_parameter(void) {
fprintf(stderr, "Steamroller\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1309,10 +1605,18 @@ static void init_parameter(void) {
fprintf(stderr, "Zen\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@@ -1326,11 +1630,18 @@ static void init_parameter(void) {
fprintf(stderr, "NANO\n");
#endif

#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
/* dgemm_p must also be set in COMPLEX16-only builds: the zgemm3m_p fallback
   later in this function copies TABLE_NAME.dgemm_p when ZGEMM3M_DEFAULT_P
   is not defined. */
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX==1)
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX16==1)
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;

#endif


#ifdef EXPRECISION
@@ -1340,41 +1651,55 @@ static void init_parameter(void) {

#endif

#if BUILD_COMPLEX==1
#ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif
#endif

#if BUILD_COMPLEX16==1
#ifdef ZGEMM3M_DEFAULT_P
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif
#endif

#ifdef EXPRECISION
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif


#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_DOUBLE== 1
TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
#endif

#if BUILD_COMPLEX==1
#ifdef CGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
#else
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif
#endif

#if BUILD_COMPLEX16==1
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
#else
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif
#endif

#ifdef QUAD_PRECISION
TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
@@ -1386,15 +1711,19 @@ static void init_parameter(void) {
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif

#if BUILD_SINGLE==1
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
#endif

#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
#endif

#ifdef EXPRECISION
TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
@@ -1403,26 +1732,33 @@ static void init_parameter(void) {
) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
#endif

#if BUILD_COMPLEX ==1
TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
#endif

#if BUILD_COMPLEX16 ==1
TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
#endif

#if BUILD_COMPLEX == 1
TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
#endif

#if BUILD_COMPLEX16 == 1
TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
#endif



@@ -1444,4 +1780,4 @@ static void init_parameter(void) {
}
#endif //POWER
#endif //ZARCH
#endif //defined(ARCH_ARM64)
#endif //(ARCH_ARM64)

+ 68
- 31
lapack-netlib/LAPACKE/src/Makefile View File

@@ -46,6 +46,7 @@ OBJ = \
lapacke_ilaver.o \
lapacke_nancheck.o

ifeq ($(BUILD_COMPLEX),1)
OBJ_C = \
lapacke_cbbcsd.o \
lapacke_cbbcsd_work.o \
@@ -653,7 +654,9 @@ lapacke_cupgtr.o \
lapacke_cupgtr_work.o \
lapacke_cupmtr.o \
lapacke_cupmtr_work.o
endif

ifeq ($(BUILD_DOUBLE),1)
OBJ_D = \
lapacke_dbbcsd.o \
lapacke_dbbcsd_work.o \
@@ -1218,8 +1221,12 @@ lapacke_dtrttf_work.o \
lapacke_dtrttp.o \
lapacke_dtrttp_work.o \
lapacke_dtzrzf.o \
lapacke_dtzrzf_work.o
lapacke_dtzrzf_work.o \
lapacke_slag2d.o \
lapacke_slag2d_work.o
endif

ifeq ($(BUILD_SINGLE),1)
OBJ_S = \
lapacke_sbbcsd.o \
lapacke_sbbcsd_work.o \
@@ -1395,8 +1402,6 @@ lapacke_slacn2.o \
lapacke_slacn2_work.o \
lapacke_slacpy.o \
lapacke_slacpy_work.o \
lapacke_slag2d.o \
lapacke_slag2d_work.o \
lapacke_slamch.o \
lapacke_slamch_work.o \
lapacke_slange.o \
@@ -1781,7 +1786,9 @@ lapacke_strttp.o \
lapacke_strttp_work.o \
lapacke_stzrzf.o \
lapacke_stzrzf_work.o
endif

ifeq ($(BUILD_COMPLEX16),1)
OBJ_Z = \
lapacke_zbbcsd.o \
lapacke_zbbcsd_work.o \
@@ -2393,35 +2400,52 @@ lapacke_zupgtr.o \
lapacke_zupgtr_work.o \
lapacke_zupmtr.o \
lapacke_zupmtr_work.o
endif

ifdef BUILD_DEPRECATED
DEPRECATED = \
ifeq ($(BUILD_COMPLEX),1)
DEPRECATEDC = \
lapacke_cggsvp.o \
lapacke_cggsvp_work.o \
lapacke_dggsvp.o \
lapacke_dggsvp_work.o \
lapacke_sggsvp.o \
lapacke_sggsvp_work.o \
lapacke_zggsvp.o \
lapacke_zggsvp_work.o \
lapacke_cggsvd.o \
lapacke_cggsvd_work.o \
lapacke_cgeqpf.o \
lapacke_cgeqpf_work.o
endif

# Double-precision members of the deprecated object list; each routine is
# listed together with its _work companion. Set only when BUILD_DOUBLE is 1.
ifeq ($(BUILD_DOUBLE),1)
DEPRECATEDD = lapacke_dggsvp.o lapacke_dggsvp_work.o
DEPRECATEDD += lapacke_dggsvd.o lapacke_dggsvd_work.o
DEPRECATEDD += lapacke_dgeqpf.o lapacke_dgeqpf_work.o
endif

# Single-precision members of the deprecated object list; each routine is
# listed together with its _work companion. Set only when BUILD_SINGLE is 1.
ifeq ($(BUILD_SINGLE),1)
DEPRECATEDS = lapacke_sggsvp.o lapacke_sggsvp_work.o
DEPRECATEDS += lapacke_sggsvd.o lapacke_sggsvd_work.o
DEPRECATEDS += lapacke_sgeqpf.o lapacke_sgeqpf_work.o
endif

ifeq ($(BUILD_COMPLEX16),1)
DEPRECATEDZ = \
lapacke_zggsvp.o \
lapacke_zggsvp_work.o \
lapacke_zggsvd.o \
lapacke_zggsvd_work.o \
lapacke_cgeqpf.o \
lapacke_cgeqpf_work.o \
lapacke_dgeqpf.o \
lapacke_dgeqpf_work.o \
lapacke_sgeqpf.o \
lapacke_sgeqpf_work.o \
lapacke_zgeqpf.o \
lapacke_zgeqpf_work.o
endif

DEPRECATED = $(DEPRECATEDS) $(DEPRECATEDD) $(DEPRECATEDC) $(DEPRECATEDZ)
endif

ifdef USEXBLAS
EXTENDED = \
lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \
@@ -2440,37 +2464,50 @@ endif

ifdef LAPACKE_WITH_TMG
# FILE PARTS OF TMGLIB
MATGEN = \
ifeq ($(BUILD_COMPLEX),1)
MATGENC = \
lapacke_clatms.o \
lapacke_clatms_work.o \
lapacke_dlatms.o \
lapacke_dlatms_work.o \
lapacke_slatms.o \
lapacke_slatms_work.o \
lapacke_zlatms.o \
lapacke_zlatms_work.o \
lapacke_clagge.o \
lapacke_clagge_work.o \
lapacke_claghe.o \
lapacke_claghe_work.o \
lapacke_clagsy.o \
lapacke_clagsy_work.o
endif
ifeq ($(BUILD_DOUBLE),1)
MATGEND = \
lapacke_dlatms.o \
lapacke_dlatms_work.o \
lapacke_dlagge.o \
lapacke_dlagge_work.o \
lapacke_dlagsy.o \
lapacke_dlagsy_work.o
endif
ifeq ($(BUILD_SINGLE),1)
MATGENS = \
lapacke_slatms.o \
lapacke_slatms_work.o \
lapacke_slagge.o \
lapacke_slagge_work.o \
lapacke_slagsy.o \
lapacke_slagsy_work.o
endif
ifeq ($(BUILD_COMPLEX16),1)
MATGENZ = \
lapacke_zlatms.o \
lapacke_zlatms_work.o \
lapacke_zlagge.o \
lapacke_zlagge_work.o \
lapacke_claghe.o \
lapacke_claghe_work.o \
lapacke_zlaghe.o \
lapacke_zlaghe_work.o \
lapacke_clagsy.o \
lapacke_clagsy_work.o \
lapacke_dlagsy.o \
lapacke_dlagsy_work.o \
lapacke_slagsy.o \
lapacke_slagsy_work.o \
lapacke_zlagsy.o \
lapacke_zlagsy_work.o
endif

MATGEN = $(MATGENS) $(MATGEND) $(MATGENC) $(MATGENZ)
endif

.PHONY: all
all: $(LAPACKELIB)



+ 46
- 16
lapack-netlib/SRC/Makefile View File

@@ -66,7 +66,9 @@ ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o

ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" ""
SCLAUX = \
sbdsvdx.o sstevx.o sstein.o \
sbdsdc.o \
sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
@@ -81,10 +83,14 @@ SCLAUX = \
slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \
slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
ssteqr.o ssterf.o slaisnan.o sisnan.o \
slartgp.o slartgs.o \
slartgp.o slartgs.o scombssq.o \
../INSTALL/second_$(TIMER).o
endif

ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DZLAUX = \
dcombssq.o \
dbdsvdx.o dstevx.o dstein.o \
dbdsdc.o \
dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
@@ -101,9 +107,12 @@ DZLAUX = \
dsteqr.o dsterf.o dlaisnan.o disnan.o \
dlartgp.o dlartgs.o \
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
endif

#ifeq ($(BUILD_SINGLE),1)
ifdef BUILD_SINGLE
SLASRC_O = \
sbdsvdx.o spotrf2.o sgetrf2.o \
spotrf2.o sgetrf2.o \
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
@@ -145,8 +154,7 @@ SLASRC_O = \
ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \
sstevx.o \
ssptrf.o ssptri.o ssptrs.o sstegr.o sstev.o sstevd.o sstevr.o \
ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \
@@ -180,9 +188,13 @@ SLASRC_O = \
ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
sgesvdq.o scombssq.o
sgesvdq.o
endif

ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
DSLASRC_O = spotrs.o sgetrs.o spotrf.o sgetrf.o
endif

ifdef USEXBLAS
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
@@ -194,6 +206,7 @@ SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
slascl2.o sla_wwaddw.o
endif

ifeq ($(BUILD_COMPLEX),1)
CLASRC_O = \
cpotrf2.o cgetrf2.o \
cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
@@ -284,6 +297,7 @@ CLASRC_O = \
cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \
chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \
cgesvdq.o
endif

ifdef USEXBLAS
CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
@@ -299,11 +313,13 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
endif

ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o clag2z.o
endif

ifeq ($(BUILD_DOUBLE),1)
DLASRC_O = \
dpotrf2.o dgetrf2.o \
dbdsvdx.o \
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
@@ -345,8 +361,7 @@ DLASRC_O = \
dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \
dstevx.o \
dsptrf.o dsptri.o dsptrs.o dstegr.o dstev.o dstevd.o dstevr.o \
dsycon.o dsyev.o dsyevd.o dsyevr.o \
dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
dsysv.o dsysvx.o \
@@ -381,7 +396,8 @@ DLASRC_O = \
dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
dgesvdq.o dcombssq.o
dgesvdq.o
endif

ifdef USEXBLAS
DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
@@ -393,6 +409,7 @@ DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
dlascl2.o dla_wwaddw.o
endif

ifeq ($(BUILD_COMPLEX16),1)
ZLASRC_O = \
zpotrf2.o zgetrf2.o \
zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
@@ -471,7 +488,7 @@ ZLASRC_O = \
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
zunmtr.o zupgtr.o \
zupmtr.o izmax1.o dzsum1.o zstemr.o \
zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \
zcgesv.o zcposv.o zlag2c.o zlat2c.o \
zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \
ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \
zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \
@@ -488,6 +505,7 @@ ZLASRC_O = \
zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \
zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \
zgesvdq.o
endif

ifdef USEXBLAS
ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
@@ -501,18 +519,30 @@ ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o
endif

DEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \
ifeq ($(BUILD_COMPLEX),1)
CDEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \
DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \
DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \
DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o
endif

ifeq ($(BUILD_DOUBLE),1)
DDEPRECSRC = \
DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \
DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \
DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \
DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o
endif
ifeq ($(BUILD_SINGLE),1)
SDEPRECSRC = \
DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \
DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \
DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \
DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o
endif
ifeq ($(BUILD_COMPLEX16),1)
ZDEPRECSRC = \
DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \
DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \
DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o
endif

# filter out optimized codes from OpenBLAS
ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o
@@ -560,7 +590,7 @@ ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif

ifdef BUILD_DEPRECATED
DEPRECATED = $(DEPRECSRC)
DEPRECATED = $(SDEPRECSRC) $(DDEPRECSRC) $(CDEPRECSRC) $(ZDEPRECSRC)
endif

.PHONY: all


+ 16
- 0
lapack-netlib/TESTING/MATGEN/Makefile View File

@@ -33,25 +33,37 @@
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc

ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" ""
SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o
endif

ifeq ($(BUILD_SINGLE),1)
SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \
slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \
slatm3.o slatm5.o slatm6.o slahilb.o
endif

ifeq ($(BUILD_COMPLEX),1)
CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \
clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \
clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o
endif

ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o
endif

ifeq ($(BUILD_DOUBLE),1)
DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \
dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \
dlatm3.o dlatm5.o dlatm6.o dlahilb.o
endif

ifeq ($(BUILD_COMPLEX16),1)
ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \
zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \
zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o
endif

.PHONY: all
all: $(TMGLIB)
@@ -97,5 +109,9 @@ cleanobj:
cleanlib:
rm -f $(TMGLIB)

ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
endif
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
endif

+ 8
- 0
lapack/getf2/Makefile View File

@@ -1,11 +1,19 @@
TOPDIR = ../..
include ../../Makefile.system

ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS = sgetf2_k.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dgetf2_k.$(SUFFIX)
endif
QBLASOBJS = qgetf2_k.$(SUFFIX)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS = cgetf2_k.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = zgetf2_k.$(SUFFIX)
endif
XBLASOBJS = xgetf2_k.$(SUFFIX)

sgetf2_k.$(SUFFIX) : getf2_k.c


+ 13
- 0
lapack/getrf/Makefile View File

@@ -17,6 +17,19 @@ ZBLASOBJS += zgetrf_parallel.$(SUFFIX)
XBLASOBJS += xgetrf_parallel.$(SUFFIX)
endif

ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

ifeq ($(USE_OPENMP), 1)
GETRF_SRC = getrf_parallel_omp.c
else


+ 13
- 0
lapack/getrs/Makefile View File

@@ -17,6 +17,19 @@ ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_pa
XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX)
endif

ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

sgetrs_N_single.$(SUFFIX) : getrs_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANS $< -o $(@F)



+ 8
- 0
lapack/laswp/Makefile View File

@@ -1,11 +1,19 @@
TOPDIR = ../..
include ../../Makefile.system

ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS = slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX)
endif
QBLASOBJS = qlaswp_plus.$(SUFFIX) qlaswp_minus.$(SUFFIX)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS = claswp_plus.$(SUFFIX) claswp_minus.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = zlaswp_plus.$(SUFFIX) zlaswp_minus.$(SUFFIX)
endif
XBLASOBJS = xlaswp_plus.$(SUFFIX) xlaswp_minus.$(SUFFIX)

slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) \


+ 8
- 0
lapack/lauu2/Makefile View File

@@ -1,11 +1,19 @@
TOPDIR = ../..
include ../../Makefile.system

ifeq ($(BUILD_SINGLE),1)
SBLASOBJS = slauu2_U.$(SUFFIX) slauu2_L.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dlauu2_U.$(SUFFIX) dlauu2_L.$(SUFFIX)
endif
QBLASOBJS = qlauu2_U.$(SUFFIX) qlauu2_L.$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS = clauu2_U.$(SUFFIX) clauu2_L.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = zlauu2_U.$(SUFFIX) zlauu2_L.$(SUFFIX)
endif
XBLASOBJS = xlauu2_U.$(SUFFIX) xlauu2_L.$(SUFFIX)

slauu2_U.$(SUFFIX) : lauu2_U.c


+ 13
- 0
lapack/lauum/Makefile View File

@@ -17,6 +17,19 @@ ZBLASOBJS += zlauum_U_parallel.$(SUFFIX) zlauum_L_parallel.$(SUFFIX)
XBLASOBJS += xlauum_U_parallel.$(SUFFIX) xlauum_L_parallel.$(SUFFIX)
endif

ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

slauum_U_single.$(SUFFIX) : lauum_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F)



+ 13
- 0
lapack/potf2/Makefile View File

@@ -8,6 +8,19 @@ CBLASOBJS = cpotf2_U.$(SUFFIX) cpotf2_L.$(SUFFIX)
ZBLASOBJS = zpotf2_U.$(SUFFIX) zpotf2_L.$(SUFFIX)
XBLASOBJS = xpotf2_U.$(SUFFIX) xpotf2_L.$(SUFFIX)

ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

spotf2_U.$(SUFFIX) : potf2_U.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F)



+ 14
- 0
lapack/potrf/Makefile View File

@@ -17,6 +17,20 @@ ZBLASOBJS += zpotrf_U_parallel.$(SUFFIX) zpotrf_L_parallel.$(SUFFIX)
XBLASOBJS += xpotrf_U_parallel.$(SUFFIX) xpotrf_L_parallel.$(SUFFIX)
endif

ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif


spotrf_U_single.$(SUFFIX) : potrf_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F)



+ 8
- 0
lapack/trti2/Makefile View File

@@ -1,11 +1,19 @@
TOPDIR = ../..
include ../../Makefile.system

ifeq ($(BUILD_SINGLE),1)
SBLASOBJS = strti2_UU.$(SUFFIX) strti2_UN.$(SUFFIX) strti2_LU.$(SUFFIX) strti2_LN.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dtrti2_UU.$(SUFFIX) dtrti2_UN.$(SUFFIX) dtrti2_LU.$(SUFFIX) dtrti2_LN.$(SUFFIX)
endif
QBLASOBJS = qtrti2_UU.$(SUFFIX) qtrti2_UN.$(SUFFIX) qtrti2_LU.$(SUFFIX) qtrti2_LN.$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS = ctrti2_UU.$(SUFFIX) ctrti2_UN.$(SUFFIX) ctrti2_LU.$(SUFFIX) ctrti2_LN.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = ztrti2_UU.$(SUFFIX) ztrti2_UN.$(SUFFIX) ztrti2_LU.$(SUFFIX) ztrti2_LN.$(SUFFIX)
endif
XBLASOBJS = xtrti2_UU.$(SUFFIX) xtrti2_UN.$(SUFFIX) xtrti2_LU.$(SUFFIX) xtrti2_LN.$(SUFFIX)

strti2_UU.$(SUFFIX) : trti2_U.c


+ 13
- 0
lapack/trtri/Makefile View File

@@ -23,6 +23,19 @@ ZBLASOBJS += ztrtri_UU_parallel.$(SUFFIX) ztrtri_UN_parallel.$(SUFFIX) ztrtri_LU
XBLASOBJS += xtrtri_UU_parallel.$(SUFFIX) xtrtri_UN_parallel.$(SUFFIX) xtrtri_LU_parallel.$(SUFFIX) xtrtri_LN_parallel.$(SUFFIX)
endif

ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

strtri_UU_single.$(SUFFIX) : trtri_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUNIT $< -o $(@F)



+ 13
- 0
lapack/trtrs/Makefile View File

@@ -17,6 +17,19 @@ ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_
XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX)
endif

ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

strtrs_UNU_single.$(SUFFIX) : trtrs_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F)



+ 15
- 3
test/CMakeLists.txt View File

@@ -4,7 +4,7 @@ include_directories(${PROJECT_BINARY_DIR})
enable_language(Fortran)

if (BUILD_SINGLE)
list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3)
list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3)
endif()
if (BUILD_DOUBLE)
list (APPEND OpenBLAS_Tests dblat1 dblat2 dblat3)
@@ -17,7 +17,7 @@ if (BUILD_COMPLEX16)
endif()

foreach(test_bin ${OpenBLAS_Tests})
add_executable(${test_bin} ${test_bin}.f)
add_executable(${test_bin} ${test_bin}.f)
target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME})
endforeach()

@@ -34,7 +34,19 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh
"fi\n"
)

set(float_types s d c z)
#set(float_types s d c z)
if (BUILD_SINGLE)
list (APPEND float_types s)
endif()
if (BUILD_DOUBLE)
list (APPEND float_types d)
endif()
if (BUILD_COMPLEX)
list (APPEND float_types c)
endif()
if (BUILD_COMPLEX16)
list (APPEND float_types z)
endif()
foreach(float_type ${float_types})
string(TOUPPER ${float_type} float_type_upper)
add_test(NAME "${float_type}blas1"


+ 217
- 27
test/Makefile View File

@@ -7,82 +7,241 @@ all ::
else
all :: level1 level2 level3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1)
level1: sblat1 dblat1 cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1)
level1: dblat1 cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1)
level1: sblat1 cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1)
level1: cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x)
level1: cblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1)
level1: zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1)
level1: sblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1)
level1: sblat1 dblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx)
level1: sblat1 dblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx)
level1: sblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx)
level1: dblat1
endif

level1 : sblat1 dblat1 cblat1 zblat1
ifndef CROSS
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1
endif
ifdef SMP
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./zblat1
endif
else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./zblat1
endif
endif
endif
endif

#level2: sblat2 dblat2 cblat2 zblat2
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1)
level2: sblat2 dblat2 cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1)
level2: dblat2 cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1)
level2: sblat2 cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1)
level2: cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x)
level2: cblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1)
level2: zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1)
level2: sblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1)
level2: sblat2 dblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx)
level2: sblat2 dblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx)
level2: sblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx)
level2: dblat2
endif

level2 : sblat2 dblat2 cblat2 zblat2
ifndef CROSS
rm -f ?BLAT2.SUMM
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat
@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat
@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
ifdef SMP
rm -f ?BLAT2.SUMM
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat
@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat
@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat
@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./cblat2 < ./cblat2.dat
@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
endif
endif
endif

ifeq ($(BUILD_HALF),1)
level3 : test_shgemm sblat3 dblat3 cblat3 zblat3
else
level3 : sblat3 dblat3 cblat3 zblat3
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1)
level3: sblat3 dblat3 cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1)
level3: dblat3 cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1)
level3: sblat3 cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1)
level3: cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x)
level3: cblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1)
level3: zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1)
level3: sblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1)
level3: sblat3 dblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx)
level3: sblat3 dblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx)
level3: sblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx)
level3: dblat3
endif



#ifeq ($(BUILD_HALF),1)
#level3 : test_shgemm sblat3 dblat3 cblat3 zblat3
#else
#level3 : sblat3 dblat3 cblat3 zblat3
#endif

ifndef CROSS
rm -f ?BLAT3.SUMM
ifeq ($(BUILD_HALF),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
endif
ifdef SMP
rm -f ?BLAT3.SUMM
ifeq ($(USE_OPENMP), 1)
@@ -90,30 +249,46 @@ ifeq ($(BUILD_HALF),1)
OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
endif
else
ifeq ($(BUILD_HALF),1)
OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./cblat3 < ./cblat3.dat
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
endif
endif
endif
endif


level3_3m : zblat3_3m cblat3_3m
@@ -151,56 +326,71 @@ endif
endif
endif

ifeq ($(BUILD_SINGLE),1)
sblat1 : sblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat1 sblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_DOUBLE),1)
dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat1 dblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
else
dblat2:
dblat3:
endif


qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o qblat1 qblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

ifeq ($(BUILD_COMPLEX),1)
cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat1 cblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

cblat2 : cblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat2 cblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_COMPLEX16),1)
zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat2 zblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_HALF),1)
test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)

ifeq ($(BUILD_COMPLEX),1)
cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3_3m cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

ifeq ($(BUILD_COMPLEX16),1)
zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3_3m zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif





+ 2
- 15
utest/test_dsdot.c View File

@@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/

#include "openblas_utest.h"
#if defined(BUILD_SINGLE) && defined(BUILD_DOUBLE)
CTEST(dsdot,dsdot_n_1)
{
float x= 0.172555164F;
@@ -47,17 +47,4 @@ CTEST(dsdot,dsdot_n_1)
ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS);

}

CTEST(dsdot,dsdot_n_2)
{
float x[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F};
float y[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F};
blasint incx=1;
blasint incy=1;
blasint n=8;

double res1=0.0f, res2= 2.0400000444054616;

res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy);
ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS);
}
#endif

+ 6
- 0
utest/test_fork.c View File

@@ -48,6 +48,7 @@ void* xmalloc(size_t n)
}
}

#ifdef BUILD_DOUBLE
void check_dgemm(double *a, double *b, double *result, double *expected, blasint n)
{
char trans1 = 'T';
@@ -59,9 +60,13 @@ void check_dgemm(double *a, double *b, double *result, double *expected, blasint
ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS);
}
}
#endif

CTEST(fork, safety)
{
#ifndef BUILD_DOUBLE
exit(0);
#else
blasint n = 1000;
int i;

@@ -124,4 +129,5 @@ CTEST(fork, safety)
ASSERT_EQUAL(wait_pid, fork_pid);
ASSERT_EQUAL(0, WEXITSTATUS (child_status));
}
#endif
}

+ 8
- 1
utest/test_potrs.c View File

@@ -529,16 +529,20 @@ CTEST(potrf, smoketest_trivial){
for (j = 0; j < n; ++j) {
double err;

#ifdef BUILD_SINGLE
err = fabs(A1s[i+n*j] - Bs[i+n*j]);
if (err > 1e-5) {
CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
}

#endif
#ifdef BUILD_DOUBLE
err = fabs(A1d[i+n*j] - Bd[i+n*j]);
if (err > 1e-12) {
CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
}
#endif

#ifdef BUILD_COMPLEX
#ifdef OPENBLAS_COMPLEX_C99
err = cabsf(A1c[i+n*j] - Bc[i+n*j]);
#else
@@ -548,7 +552,9 @@ CTEST(potrf, smoketest_trivial){
if (err > 1e-5) {
CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
}
#endif

#ifdef BUILD_COMPLEX16
#ifdef OPENBLAS_COMPLEX_C99
err = cabs(A1z[i+n*j] - Bz[i+n*j]);
#else
@@ -558,6 +564,7 @@ CTEST(potrf, smoketest_trivial){
if (err > 1e-12) {
CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
}
#endif
}
}
}


Loading…
Cancel
Save