@@ -178,4 +178,4 @@ In chronological order: | |||
* [2019-11-06] optimize AVX512 SGEMM | |||
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels | |||
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM | |||
* [2019-12-27] AVX2 CGEMM3M kernel | |||
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels |
@@ -247,21 +247,21 @@ prof_lapack : lapack_prebuild | |||
lapack_prebuild : | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
@@ -319,7 +319,7 @@ lapack-test : | |||
ifneq ($(CROSS), 1) | |||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \ | |||
./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING) | |||
endif | |||
lapack-runtest: | |||
@@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386) | |||
override ARCH=x86 | |||
else ifeq ($(ARCH), aarch64) | |||
override ARCH=arm64 | |||
else ifeq ($(ARCH), zarch) | |||
override ARCH=zarch | |||
endif | |||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | |||
@@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99 | |||
DYNAMIC_CORE += TSV110 | |||
endif | |||
ifeq ($(ARCH), zarch) | |||
DYNAMIC_CORE = Z13 | |||
DYNAMIC_CORE += Z14 | |||
endif | |||
ifeq ($(ARCH), power) | |||
DYNAMIC_CORE = POWER6 | |||
DYNAMIC_CORE += POWER8 | |||
@@ -115,7 +115,9 @@ set(SLASRC | |||
stplqt.f stplqt2.f stpmlqt.f | |||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f | |||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f | |||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f) | |||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f | |||
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f | |||
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) | |||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f | |||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f | |||
@@ -210,7 +212,9 @@ set(CLASRC | |||
ctplqt.f ctplqt2.f ctpmlqt.f | |||
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f | |||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f | |||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f) | |||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f | |||
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f | |||
cungtsqr.f cunhr_col.f ) | |||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f | |||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f | |||
@@ -299,7 +303,9 @@ set(DLASRC | |||
dtplqt.f dtplqt2.f dtpmlqt.f | |||
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f | |||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f | |||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f) | |||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f | |||
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f | |||
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f ) | |||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f | |||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f | |||
@@ -398,7 +404,9 @@ set(ZLASRC | |||
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f | |||
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f | |||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f | |||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f) | |||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f | |||
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f | |||
zungtsqr.f zunhr_col.f) | |||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f | |||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f | |||
@@ -715,6 +715,8 @@ set(DSRC | |||
lapacke_dgesv_work.c | |||
lapacke_dgesvd.c | |||
lapacke_dgesvd_work.c | |||
lapacke_dgesvdq.c | |||
lapacke_dgesvdq_work.c | |||
lapacke_dgesvdx.c | |||
lapacke_dgesvdx_work.c | |||
lapacke_dgesvj.c | |||
@@ -1287,6 +1289,8 @@ set(SSRC | |||
lapacke_sgesv_work.c | |||
lapacke_sgesvd.c | |||
lapacke_sgesvd_work.c | |||
lapacke_sgesvdq.c | |||
lapacke_sgesvdq_work.c | |||
lapacke_sgesvdx.c | |||
lapacke_sgesvdx_work.c | |||
lapacke_sgesvj.c | |||
@@ -1853,6 +1857,8 @@ set(ZSRC | |||
lapacke_zgesv_work.c | |||
lapacke_zgesvd.c | |||
lapacke_zgesvd_work.c | |||
lapacke_zgesvdq.c | |||
lapacke_zgesvdq_work.c | |||
lapacke_zgesvdx.c | |||
lapacke_zgesvdx_work.c | |||
lapacke_zgesvj.c | |||
@@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES. | |||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | |||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | |||
16.0 THRESHOLD VALUE OF TEST RATIO | |||
7 NUMBER OF VALUES OF N | |||
6 NUMBER OF VALUES OF N | |||
1 2 3 5 7 9 35 VALUES OF N | |||
3 NUMBER OF VALUES OF ALPHA | |||
0.0 1.0 0.7 VALUES OF ALPHA | |||
@@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES. | |||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | |||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | |||
16.0 THRESHOLD VALUE OF TEST RATIO | |||
7 NUMBER OF VALUES OF N | |||
6 NUMBER OF VALUES OF N | |||
0 1 2 3 5 9 35 VALUES OF N | |||
3 NUMBER OF VALUES OF ALPHA | |||
0.0 1.0 0.7 VALUES OF ALPHA | |||
@@ -21,9 +21,13 @@ else | |||
ifeq ($(ARCH),power) | |||
COMMONOBJS += dynamic_power.$(SUFFIX) | |||
else | |||
ifeq ($(ARCH),zarch) | |||
COMMONOBJS += dynamic_zarch.$(SUFFIX) | |||
else | |||
COMMONOBJS += dynamic.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
else | |||
COMMONOBJS += parameter.$(SUFFIX) | |||
endif | |||
@@ -85,9 +89,13 @@ else | |||
ifeq ($(ARCH),power) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX) | |||
else | |||
ifeq ($(ARCH),zarch) | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | |||
endif | |||
endif | |||
endif | |||
else | |||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | |||
endif | |||
@@ -0,0 +1,131 @@ | |||
#include "common.h" | |||
extern gotoblas_t gotoblas_Z13; | |||
extern gotoblas_t gotoblas_Z14; | |||
extern gotoblas_t gotoblas_Z15; | |||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
//extern gotoblas_t gotoblas_Z14; | |||
//#endif | |||
#define NUM_CORETYPES 5 | |||
extern void openblas_warning(int verbose, const char* msg); | |||
static char* corename[] = { | |||
"unknown", | |||
"Z13", | |||
"Z14", | |||
"Z15", | |||
"ZARCH_GENERIC", | |||
}; | |||
char* gotoblas_corename(void) { | |||
if (gotoblas == &gotoblas_Z13) return corename[1]; | |||
if (gotoblas == &gotoblas_Z14) return corename[2]; | |||
if (gotoblas == &gotoblas_Z15) return corename[3]; | |||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
// if (gotoblas == &gotoblas_POWER9) return corename[3]; | |||
//#endif | |||
return corename[0]; // try generic? | |||
} | |||
// __builtin_cpu_is is not supported by zarch | |||
static gotolabs_t* get_coretype(void) { | |||
FILE* infile; | |||
char buffer[512], * p; | |||
p = (char*)NULL; | |||
infile = fopen("/proc/sysinfo", "r"); | |||
while (fgets(buffer, sizeof(buffer), infile)) { | |||
if (!strncmp("Type", buffer, 4)) { | |||
p = strchr(buffer, ':') + 2; | |||
#if 0 | |||
fprintf(stderr, "%s\n", p); | |||
#endif | |||
break; | |||
} | |||
} | |||
fclose(infile); | |||
if (strstr(p, "2964")) return &gotoblas_Z13; | |||
if (strstr(p, "2965")) return &gotoblas_Z13; | |||
if (strstr(p, "3906")) return &gotoblas_Z14; | |||
if (strstr(p, "3907")) return &gotoblas_Z14; | |||
if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14 | |||
if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14 | |||
return NULL; // should be ZARCH_GENERIC | |||
} | |||
static gotoblas_t* force_coretype(char* coretype) { | |||
int i; | |||
int found = -1; | |||
char message[128]; | |||
for (i = 0; i < NUM_CORETYPES; i++) | |||
{ | |||
if (!strncasecmp(coretype, corename[i], 20)) | |||
{ | |||
found = i; | |||
break; | |||
} | |||
} | |||
switch (found) | |||
{ | |||
case 1: return (&gotoblas_Z13); | |||
case 2: return (&gotoblas_Z14); | |||
case 3: return (&gotoblas_Z15); | |||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000) | |||
// case 3: return (&gotoblas_POWER9); | |||
//#endif | |||
default: return NULL; | |||
} | |||
snprintf(message, 128, "Core not found: %s\n", coretype); | |||
openblas_warning(1, message); | |||
} | |||
void gotoblas_dynamic_init(void) { | |||
char coremsg[128]; | |||
char coren[22]; | |||
char* p; | |||
if (gotoblas) return; | |||
p = getenv("OPENBLAS_CORETYPE"); | |||
if (p) | |||
{ | |||
gotoblas = force_coretype(p); | |||
} | |||
else | |||
{ | |||
gotoblas = get_coretype(); | |||
} | |||
if (gotoblas == NULL) | |||
{ | |||
snprintf(coremsg, 128, "Falling back to Z14 core\n"); | |||
openblas_warning(1, coremsg); | |||
gotoblas = &gotoblas_Z14; | |||
} | |||
if (gotoblas && gotoblas->init) { | |||
strncpy(coren, gotoblas_corename(), 20); | |||
sprintf(coremsg, "Core: %s\n", coren); | |||
openblas_warning(2, coremsg); | |||
gotoblas->init(); | |||
} | |||
else { | |||
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||
exit(1); | |||
} | |||
} | |||
void gotoblas_dynamic_quit(void) { | |||
gotoblas = NULL; | |||
} |
@@ -694,7 +694,19 @@ | |||
# functions added for lapack-3.8.0 | |||
ilaenv2stage | |||
ilaenv2stage, | |||
# functions added for lapack-3.9.0 | |||
cgesvdq, | |||
cungtsqr, | |||
dcombssq, | |||
dgesvdq, | |||
dorgtsqr, | |||
scombssq, | |||
sgesvdq, | |||
sorgtsqr, | |||
zgesvdq, | |||
zungtsqr | |||
); | |||
@lapack_extendedprecision_objs = ( | |||
@@ -3347,6 +3359,15 @@ | |||
LAPACKE_zsytrf_aa_2stage_work, | |||
LAPACKE_zsytrs_aa_2stage, | |||
LAPACKE_zsytrs_aa_2stage_work, | |||
# new functions from 3.9.0 | |||
LAPACKE_dgesvdq, | |||
LAPACKE_dgesvdq_work, | |||
LAPACKE_sgesvdq, | |||
LAPACKE_sgesvdq_work, | |||
LAPACKE_zgesvdq, | |||
LAPACKE_zgesvdq_work | |||
); | |||
#These functions may need 2 underscores.
@@ -3419,7 +3440,13 @@ | |||
dsytrf_aa_2stage, dsytrs_aa_2stage, | |||
zhesv_aa_2stage, zhetrf_aa_2stage, | |||
zhetrs_aa_2stage, zsysv_aa_2stage, | |||
zsytrf_aa_2stage, zsytrs_aa_2stage | |||
zsytrf_aa_2stage, zsytrs_aa_2stage, | |||
# 3.9.0 | |||
claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col, | |||
dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col, | |||
slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col, | |||
zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col | |||
); | |||
@@ -103,26 +103,34 @@ ZDOTKERNEL = zdot.S | |||
DSDOTKERNEL = dot.S | |||
DGEMM_BETA = dgemm_beta.S | |||
SGEMM_BETA = sgemm_beta.S | |||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
ifeq ($(SGEMM_UNROLL_N), 4) | |||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
ifeq ($(SGEMM_UNROLL_M), 16) | |||
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||
else | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
endif | |||
ifeq ($(SGEMM_UNROLL_M), 4) | |||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||
else | |||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
endif | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(SGEMM_UNROLL_N), 16) | |||
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||
else | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
endif | |||
ifeq ($(SGEMM_UNROLL_N), 4) | |||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
else | |||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
endif | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -109,22 +109,29 @@ ZGEMVTKERNEL = zgemv_t.S | |||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||
ifeq ($(SGEMM_UNROLL_N), 4) | |||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
ifeq ($(SGEMM_UNROLL_M), 16) | |||
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S | |||
else | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
endif | |||
ifeq ($(SGEMM_UNROLL_M), 4) | |||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S | |||
else | |||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||
endif | |||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
endif | |||
ifeq ($(SGEMM_UNROLL_N), 16) | |||
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S | |||
else | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
endif | |||
ifeq ($(SGEMM_UNROLL_N), 4) | |||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S | |||
else | |||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||
endif | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -43,7 +43,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define betaV0 v11.d[0] | |||
#define I x16 | |||
#define size 128 | |||
#define prfm_size 640 | |||
#define calc_size 128 | |||
/************************************************************************************** | |||
* Macro definitions | |||
@@ -119,27 +120,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
ldp q2, q3, [A02] | |||
ldp q4, q5, [A03] | |||
ldp q6, q7, [A04] | |||
fmul v0.2d, v0.2d, betaV0 | |||
fmul v1.2d, v1.2d, betaV0 | |||
fmul v2.2d, v2.2d, betaV0 | |||
fmul v3.2d, v3.2d, betaV0 | |||
prfm PLDL1KEEP, [A01, prfm_size] | |||
fmul v4.2d, v4.2d, betaV0 | |||
fmul v5.2d, v5.2d, betaV0 | |||
prfm PLDL1KEEP, [A03, prfm_size] | |||
fmul v6.2d, v6.2d, betaV0 | |||
fmul v7.2d, v7.2d, betaV0 | |||
st1 {v0.2d, v1.2d}, [A01] | |||
add A01, A01, size | |||
add A01, A01, calc_size | |||
st1 {v2.2d, v3.2d}, [A02] | |||
add A02, A02, size | |||
add A02, A02, calc_size | |||
st1 {v4.2d, v5.2d}, [A03] | |||
add A03, A03, size | |||
add A03, A03, calc_size | |||
st1 {v6.2d, v7.2d}, [A04] | |||
add A04, A04, size | |||
add A04, A04, calc_size | |||
subs I , I , #1 | |||
bne .Lgemm_beta_03 | |||
@@ -0,0 +1,259 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
#define M x0 | |||
#define N x1 | |||
#define BETA s0 | |||
#define LDC x6 | |||
#define C00 x7 | |||
#define A01 x8 | |||
#define A02 x9 | |||
#define A03 x10 | |||
#define A04 x11 | |||
#define I x12 | |||
#define beta0 s11 | |||
#define betaV0 v11.s[0] | |||
#define prfm_size 640 | |||
#define calc_size 128 | |||
/************************************************************************************** | |||
* Macro definitions | |||
**************************************************************************************/ | |||
.macro SAVE_REGS | |||
add sp, sp, #-(11 * 16) | |||
stp d8, d9, [sp, #(0 * 16)] | |||
stp d10, d11, [sp, #(1 * 16)] | |||
stp d12, d13, [sp, #(2 * 16)] | |||
stp d14, d15, [sp, #(3 * 16)] | |||
stp d16, d17, [sp, #(4 * 16)] | |||
stp x18, x19, [sp, #(5 * 16)] | |||
stp x20, x21, [sp, #(6 * 16)] | |||
stp x22, x23, [sp, #(7 * 16)] | |||
stp x24, x25, [sp, #(8 * 16)] | |||
stp x26, x27, [sp, #(9 * 16)] | |||
str x28, [sp, #(10 * 16)] | |||
.endm | |||
.macro RESTORE_REGS | |||
ldp d8, d9, [sp, #(0 * 16)] | |||
ldp d10, d11, [sp, #(1 * 16)] | |||
ldp d12, d13, [sp, #(2 * 16)] | |||
ldp d14, d15, [sp, #(3 * 16)] | |||
ldp d16, d17, [sp, #(4 * 16)] | |||
ldp x18, x19, [sp, #(5 * 16)] | |||
ldp x20, x21, [sp, #(6 * 16)] | |||
ldp x22, x23, [sp, #(7 * 16)] | |||
ldp x24, x25, [sp, #(8 * 16)] | |||
ldp x26, x27, [sp, #(9 * 16)] | |||
ldr x28, [sp, #(10 * 16)] | |||
add sp, sp, #(11*16) | |||
.endm | |||
// INIT_ZERO: load 0.0 into every lane of v0-v7.  Used on the beta == 0
// path, where these registers are stored to C without being loaded first.
// The zeros are produced with an immediate move instead of multiplying the
// registers' previous contents by beta: 0.0 * NaN and 0.0 * Inf are NaN, so
// a multiply would leak stale register contents into the output.
.macro INIT_ZERO
	movi	v0.4s, #0
	movi	v1.4s, #0
	movi	v2.4s, #0
	movi	v3.4s, #0
	movi	v4.4s, #0
	movi	v5.4s, #0
	movi	v6.4s, #0
	movi	v7.4s, #0
.endm
/************************************************************************************** | |||
* End of macro definitions | |||
**************************************************************************************/ | |||
PROLOGUE | |||
.align 5 | |||
ldr LDC, [sp] | |||
SAVE_REGS | |||
.Lgemm_beta_BEGIN: | |||
fmov beta0, BETA | |||
cmp N, #0 | |||
ble .Lgemm_beta_L999 | |||
fcmp BETA, #0.0 | |||
beq .Lgemm_beta_zero_01 | |||
.Lgemm_beta_01: | |||
lsl LDC, LDC, #2 | |||
.align 5 | |||
.Lgemm_beta_02: | |||
mov A01, C00 | |||
add C00, C00, LDC | |||
asr I, M, #5 | |||
cmp I, #0 | |||
ble .Lgemm_beta_04 | |||
add A02, A01, #32 | |||
add A03, A02, #32 | |||
add A04, A03, #32 | |||
.align 5 | |||
.Lgemm_beta_03: | |||
prfm PLDL1KEEP, [A01, prfm_size] | |||
ldp q0, q1, [A01] | |||
ldp q2, q3, [A02] | |||
ldp q4, q5, [A03] | |||
ldp q6, q7, [A04] | |||
fmul v0.4s, v0.4s, betaV0 | |||
fmul v1.4s, v1.4s, betaV0 | |||
fmul v2.4s, v2.4s, betaV0 | |||
fmul v3.4s, v3.4s, betaV0 | |||
fmul v4.4s, v4.4s, betaV0 | |||
fmul v5.4s, v5.4s, betaV0 | |||
fmul v6.4s, v6.4s, betaV0 | |||
fmul v7.4s, v7.4s, betaV0 | |||
prfm PLDL1KEEP, [A01, prfm_size + 64] | |||
st1 {v0.4s, v1.4s}, [A01] | |||
add A01, A01, calc_size | |||
st1 {v2.4s, v3.4s}, [A02] | |||
add A02, A02, calc_size | |||
st1 {v4.4s, v5.4s}, [A03] | |||
add A03, A03, calc_size | |||
st1 {v6.4s, v7.4s}, [A04] | |||
add A04, A04, calc_size | |||
subs I , I , #1 | |||
bne .Lgemm_beta_03 | |||
.align 5 | |||
.Lgemm_beta_04: | |||
and I, M , #31 | |||
cmp I, #0 | |||
ble .Lgemm_beta_06 | |||
.align 5 | |||
.Lgemm_beta_05: | |||
ldr s12, [A01] | |||
fmul s12, s12, beta0 | |||
str s12, [A01] | |||
add A01, A01, #4 | |||
subs I , I , #1 | |||
bne .Lgemm_beta_05 | |||
.align 5 | |||
.Lgemm_beta_06: | |||
subs N , N, #1 // N-- | |||
bne .Lgemm_beta_02 | |||
.align 5 | |||
.Lgemm_beta_L999: | |||
mov x0, #0 | |||
RESTORE_REGS | |||
ret | |||
.align 5 | |||
.Lgemm_beta_zero_01: | |||
INIT_ZERO | |||
lsl LDC, LDC, #2 | |||
.align 5 | |||
.Lgemm_beta_zero_02: | |||
mov A01, C00 | |||
add C00, C00, LDC | |||
asr I, M, #5 | |||
cmp I, #0 | |||
ble .Lgemm_beta_zero_04 | |||
add A02, A01, #32 | |||
add A03, A02, #32 | |||
add A04, A03, #32 | |||
.align 5 | |||
.Lgemm_beta_zero_03: | |||
st1 {v0.4s, v1.4s}, [A01] | |||
add A01, A01, calc_size | |||
st1 {v2.4s, v3.4s}, [A02] | |||
add A02, A02, calc_size | |||
st1 {v4.4s, v5.4s}, [A03] | |||
add A03, A03, calc_size | |||
st1 {v6.4s, v7.4s}, [A04] | |||
add A04, A04, calc_size | |||
subs I, I, #1 | |||
bne .Lgemm_beta_zero_03 | |||
.align 5 | |||
.Lgemm_beta_zero_04: | |||
and I, M, #31 | |||
cmp I, #0 | |||
ble .Lgemm_beta_zero_06 | |||
.align 5 | |||
.Lgemm_beta_zero_05: | |||
str beta0, [A01] | |||
add A01, A01, #4 | |||
subs I, I, #1 | |||
bne .Lgemm_beta_zero_05 | |||
.align 5 | |||
.Lgemm_beta_zero_06: | |||
subs N, N, #1 | |||
bne .Lgemm_beta_zero_02 | |||
.align 5 | |||
.Lgemm_beta_zero_L999: | |||
mov x0, #0 | |||
RESTORE_REGS | |||
ret | |||
EPILOGUE |
@@ -0,0 +1,824 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2019, The OpenBLAS Project | |||
All rights reserved. | |||
*****************************************************************************/ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
#define M x0 | |||
#define N x1 | |||
#define A x2 | |||
#define LDA x3 | |||
#define B x4 | |||
#define M8 x5 | |||
#define A01 x6 | |||
#define A02 x7 | |||
#define A03 x8 | |||
#define A04 x9 | |||
#define A05 x10 | |||
#define A06 x11 | |||
#define A07 x12 | |||
#define A08 x13 | |||
#define B01 x14 | |||
#define B02 x15 | |||
#define B03 x16 | |||
#define B04 x17 | |||
#define B00 x22 | |||
#define I x18 | |||
#define J x19 | |||
#define TEMP1 x20 | |||
#define A_PREFETCH 256 | |||
/************************************************************************************** | |||
* Macro definitions | |||
**************************************************************************************/ | |||
.macro SAVE_REGS | |||
add sp, sp, #-(11 * 16) | |||
stp d8, d9, [sp, #(0 * 16)] | |||
stp d10, d11, [sp, #(1 * 16)] | |||
stp d12, d13, [sp, #(2 * 16)] | |||
stp d14, d15, [sp, #(3 * 16)] | |||
stp d16, d17, [sp, #(4 * 16)] | |||
stp x18, x19, [sp, #(5 * 16)] | |||
stp x20, x21, [sp, #(6 * 16)] | |||
stp x22, x23, [sp, #(7 * 16)] | |||
stp x24, x25, [sp, #(8 * 16)] | |||
stp x26, x27, [sp, #(9 * 16)] | |||
str x28, [sp, #(10 * 16)] | |||
.endm | |||
.macro RESTORE_REGS | |||
ldp d8, d9, [sp, #(0 * 16)] | |||
ldp d10, d11, [sp, #(1 * 16)] | |||
ldp d12, d13, [sp, #(2 * 16)] | |||
ldp d14, d15, [sp, #(3 * 16)] | |||
ldp d16, d17, [sp, #(4 * 16)] | |||
ldp x18, x19, [sp, #(5 * 16)] | |||
ldp x20, x21, [sp, #(6 * 16)] | |||
ldp x22, x23, [sp, #(7 * 16)] | |||
ldp x24, x25, [sp, #(8 * 16)] | |||
ldp x26, x27, [sp, #(9 * 16)] | |||
ldr x28, [sp, #(10 * 16)] | |||
add sp, sp, #(11*16) | |||
.endm | |||
/*************************************************************************************************************************/ | |||
.macro COPY16x8 | |||
prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||
//prfm PSTL1KEEP, [B00, M8] | |||
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01] | |||
add A01, A01, #64 | |||
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||
add TEMP1, B00, #64 | |||
ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02] | |||
add A02, A02, #64 | |||
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03] | |||
add A03, A03, #64 | |||
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04] | |||
add A04, A04, #64 | |||
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [A05] | |||
add A05, A05, #64 | |||
st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [A06] | |||
add A06, A06, #64 | |||
st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [A07] | |||
add A07, A07, #64 | |||
st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [A08] | |||
add A08, A08, #64 | |||
st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
add B00, B00, M8 | |||
.endm | |||
.macro COPY8x8 | |||
prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||
ldp q0, q1, [A01] | |||
ldp q2, q3, [A02] | |||
add A01, A01, #32 | |||
add A02, A02, #32 | |||
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01] | |||
add B01, B01, #64 | |||
ldp q4, q5, [A03] | |||
ldp q6, q7, [A04] | |||
add A03, A03, #32 | |||
add A04, A04, #32 | |||
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01] | |||
add B01, B01, #64 | |||
ldp q8, q9, [A05] | |||
ldp q10, q11, [A06] | |||
add A05, A05, #32 | |||
add A06, A06, #32 | |||
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [B01] | |||
add B01, B01, #64 | |||
ldp q12, q13, [A07] | |||
ldp q14, q15, [A08] | |||
add A07, A07, #32 | |||
add A08, A08, #32 | |||
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [B01] | |||
add B01, B01, #64 | |||
.endm | |||
.macro COPY4x8 | |||
//prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||
ldr q0, [A01] | |||
ldr q1, [A02] | |||
ldr q2, [A03] | |||
ldr q3, [A04] | |||
add A01, A01, #16 | |||
add A02, A02, #16 | |||
add A03, A03, #16 | |||
add A04, A04, #16 | |||
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B02] | |||
add B02, B02, #64 | |||
ldr q4, [A05] | |||
ldr q5, [A06] | |||
ldr q6, [A07] | |||
ldr q7, [A08] | |||
add A05, A05, #16 | |||
add A06, A06, #16 | |||
add A07, A07, #16 | |||
add A08, A08, #16 | |||
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B02] | |||
add B02, B02, #64 | |||
.endm | |||
.macro COPY2x8 | |||
//prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A05, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A06, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A07, #A_PREFETCH] | |||
//prfm PLDL1KEEP, [A08, #A_PREFETCH] | |||
ldr d0, [A01] | |||
ldr d1, [A02] | |||
ldr d2, [A03] | |||
ldr d3, [A04] | |||
add A01, A01, #8 | |||
add A02, A02, #8 | |||
add A03, A03, #8 | |||
add A04, A04, #8 | |||
stp d0, d1, [B03] | |||
add B03, B03, #16 | |||
stp d2, d3, [B03] | |||
add B03, B03, #16 | |||
ldr d4, [A05] | |||
ldr d5, [A06] | |||
ldr d6, [A07] | |||
ldr d7, [A08] | |||
add A05, A05, #8 | |||
add A06, A06, #8 | |||
add A07, A07, #8 | |||
add A08, A08, #8 | |||
stp d4, d5, [B03] | |||
add B03, B03, #16 | |||
stp d6, d7, [B03] | |||
add B03, B03, #16 | |||
.endm | |||
// COPY1x8: copy one single-precision element from each of the eight row
// pointers A01..A08 into eight consecutive floats at B04.  Every row
// pointer advances by 4 bytes (one float) and B04 by 32 bytes.
// Each row is loaded exactly once with a 4-byte access, so nothing past
// the element being copied is read.
// NOTE(review): A05..A08 now advance by 4 bytes like A01..A04 (they were
// post-incremented by 8); the call sites are outside this view - confirm
// none of them relies on the old stride.
.macro COPY1x8
	ldr	s0, [A01]
	ldr	s1, [A02]
	ldr	s2, [A03]
	ldr	s3, [A04]

	add	A01, A01, #4
	add	A02, A02, #4
	add	A03, A03, #4
	add	A04, A04, #4

	stp	s0, s1, [B04]
	add	B04, B04, #8
	stp	s2, s3, [B04]
	add	B04, B04, #8

	ldr	s4, [A05]
	ldr	s5, [A06]
	ldr	s6, [A07]
	ldr	s7, [A08]

	add	A05, A05, #4
	add	A06, A06, #4
	add	A07, A07, #4
	add	A08, A08, #4

	stp	s4, s5, [B04]
	add	B04, B04, #8
	stp	s6, s7, [B04]
	add	B04, B04, #8
.endm
/*************************************************************************************************************************/ | |||
.macro COPY16x4 | |||
prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01] | |||
add A01, A01, #64 | |||
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00] | |||
add TEMP1, B00, #64 | |||
ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02] | |||
add A02, A02, #64 | |||
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03] | |||
add A03, A03, #64 | |||
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1] | |||
add TEMP1, TEMP1, #64 | |||
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04] | |||
add A04, A04, #64 | |||
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1] | |||
add B00, B00, M8 | |||
.endm | |||
.macro COPY8x4 | |||
prfm PLDL1KEEP, [A01, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A02, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A03, #A_PREFETCH] | |||
prfm PLDL1KEEP, [A04, #A_PREFETCH] | |||
ldp q0, q1, [A01] | |||
ldp q2, q3, [A02] | |||
add A01, A01, #32 | |||
add A02, A02, #32 | |||
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01] | |||
add B01, B01, #64 | |||
ldp q4, q5, [A03] | |||
ldp q6, q7, [A04] | |||
add A03, A03, #32 | |||
add A04, A04, #32 | |||
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01] | |||
add B01, B01, #64 | |||
.endm | |||
.macro COPY4x4
	// Move a 4-float strip from each of the rows A01..A04 to B02.
	// (The per-row prfm hints are disabled for this tile width.)
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16
	ldr	q2, [A03], #16
	ldr	q3, [A04], #16

	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B02], #64	// 64 bytes out, B02 advanced
.endm
.macro COPY2x4
	// Move a 2-float strip (one d register) from each of the rows
	// A01..A04 to B03. (The per-row prfm hints are disabled here.)
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8

	stp	d0, d1, [B03], #16				// rows 1-2
	stp	d2, d3, [B03], #16				// rows 3-4
.endm
.macro COPY1x4
	// Move one float from each of the rows A01..A04 to B04.
	// (The per-row prfm hints are disabled here.)
	ldr	s0, [A01], #4
	ldr	s1, [A02], #4
	ldr	s2, [A03], #4
	ldr	s3, [A04], #4

	stp	s0, s1, [B04], #8				// rows 1-2
	stp	s2, s3, [B04], #8				// rows 3-4
.endm
/*************************************************************************************************************************/ | |||
.macro COPY16x2
	// Move a 16-float strip from each of the rows A01 and A02 into two
	// back-to-back 64-byte slots that start at B00.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64
	ld1	{v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64

	mov	TEMP1, B00
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [TEMP1], #64	// slot 0, TEMP1 -> B00 + 64
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1]		// slot 1

	add	B00, B00, M8					// B00 advances by M8 bytes
.endm
.macro COPY8x2
	// Move an 8-float strip from each of the rows A01 and A02 to B01.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]

	ld1	{v0.4s, v1.4s}, [A01], #32
	ld1	{v2.4s, v3.4s}, [A02], #32

	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64	// 64 bytes out, B01 advanced
.endm
.macro COPY4x2
	// Move a 4-float strip from each of the rows A01 and A02 to B02.
	// (The per-row prfm hints are disabled here.)
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16

	stp	q0, q1, [B02], #32				// 32 bytes out, B02 advanced
.endm
.macro COPY2x2
	// Move a 2-float strip from each of the rows A01 and A02 to B03.
	// (The per-row prfm hints are disabled here.)
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8

	stp	d0, d1, [B03], #16				// 16 bytes out, B03 advanced
.endm
.macro COPY1x2
	// Move one float from each of the rows A01 and A02 to B04.
	// (The per-row prfm hints are disabled here.)
	ldr	s0, [A01], #4
	ldr	s1, [A02], #4

	stp	s0, s1, [B04], #8				// 8 bytes out, B04 advanced
.endm
/*************************************************************************************************************************/ | |||
.macro COPY16x1
	// Move a 16-float strip of the single row A01 to B00.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64	// source advances 64 bytes
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B00]

	add	B00, B00, M8					// B00 advances by M8 bytes
.endm
.macro COPY8x1
	// Move an 8-float strip of the single row A01 to B01.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]

	ldp	q0, q1, [A01], #32
	stp	q0, q1, [B01], #32
.endm
.macro COPY4x1
	// Move a 4-float strip of the single row A01 to B02.
	// (The prfm hint is disabled here.)
	ldr	q0, [A01], #16
	str	q0, [B02], #16
.endm
.macro COPY2x1
	// Move a 2-float strip of the single row A01 to B03.
	// (The prfm hint is disabled here.)
	ldr	d0, [A01], #8
	str	d0, [B03], #8
.endm
.macro COPY1x1
	// Move one float of the single row A01 to B04.
	// (The prfm hint is disabled here.)
	ldr	s0, [A01], #4
	str	s0, [B04], #4
.endm
/************************************************************************************** | |||
* End of macro definitions | |||
**************************************************************************************/ | |||
// Routine body. Source rows are LDA bytes apart (after scaling) and are
// consumed in groups of 8, then 4, 2 and 1 rows (bits of M). Within a row
// group the N direction is walked 16 elements at a time through B00, and the
// 8/4/2/1-wide remainders (bits 3..0 of N) go through B01/B02/B03/B04, as the
// COPYnx4/COPYnx2/COPYnx1 macros show. Always returns 0 in x0.
PROLOGUE | |||
.align 5 | |||
SAVE_REGS | |||
lsl LDA, LDA, #2 // LDA = LDA * SIZE | |||
lsl TEMP1, M, #2 // TEMP1 = M * SIZE | |||
// B01..B04 = B + (N rounded down to a multiple of 16/8/4/2) * M * SIZE:
// start addresses of the regions that receive the 8/4/2/1-wide remainders.
and B01 , N , #-16 | |||
and B02 , N , #-8 | |||
and B03 , N , #-4 | |||
and B04 , N , #-2 | |||
mul B01, B01, TEMP1 | |||
mul B02, B02, TEMP1 | |||
mul B03, B03, TEMP1 | |||
mul B04, B04, TEMP1 | |||
add B01 , B01, B | |||
add B02 , B02, B | |||
add B03 , B03, B | |||
add B04 , B04, B | |||
lsl M8, M, #6 // M8 = M * 16 * SIZE | |||
// ---- groups of 8 rows ----
.Lsgemm_tcopy_L8_BEGIN: | |||
asr J, M, #3 // J = M / 8 | |||
cmp J, #0 | |||
ble .Lsgemm_tcopy_L4_BEGIN | |||
.align 5 | |||
.Lsgemm_tcopy_L8_M16_BEGIN: | |||
// Set up the eight row pointers and advance A past them.
mov A01, A | |||
add A02, A01, LDA | |||
add A03, A02, LDA | |||
add A04, A03, LDA | |||
add A05, A04, LDA | |||
add A06, A05, LDA | |||
add A07, A06, LDA | |||
add A08, A07, LDA | |||
add A, A08, LDA | |||
mov B00, B | |||
add B, B00, #512 // B = B + 8 * 16 * SIZE | |||
asr I, N, #4 // I = N / 16 | |||
cmp I, #0 | |||
ble .Lsgemm_tcopy_L8_M16_40 | |||
.align 5 | |||
.Lsgemm_tcopy_L8_M16_20: | |||
COPY16x8 | |||
subs I , I , #1 | |||
bne .Lsgemm_tcopy_L8_M16_20 | |||
.Lsgemm_tcopy_L8_M16_40: | |||
// tst is a flag-setting AND: V is cleared and the masked result is never
// negative here, so the following ble is taken exactly when the tested
// bit(s) are zero. The same pattern is used for every remainder test below.
tst N , #8 | |||
ble .Lsgemm_tcopy_L8_M16_60 | |||
COPY8x8 | |||
.Lsgemm_tcopy_L8_M16_60: | |||
tst N , #4 | |||
ble .Lsgemm_tcopy_L8_M16_80 | |||
COPY4x8 | |||
.Lsgemm_tcopy_L8_M16_80: | |||
tst N , #2 | |||
ble .Lsgemm_tcopy_L8_M16_100 | |||
COPY2x8 | |||
.Lsgemm_tcopy_L8_M16_100: | |||
tst N, #1 | |||
ble .Lsgemm_tcopy_L8_M16_END | |||
COPY1x8 | |||
.Lsgemm_tcopy_L8_M16_END: | |||
subs J , J, #1 // j-- | |||
bne .Lsgemm_tcopy_L8_M16_BEGIN | |||
/*********************************************************************************************/ | |||
// ---- one group of 4 rows, if bit 2 of M is set ----
.Lsgemm_tcopy_L4_BEGIN: | |||
tst M, #7 | |||
ble .Lsgemm_tcopy_L999 | |||
tst M, #4 | |||
ble .Lsgemm_tcopy_L2_BEGIN | |||
.Lsgemm_tcopy_L4_M16_BEGIN: | |||
mov A01, A | |||
add A02, A01, LDA | |||
add A03, A02, LDA | |||
add A04, A03, LDA | |||
add A, A04, LDA | |||
mov B00, B | |||
add B, B00, #256 // B = B + 4 * 16 * SIZE | |||
asr I, N, #4 // I = N / 16 | |||
cmp I, #0 | |||
ble .Lsgemm_tcopy_L4_M16_40 | |||
.align 5 | |||
.Lsgemm_tcopy_L4_M16_20: | |||
COPY16x4 | |||
subs I , I , #1 | |||
bne .Lsgemm_tcopy_L4_M16_20 | |||
.Lsgemm_tcopy_L4_M16_40: | |||
tst N , #8 | |||
ble .Lsgemm_tcopy_L4_M16_60 | |||
COPY8x4 | |||
.Lsgemm_tcopy_L4_M16_60: | |||
tst N , #4 | |||
ble .Lsgemm_tcopy_L4_M16_80 | |||
COPY4x4 | |||
.Lsgemm_tcopy_L4_M16_80: | |||
tst N , #2 | |||
ble .Lsgemm_tcopy_L4_M16_100 | |||
COPY2x4 | |||
.Lsgemm_tcopy_L4_M16_100: | |||
tst N, #1 | |||
ble .Lsgemm_tcopy_L4_M16_END | |||
COPY1x4 | |||
.Lsgemm_tcopy_L4_M16_END: | |||
/*********************************************************************************************/ | |||
// ---- one group of 2 rows, if bit 1 of M is set ----
.Lsgemm_tcopy_L2_BEGIN: | |||
tst M, #3 | |||
ble .Lsgemm_tcopy_L999 | |||
tst M, #2 | |||
ble .Lsgemm_tcopy_L1_BEGIN | |||
.Lsgemm_tcopy_L2_M16_BEGIN: | |||
mov A01, A | |||
add A02, A01, LDA | |||
add A, A02, LDA | |||
mov B00, B | |||
add B, B00, #128 // B = B + 2 * 16 * SIZE | |||
asr I, N, #4 // I = N / 16 | |||
cmp I, #0 | |||
ble .Lsgemm_tcopy_L2_M16_40 | |||
.align 5 | |||
.Lsgemm_tcopy_L2_M16_20: | |||
COPY16x2 | |||
subs I , I , #1 | |||
bne .Lsgemm_tcopy_L2_M16_20 | |||
.Lsgemm_tcopy_L2_M16_40: | |||
tst N , #8 | |||
ble .Lsgemm_tcopy_L2_M16_60 | |||
COPY8x2 | |||
.Lsgemm_tcopy_L2_M16_60: | |||
tst N , #4 | |||
ble .Lsgemm_tcopy_L2_M16_80 | |||
COPY4x2 | |||
.Lsgemm_tcopy_L2_M16_80: | |||
tst N , #2 | |||
ble .Lsgemm_tcopy_L2_M16_100 | |||
COPY2x2 | |||
.Lsgemm_tcopy_L2_M16_100: | |||
tst N , #1 | |||
ble .Lsgemm_tcopy_L2_M16_END | |||
COPY1x2 | |||
.Lsgemm_tcopy_L2_M16_END: | |||
/*********************************************************************************************/ | |||
// ---- final single row, if bit 0 of M is set (A and B are not advanced) ----
.Lsgemm_tcopy_L1_BEGIN: | |||
tst M, #1 | |||
ble .Lsgemm_tcopy_L999 | |||
.Lsgemm_tcopy_L1_M16_BEGIN: | |||
mov A01, A // A01 = A | |||
mov B00, B | |||
asr I, N, #4 // I = N / 16 | |||
cmp I, #0 | |||
ble .Lsgemm_tcopy_L1_M16_40 | |||
.align 5 | |||
.Lsgemm_tcopy_L1_M16_20: | |||
COPY16x1 | |||
subs I , I , #1 | |||
bne .Lsgemm_tcopy_L1_M16_20 | |||
.Lsgemm_tcopy_L1_M16_40: | |||
tst N , #8 | |||
ble .Lsgemm_tcopy_L1_M16_60 | |||
COPY8x1 | |||
.Lsgemm_tcopy_L1_M16_60: | |||
tst N , #4 | |||
ble .Lsgemm_tcopy_L1_M16_80 | |||
COPY4x1 | |||
.Lsgemm_tcopy_L1_M16_80: | |||
tst N , #2 | |||
ble .Lsgemm_tcopy_L1_M16_100 | |||
COPY2x1 | |||
.Lsgemm_tcopy_L1_M16_100: | |||
tst N , #1 | |||
ble .Lsgemm_tcopy_L1_M16_END | |||
COPY1x1 | |||
.Lsgemm_tcopy_L1_M16_END: | |||
// Common exit.
.Lsgemm_tcopy_L999: | |||
mov x0, #0 // set return value | |||
RESTORE_REGS | |||
ret | |||
EPILOGUE | |||
@@ -739,6 +739,26 @@ static void init_parameter(void) { | |||
} | |||
#else //POWER | |||
#if defined(ARCH_ZARCH) | |||
static void init_parameter(void) { | |||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | |||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | |||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; | |||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; | |||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | |||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | |||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; | |||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; | |||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | |||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | |||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; | |||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; | |||
} | |||
#else //ZARCH | |||
#ifdef ARCH_X86 | |||
static int get_l2_size_old(void){ | |||
int i, eax, ebx, ecx, edx, cpuid_level; | |||
@@ -1325,4 +1345,5 @@ static void init_parameter(void) { | |||
} | |||
#endif //POWER | |||
#endif //ZARCH | |||
#endif //defined(ARCH_ARM64) |
@@ -98,5 +98,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | |||
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c | |||
@@ -95,5 +95,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c | |||
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S | |||
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c | |||
@@ -0,0 +1,224 @@ | |||
/* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store, %6 = "+r"(const_val), %7 = "+r"(M), %8 = "+r"(next_b) */ | |||
/* r11 = saved copy of m, r12 = k << 5(const), r13 = k(const), r14 = b_head_pos(const), r15 = tmp */ | |||
#include "common.h" | |||
#include <stdint.h> | |||
//recommended settings: GEMM_Q=256, GEMM_P=256 | |||
/* m = 4 *//* ymm0 for alpha, ymm1-ymm3 for temporary use, ymm4-ymm15 for accumulators */ | |||
/* ---- m = 4 building blocks -------------------------------------------------
 * KERNEL_*  : one (k1) or two (k2) k-steps of the rank-1 update for a tile of
 *             4 rows of A by n columns of B. %0 walks A, %1 walks B.
 *             The "_h_" variants leave %1 untouched; the plain variants append
 *             the addq that advances it.
 * INIT_*    : zero the accumulators used by the matching kernel.
 * SAVE_*    : expand each accumulated value into a pair, multiply by the pair
 *             held in ymm0 and add it to C (vfmadd213pd: dst = ymm0*dst + mem).
 */
/* n = 1: load 4 values of A, broadcast one value of B, accumulate in ymm4. */
#define KERNEL_k1m4n1 \ | |||
"vmovupd (%0),%%ymm1; addq $32,%0;"\ | |||
"vbroadcastsd (%1),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,%%ymm4;"\ | |||
"addq $8,%1;" | |||
/* n = 2: ymm1/ymm2 hold the even/odd A elements duplicated within each
 * 128-bit lane; ymm3 holds the two B values repeated in both lanes. */
#define KERNEL_h_k1m4n2 \ | |||
"vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2; addq $32,%0;"\ | |||
"vbroadcastf128 (%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm4; vfmadd231pd %%ymm2,%%ymm3,%%ymm5;" | |||
#define KERNEL_k1m4n2 KERNEL_h_k1m4n2 "addq $16,%1;" | |||
#define KERNEL_h_k1m4n4 \ | |||
KERNEL_h_k1m4n2 "vbroadcastf128 16(%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm6; vfmadd231pd %%ymm2,%%ymm3,%%ymm7;" | |||
#define KERNEL_k1m4n4 KERNEL_h_k1m4n4 "addq $32,%1;" | |||
/* Four-column update into c1..c4, reading B at off1/off2 from the address
 * expression given in the variadic part (e.g. %1 or %1,%%r12,1). */
#define unit_kernel_k1m4n4(c1,c2,c3,c4,off1,off2,...) \ | |||
"vbroadcastf128 "#off1"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"\ | |||
"vbroadcastf128 "#off2"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c3"; vfmadd231pd %%ymm2,%%ymm3,"#c4";" | |||
/* n = 8 / n = 12: the 2nd and 3rd groups of four columns are read at
 * %1 + r12 and %1 + 2*r12 (r12 = k << 5 bytes, see the legend at the top). */
#define KERNEL_h_k1m4n8 KERNEL_h_k1m4n4 unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1) | |||
#define KERNEL_k1m4n8 KERNEL_h_k1m4n8 "addq $32,%1;" | |||
#define KERNEL_h_k1m4n12 KERNEL_h_k1m4n8 unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2) | |||
#define KERNEL_k1m4n12 KERNEL_h_k1m4n12 "addq $32,%1;" | |||
/* Two k-steps: simple repetition, except n = 12 which is written out by hand
 * with a prefetch of A 512 bytes ahead and single pointer bumps of 64. */
#define KERNEL_k2m4n1 KERNEL_k1m4n1 KERNEL_k1m4n1 | |||
#define KERNEL_k2m4n2 KERNEL_k1m4n2 KERNEL_k1m4n2 | |||
#define KERNEL_k2m4n4 KERNEL_k1m4n4 KERNEL_k1m4n4 | |||
#define KERNEL_k2m4n8 KERNEL_k1m4n8 KERNEL_k1m4n8 | |||
#define KERNEL_k2m4n12 \ | |||
"vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2;"\ | |||
unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,0,16,%1)\ | |||
unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)\ | |||
unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)\ | |||
"vmovddup 32(%0),%%ymm1; vmovddup 40(%0),%%ymm2; prefetcht0 512(%0); addq $64,%0;"\ | |||
unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,32,48,%1)\ | |||
unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,32,48,%1,%%r12,1)\ | |||
unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,32,48,%1,%%r12,2) "addq $64,%1;" | |||
/* Accumulator clearing: each INIT extends the previous one. */
#define INIT_m4n1 "vpxor %%ymm4,%%ymm4,%%ymm4;" | |||
#define INIT_m4n2 INIT_m4n1 "vpxor %%ymm5,%%ymm5,%%ymm5;" | |||
#define INIT_m4n4 INIT_m4n2 "vpxor %%ymm6,%%ymm6,%%ymm6;vpxor %%ymm7,%%ymm7,%%ymm7;" | |||
#define unit_init_m4n4(c1,c2,c3,c4) \ | |||
"vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";vpxor "#c3","#c3","#c3";vpxor "#c4","#c4","#c4";" | |||
#define INIT_m4n8 INIT_m4n4 unit_init_m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11) | |||
#define INIT_m4n12 INIT_m4n8 unit_init_m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15) | |||
/* n = 1 store: every element of ymm4 is duplicated into a pair and written to
 * the 64 bytes at (%2). */
#define SAVE_h_m4n1 \ | |||
"vpermpd $216,%%ymm4,%%ymm3; vunpcklpd %%ymm3,%%ymm3,%%ymm1; vunpckhpd %%ymm3,%%ymm3,%%ymm2;"\ | |||
"vfmadd213pd (%2),%%ymm0,%%ymm1; vfmadd213pd 32(%2),%%ymm0,%%ymm2; vmovupd %%ymm1,(%2); vmovupd %%ymm2,32(%2);" | |||
/* Two-column store for an accumulator pair: regroups the 128-bit halves of
 * c1/c2, writes 64 bytes at (%5) and 64 bytes at (%5,%3,1), then advances %5
 * by 2 * %3. */
#define unit_save_m4n2(c1,c2) \ | |||
"vperm2f128 $2,"#c1","#c2",%%ymm2; vperm2f128 $19,"#c1","#c2","#c2"; vmovapd %%ymm2,"#c1";"\ | |||
"vunpcklpd "#c1","#c1",%%ymm2; vunpcklpd "#c2","#c2",%%ymm3;"\ | |||
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd 32(%5),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,32(%5);"\ | |||
"vunpckhpd "#c1","#c1",%%ymm2; vunpckhpd "#c2","#c2",%%ymm3;"\ | |||
"vfmadd213pd (%5,%3,1),%%ymm0,%%ymm2; vfmadd213pd 32(%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5,%3,1); vmovupd %%ymm3,32(%5,%3,1);"\ | |||
"leaq (%5,%3,2),%5;" | |||
#define SAVE_h_m4n2 "movq %2,%5;" unit_save_m4n2(%%ymm4,%%ymm5) | |||
#define SAVE_h_m4n4 SAVE_h_m4n2 unit_save_m4n2(%%ymm6,%%ymm7) | |||
#define SAVE_h_m4n8 SAVE_h_m4n4 unit_save_m4n2(%%ymm8,%%ymm9) unit_save_m4n2(%%ymm10,%%ymm11) | |||
#define SAVE_h_m4n12 SAVE_h_m4n8 unit_save_m4n2(%%ymm12,%%ymm13) unit_save_m4n2(%%ymm14,%%ymm15) | |||
/* Store, then move the C pointer down by 64 bytes. */
#define SAVE_m4(ndim) SAVE_h_m4n##ndim "addq $64,%2;" | |||
/* COMPUTE_m4(ndim): full k-loop plus store for one 4-row tile.
 *  - Reloads the k counter (%4) from r13 and the B pointer (%1) from r14, and
 *    points %5 at the current C tile.
 *  - Main loop (label <ndim>004041), entered only when k >= 24 and repeated
 *    while at least 16 steps remain: 8 k-steps per pass, with prefetcht1 on
 *    C via %5 and on next_b via %8. r15 alternates between 126 and %3, so %5
 *    moves by +63 on one pass and by %3 - 63 on the next.
 *  - Tail loop (label <ndim>004042): one k-step per pass with prefetcht0 on C.
 *  - Label <ndim>004043: prefetch the start of the B panel and store the tile.
 * The labels are numeric local labels referenced with the f/b suffixes.
 */
#define COMPUTE_m4(ndim) \ | |||
INIT_m4n##ndim\ | |||
"movq %%r13,%4; movq %%r14,%1; movq %2,%5; xorq %%r15,%%r15;"\ | |||
"cmpq $24,%4; jb "#ndim"004042f;"\ | |||
#ndim"004041:\n\t"\ | |||
"cmpq $126,%%r15; movq $126,%%r15; cmoveq %3,%%r15;"\ | |||
KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\ | |||
"prefetcht1 (%5); subq $63,%5;"\ | |||
KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\ | |||
"addq %%r15,%5; prefetcht1 (%8); addq $32,%8;"\ | |||
"subq $8,%4; cmpq $16,%4; jnb "#ndim"004041b;"\ | |||
"movq %2,%5;"\ | |||
#ndim"004042:\n\t"\ | |||
"testq %4,%4; jz "#ndim"004043f;"\ | |||
"prefetcht0 (%5); prefetcht0 63(%5);"\ | |||
KERNEL_k1m4n##ndim\ | |||
"prefetcht0 (%5,%3,4); prefetcht0 63(%5,%3,4); addq %3,%5;"\ | |||
"decq %4; jmp "#ndim"004042b;"\ | |||
#ndim"004043:\n\t"\ | |||
"prefetcht0 (%%r14); prefetcht0 64(%%r14);"\ | |||
SAVE_m4(ndim) | |||
/* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm9 for accumulators */ | |||
/* ---- m = 2 building blocks -------------------------------------------------
 * Same layering as the m = 4 macros (KERNEL / INIT / SAVE / COMPUTE), for a
 * tile of 2 rows of A. A advances by 16 bytes per k-step.
 */
/* n = 1: two A values in xmm1 times one duplicated B value, into xmm4. */
#define KERNEL_k1m2n1 \ | |||
"vmovupd (%0),%%xmm1; addq $16,%0;"\ | |||
"vmovddup (%1),%%xmm2; vfmadd231pd %%xmm1,%%xmm2,%%xmm4;"\ | |||
"addq $8,%1;" | |||
/* n = 2: each A value is duplicated (xmm1, xmm2) and multiplied by the two B
 * values in xmm3; results go to xmm4 and xmm5. */
#define KERNEL_h_k1m2n2 \ | |||
"vmovddup (%0),%%xmm1; vmovddup 8(%0),%%xmm2; addq $16,%0;"\ | |||
"vmovupd (%1),%%xmm3; vfmadd231pd %%xmm1,%%xmm3,%%xmm4; vfmadd231pd %%xmm2,%%xmm3,%%xmm5;" | |||
#define KERNEL_k1m2n2 KERNEL_h_k1m2n2 "addq $16,%1;" | |||
/* Four-column update: loads 4 B values from the given address expression and
 * accumulates against the broadcast A values in ymm1/ymm2. */
#define unit_kernel_k1m2n4(c1,c2,...) \ | |||
"vmovupd ("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";" | |||
#define KERNEL_h_k1m2n4 \ | |||
"vbroadcastsd (%0),%%ymm1; vbroadcastsd 8(%0),%%ymm2; addq $16,%0;"\ | |||
unit_kernel_k1m2n4(%%ymm4,%%ymm5,%1) | |||
#define KERNEL_k1m2n4 KERNEL_h_k1m2n4 "addq $32,%1;" | |||
/* n = 8 / n = 12: further column groups are read at %1 + r12 and %1 + 2*r12. */
#define KERNEL_h_k1m2n8 KERNEL_h_k1m2n4 \ | |||
unit_kernel_k1m2n4(%%ymm6,%%ymm7,%1,%%r12,1) | |||
#define KERNEL_k1m2n8 KERNEL_h_k1m2n8 "addq $32,%1;" | |||
#define KERNEL_h_k1m2n12 KERNEL_h_k1m2n8 \ | |||
unit_kernel_k1m2n4(%%ymm8,%%ymm9,%1,%%r12,2) | |||
#define KERNEL_k1m2n12 KERNEL_h_k1m2n12 "addq $32,%1;" | |||
/* Accumulator clearing. */
#define INIT_m2n1 "vpxor %%xmm4,%%xmm4,%%xmm4;" | |||
#define INIT_m2n2 INIT_m2n1 "vpxor %%xmm5,%%xmm5,%%xmm5;" | |||
#define unit_init_m2n4(c1,c2) "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";" | |||
#define INIT_m2n4 unit_init_m2n4(%%ymm4,%%ymm5) | |||
#define INIT_m2n8 INIT_m2n4 unit_init_m2n4(%%ymm6,%%ymm7) | |||
#define INIT_m2n12 INIT_m2n8 unit_init_m2n4(%%ymm8,%%ymm9) | |||
/* Stores: each accumulated value is duplicated into a pair, scaled by ymm0
 * and added to the 32 bytes of C it belongs to. */
#define SAVE_h_m2n1 \ | |||
"vinsertf128 $1,%%xmm4,%%ymm4,%%ymm4; vpermilpd $12,%%ymm4,%%ymm4; vfmadd213pd (%2),%%ymm0,%%ymm4; vmovupd %%ymm4,(%2);" | |||
#define SAVE_h_m2n2 \ | |||
"vinsertf128 $1,%%xmm5,%%ymm4,%%ymm4; vunpcklpd %%ymm4,%%ymm4,%%ymm1; vunpckhpd %%ymm4,%%ymm4,%%ymm2;"\ | |||
"vfmadd213pd (%2),%%ymm0,%%ymm1; vmovupd %%ymm1,(%2);"\ | |||
"vfmadd213pd (%2,%3,1),%%ymm0,%%ymm2; vmovupd %%ymm2,(%2,%3,1);" | |||
/* Four-column store for an accumulator pair; %5 is advanced by 4 * %3 in two
 * leaq steps of 2 * %3. */
#define unit_save_m2n4(c1,c2) \ | |||
"vperm2f128 $2,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\ | |||
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"\ | |||
"vperm2f128 $19,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\ | |||
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;" | |||
#define SAVE_h_m2n4 "movq %2,%5;" unit_save_m2n4(%%ymm4,%%ymm5) | |||
#define SAVE_h_m2n8 SAVE_h_m2n4 unit_save_m2n4(%%ymm6,%%ymm7) | |||
#define SAVE_h_m2n12 SAVE_h_m2n8 unit_save_m2n4(%%ymm8,%%ymm9) | |||
/* Store, then move the C pointer down by 32 bytes. */
#define SAVE_m2(ndim) SAVE_h_m2n##ndim "addq $32,%2;" | |||
/* COMPUTE_m2(ndim): clear accumulators, reload k (%4) and the B pointer (%1),
 * run one k-step per pass until %4 reaches zero, then store. */
#define COMPUTE_m2(ndim) \ | |||
INIT_m2n##ndim\ | |||
"movq %%r13,%4; movq %%r14,%1;"\ | |||
#ndim"002022:\n\t"\ | |||
"testq %4,%4; jz "#ndim"002023f;"\ | |||
KERNEL_k1m2n##ndim\ | |||
"decq %4; jmp "#ndim"002022b;"\ | |||
#ndim"002023:\n\t"\ | |||
SAVE_m2(ndim) | |||
/* m = 1 *//* vmm0 for alpha, vmm1-vmm3 and vmm10-vmm15 for temporary use, vmm4-vmm6 for accumulators */ | |||
/* ---- m = 1 building blocks -------------------------------------------------
 * Same layering as above for a single row of A. A advances by 8 bytes per
 * k-step.
 */
/* n = 1: scalar fused multiply-add into xmm4. */
#define KERNEL_k1m1n1 \ | |||
"vmovsd (%0),%%xmm1; addq $8,%0;"\ | |||
"vfmadd231sd (%1),%%xmm1,%%xmm4; addq $8,%1;" | |||
/* n = 2: duplicated A value times two B values, into xmm4. */
#define KERNEL_k1m1n2 \ | |||
"vmovddup (%0),%%xmm1; addq $8,%0;"\ | |||
"vfmadd231pd (%1),%%xmm1,%%xmm4; addq $16,%1;" | |||
/* Four-column update: 4 B values from the given address expression times the
 * broadcast A value in ymm1. */
#define unit_kernel_k1m1n4(c1,...) \ | |||
"vmovupd ("#__VA_ARGS__"),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,"#c1";" | |||
#define KERNEL_h_k1m1n4 \ | |||
"vbroadcastsd (%0),%%ymm1; addq $8,%0;"\ | |||
unit_kernel_k1m1n4(%%ymm4,%1) | |||
#define KERNEL_k1m1n4 KERNEL_h_k1m1n4 "addq $32,%1;" | |||
#define KERNEL_h_k1m1n8 KERNEL_h_k1m1n4 unit_kernel_k1m1n4(%%ymm5,%1,%%r12,1) | |||
#define KERNEL_k1m1n8 KERNEL_h_k1m1n8 "addq $32,%1;" | |||
#define KERNEL_h_k1m1n12 KERNEL_h_k1m1n8 unit_kernel_k1m1n4(%%ymm6,%1,%%r12,2) | |||
#define KERNEL_k1m1n12 KERNEL_h_k1m1n12 "addq $32,%1;" | |||
/* Accumulator clearing; n = 1 and n = 2 both use only xmm4. */
#define INIT_m1n1 INIT_m2n1 | |||
#define INIT_m1n2 INIT_m2n1 | |||
#define INIT_m1n4 "vpxor %%ymm4,%%ymm4,%%ymm4;" | |||
#define INIT_m1n8 INIT_m1n4 "vpxor %%ymm5,%%ymm5,%%ymm5;" | |||
#define INIT_m1n12 INIT_m1n8 "vpxor %%ymm6,%%ymm6,%%ymm6;" | |||
/* Stores: each accumulated value is duplicated into a pair, scaled by
 * xmm0/ymm0 and added to the 16 bytes of C it belongs to. */
#define SAVE_h_m1n1 \ | |||
"vmovddup %%xmm4,%%xmm4; vfmadd213pd (%2),%%xmm0,%%xmm4; vmovupd %%xmm4,(%2);" | |||
#define SAVE_h_m1n2 \ | |||
"vunpcklpd %%xmm4,%%xmm4,%%xmm1; vunpckhpd %%xmm4,%%xmm4,%%xmm2;"\ | |||
"vfmadd213pd (%2),%%xmm0,%%xmm1; vmovupd %%xmm1,(%2);"\ | |||
"vfmadd213pd (%2,%3,1),%%xmm0,%%xmm2; vmovupd %%xmm2,(%2,%3,1);" | |||
/* Four-column store for one accumulator: columns at %5 and %5 + 2*%3 are
 * gathered into one ymm, updated and scattered back; then the same for the
 * columns one %3 further on. %5 ends 4 * %3 beyond its starting value. */
#define unit_save_m1n4(c1) \ | |||
"vunpcklpd "#c1","#c1",%%ymm1; vunpckhpd "#c1","#c1",%%ymm2;"\ | |||
"vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\ | |||
"vfmadd213pd %%ymm3,%%ymm0,%%ymm1; vmovupd %%xmm1,(%5); vextractf128 $1,%%ymm1,(%5,%3,2); addq %3,%5;"\ | |||
"vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\ | |||
"vfmadd213pd %%ymm3,%%ymm0,%%ymm2; vmovupd %%xmm2,(%5); vextractf128 $1,%%ymm2,(%5,%3,2); addq %3,%5; leaq (%5,%3,2),%5;" | |||
#define SAVE_h_m1n4 "movq %2,%5;" unit_save_m1n4(%%ymm4) | |||
#define SAVE_h_m1n8 SAVE_h_m1n4 unit_save_m1n4(%%ymm5) | |||
#define SAVE_h_m1n12 SAVE_h_m1n8 unit_save_m1n4(%%ymm6) | |||
/* Store, then move the C pointer down by 16 bytes. */
#define SAVE_m1(ndim) SAVE_h_m1n##ndim "addq $16,%2;" | |||
/* COMPUTE_m1(ndim): clear accumulators, reload k (%4) and the B pointer (%1),
 * run one k-step per pass until %4 reaches zero, then store. */
#define COMPUTE_m1(ndim) \ | |||
INIT_m1n##ndim\ | |||
"movq %%r13,%4; movq %%r14,%1;"\ | |||
#ndim"001011:\n\t"\ | |||
"testq %4,%4; jz "#ndim"001012f;"\ | |||
KERNEL_k1m1n##ndim\ | |||
"decq %4; jmp "#ndim"001011b;"\ | |||
#ndim"001012:\n\t"\ | |||
SAVE_m1(ndim) | |||
/* COMPUTE(ndim): handle one panel of ndim columns against all M rows.
 * Expects these locals in the enclosing scope: a_pointer, b_pointer,
 * c_pointer, ldc_in_bytes, K, ctemp, const_val, M, next_b and LDC.
 *  - next_b is set to b_pointer + ndim * K and is only used as a prefetch
 *    address (%8).
 *  - ymm0 receives the 16 bytes at const_val (%6) in both 128-bit lanes.
 *  - r13 = K, r12 = K << 5, r14 = start of the B panel, r11 = saved M.
 *  - Row loop: COMPUTE_m4 while at least 4 rows remain in %7, then COMPUTE_m2
 *    once if at least 2 remain, then COMPUTE_m1 if one remains.
 *  - %4, %1 and %7 are restored from r13, r14 and r11 before leaving the asm.
 * After the asm, a_pointer is rewound by M * K, b_pointer moves on by
 * ndim * K and c_pointer by 2 * (LDC * ndim - M) doubles.
 */
#define COMPUTE(ndim) {\ | |||
next_b = b_pointer + ndim * K;\ | |||
__asm__ __volatile__(\ | |||
"vbroadcastf128 (%6),%%ymm0;"\ | |||
"movq %4,%%r13; movq %4,%%r12; salq $5,%%r12; movq %1,%%r14; movq %7,%%r11;"\ | |||
"cmpq $4,%7;jb 33101"#ndim"f;"\ | |||
"33109"#ndim":\n\t"\ | |||
COMPUTE_m4(ndim)\ | |||
"subq $4,%7;cmpq $4,%7;jnb 33109"#ndim"b;"\ | |||
"33101"#ndim":\n\t"\ | |||
"cmpq $2,%7;jb 33104"#ndim"f;"\ | |||
COMPUTE_m2(ndim)\ | |||
"subq $2,%7;"\ | |||
"33104"#ndim":\n\t"\ | |||
"testq %7,%7;jz 33105"#ndim"f;"\ | |||
COMPUTE_m1(ndim)\ | |||
"33105"#ndim":\n\t"\ | |||
"movq %%r13,%4; movq %%r14,%1; movq %%r11,%7;"\ | |||
:"+r"(a_pointer),"+r"(b_pointer),"+r"(c_pointer),"+r"(ldc_in_bytes),"+r"(K),"+r"(ctemp),"+r"(const_val),"+r"(M),"+r"(next_b)\ | |||
::"r11","r12","r13","r14","r15","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14",\ | |||
"xmm15","cc","memory");\ | |||
a_pointer -= M * K; b_pointer += ndim * K; c_pointer += 2*(LDC * ndim - M);\ | |||
} | |||
int __attribute__ ((noinline)) | |||
CNAME(BLASLONG m, BLASLONG n, BLASLONG k, double alphar, double alphai, double * __restrict__ A, double * __restrict__ B, double * __restrict__ C, BLASLONG LDC) | |||
{ | |||
if(m==0||n==0||k==0) return 0; | |||
int64_t ldc_in_bytes = (int64_t)LDC * sizeof(double) * 2; | |||
double constval[2]; constval[0] = alphar; constval[1] = alphai; | |||
double *const_val=constval; | |||
int64_t M = (int64_t)m, K = (int64_t)k; | |||
BLASLONG n_count = n; | |||
double *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B; | |||
for(;n_count>11;n_count-=12) COMPUTE(12) | |||
for(;n_count>7;n_count-=8) COMPUTE(8) | |||
for(;n_count>3;n_count-=4) COMPUTE(4) | |||
for(;n_count>1;n_count-=2) COMPUTE(2) | |||
if(n_count>0) COMPUTE(1) | |||
return 0; | |||
} |
@@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ctrmm4x4V.S | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ztrmm4x4V.S | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
@@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
SGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
@@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c | |||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ctrmm4x4V.S | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ztrmm4x4V.S | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
@@ -94,26 +94,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
@@ -0,0 +1,38 @@ | |||
# AppVeyor CI: build with CMake + JOM using flang from conda-forge, then run
# the test suite through ctest.
image:
  - Visual Studio 2017

configuration: Release
clone_depth: 3

matrix:
  fast_finish: false

skip_commits:
  # Add [av skip] to commit messages
  message: /\[av skip\]/

# Keep the build tree between runs.
cache:
  - '%APPVEYOR_BUILD_FOLDER%\build'

environment:
  global:
    CONDA_INSTALL_LOCN: C:\\Miniconda36-x64

install:
  - call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
  - conda config --add channels conda-forge --force
  - conda install --yes --quiet flang jom
  - call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
  # Let the toolchain find the conda-provided libraries and headers.
  - set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
  - set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"

before_build:
  # The build directory may already exist when restored from the cache.
  - ps: if (-Not (Test-Path .\build)) { mkdir build }
  - cd build
  - cmake -G "NMake Makefiles JOM" -DCMAKE_Fortran_COMPILER=flang -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON ..

build_script:
  - cmake --build .

test_script:
  - ctest -j2
@@ -35,3 +35,9 @@ LAPACKE/example/xexample* | |||
# SED | |||
SRC/*-e | |||
LAPACKE/src/*-e | |||
build* | |||
# DOCS documentation | |||
DOCS/man | |||
DOCS/explore-html | |||
output_err |
@@ -1,33 +1,32 @@ | |||
language: cpp | |||
language: c | |||
dist: xenial | |||
group: travis_latest | |||
git: | |||
depth: 3 | |||
quiet: true | |||
addons: | |||
apt: | |||
sources: | |||
- george-edison55-precise-backports # cmake | |||
packages: | |||
- cmake | |||
- cmake-data | |||
- gfortran | |||
os: | |||
- linux | |||
- osx | |||
env: | |||
- CMAKE_BUILD_TYPE=Release | |||
- CMAKE_BUILD_TYPE=Coverage | |||
- gfortran | |||
install: | |||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; | |||
then | |||
for pkg in gcc cmake; do | |||
if brew list -1 | grep -q "^${pkg}\$"; then | |||
brew outdated $pkg || brew upgrade $pkg; | |||
else | |||
brew install $pkg; | |||
fi | |||
done | |||
fi | |||
matrix: | |||
include: | |||
- os: linux | |||
env: CMAKE_BUILD_TYPE=Release | |||
- os: linux | |||
env: CMAKE_BUILD_TYPE=Coverage | |||
- os: osx | |||
env: CMAKE_BUILD_TYPE=Release | |||
before_install: | |||
- brew update > /dev/null | |||
- brew install gcc > /dev/null | |||
- os: osx | |||
env: CMAKE_BUILD_TYPE=Coverage | |||
before_install: | |||
- brew update > /dev/null | |||
- brew install gcc > /dev/null | |||
script: | |||
- export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST | |||
@@ -6,4 +6,5 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/blas.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
COMPONENT Development | |||
) |
@@ -1,13 +1,18 @@ | |||
include ../make.inc | |||
TOPSRCDIR = .. | |||
include $(TOPSRCDIR)/make.inc | |||
.PHONY: all | |||
all: blas | |||
.PHONY: blas | |||
blas: | |||
$(MAKE) -C SRC | |||
.PHONY: blas_testing | |||
blas_testing: blas | |||
$(MAKE) -C TESTING run | |||
.PHONY: clean cleanobj cleanlib cleanexe cleantest | |||
clean: | |||
$(MAKE) -C SRC clean | |||
$(MAKE) -C TESTING clean | |||
@@ -1,5 +1,3 @@ | |||
include ../../make.inc | |||
####################################################################### | |||
# This is the makefile to create a library for the BLAS. | |||
# The files are grouped as follows: | |||
@@ -55,6 +53,10 @@ include ../../make.inc | |||
# | |||
####################################################################### | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
.PHONY: all | |||
all: $(BLASLIB) | |||
#--------------------------------------------------------- | |||
@@ -138,33 +140,32 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ | |||
$(ZBLAS2) $(ZBLAS3) $(ALLBLAS) | |||
$(BLASLIB): $(ALLOBJ) | |||
$(ARCH) $(ARCHFLAGS) $@ $^ | |||
$(AR) $(ARFLAGS) $@ $^ | |||
$(RANLIB) $@ | |||
.PHONY: single double complex complex16 | |||
single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) | |||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
$(RANLIB) $(BLASLIB) | |||
double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) | |||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
$(RANLIB) $(BLASLIB) | |||
complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) | |||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
$(RANLIB) $(BLASLIB) | |||
complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) | |||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(BLASLIB) $^ | |||
$(RANLIB) $(BLASLIB) | |||
FRC: | |||
@FRC=$(FRC) | |||
.PHONY: clean cleanobj cleanlib | |||
clean: cleanobj cleanlib | |||
cleanobj: | |||
rm -f *.o | |||
cleanlib: | |||
#rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas | |||
.f.o: | |||
$(FORTRAN) $(OPTS) -c -o $@ $< |
@@ -43,7 +43,7 @@ | |||
*> \param[in] INCX | |||
*> \verbatim | |||
*> INCX is INTEGER | |||
*> storage spacing between elements of SX | |||
*> storage spacing between elements of CX | |||
*> \endverbatim | |||
* | |||
* Authors: | |||
@@ -43,7 +43,7 @@ | |||
*> \param[in] INCX | |||
*> \verbatim | |||
*> INCX is INTEGER | |||
*> storage spacing between elements of SX | |||
*> storage spacing between elements of DX | |||
*> \endverbatim | |||
* | |||
* Authors: | |||
@@ -43,7 +43,7 @@ | |||
*> \param[in] INCX | |||
*> \verbatim | |||
*> INCX is INTEGER | |||
*> storage spacing between elements of SX | |||
*> storage spacing between elements of ZX | |||
*> \endverbatim | |||
* | |||
* Authors: | |||
@@ -0,0 +1,29 @@ | |||
SBLAS1 = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'sdot.f', 'snrm2.f', 'srot.f', 'srotg.f', 'sscal.f', 'sswap.f', 'sdsdot.f', 'srotmg.f', 'srotm.f') | |||
CBLAS1 = files('scabs1.f', 'scasum.f', 'scnrm2.f', 'icamax.f', 'caxpy.f', 'ccopy.f', 'cdotc.f', 'cdotu.f', 'csscal.f', 'crotg.f', 'cscal.f', 'cswap.f', 'csrot.f') | |||
DBLAS1 = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'ddot.f', 'dnrm2.f', 'drot.f', 'drotg.f', 'dscal.f', 'dsdot.f', 'dswap.f', 'drotmg.f', 'drotm.f') | |||
ZBLAS1 = files('dcabs1.f', 'dzasum.f', 'dznrm2.f', 'izamax.f', 'zaxpy.f', 'zcopy.f', 'zdotc.f', 'zdotu.f', 'zdscal.f', 'zrotg.f', 'zscal.f', 'zswap.f', 'zdrot.f') | |||
CB1AUX = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'snrm2.f', 'sscal.f') | |||
ZB1AUX = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'dnrm2.f', 'dscal.f') | |||
ALLBLAS = files('lsame.f', 'xerbla.f', 'xerbla_array.f') | |||
SBLAS2 = files('sgemv.f', 'sgbmv.f', 'ssymv.f', 'ssbmv.f', 'sspmv.f', 'strmv.f', 'stbmv.f', 'stpmv.f', 'strsv.f', 'stbsv.f', 'stpsv.f', 'sger.f', 'ssyr.f', 'sspr.f', 'ssyr2.f', 'sspr2.f') | |||
CBLAS2 = files('cgemv.f', 'cgbmv.f', 'chemv.f', 'chbmv.f', 'chpmv.f', 'ctrmv.f', 'ctbmv.f', 'ctpmv.f', 'ctrsv.f', 'ctbsv.f', 'ctpsv.f', 'cgerc.f', 'cgeru.f', 'cher.f', 'chpr.f', 'cher2.f', 'chpr2.f') | |||
DBLAS2 = files('dgemv.f', 'dgbmv.f', 'dsymv.f', 'dsbmv.f', 'dspmv.f', 'dtrmv.f', 'dtbmv.f', 'dtpmv.f', 'dtrsv.f', 'dtbsv.f', 'dtpsv.f', 'dger.f', 'dsyr.f', 'dspr.f', 'dsyr2.f', 'dspr2.f') | |||
ZBLAS2 = files('zgemv.f', 'zgbmv.f', 'zhemv.f', 'zhbmv.f', 'zhpmv.f', 'ztrmv.f', 'ztbmv.f', 'ztpmv.f', 'ztrsv.f', 'ztbsv.f', 'ztpsv.f', 'zgerc.f', 'zgeru.f', 'zher.f', 'zhpr.f', 'zher2.f', 'zhpr2.f') | |||
SBLAS3 = files('sgemm.f', 'ssymm.f', 'ssyrk.f', 'ssyr2k.f', 'strmm.f', 'strsm.f') | |||
CBLAS3 = files('cgemm.f', 'csymm.f', 'csyrk.f', 'csyr2k.f', 'ctrmm.f', 'ctrsm.f', 'chemm.f', 'cherk.f', 'cher2k.f') | |||
DBLAS3 = files('dgemm.f', 'dsymm.f', 'dsyrk.f', 'dsyr2k.f', 'dtrmm.f', 'dtrsm.f') | |||
ZBLAS3 = files('zgemm.f', 'zsymm.f', 'zsyrk.f', 'zsyr2k.f', 'ztrmm.f', 'ztrsm.f', 'zhemm.f', 'zherk.f', 'zher2k.f') |
@@ -23,13 +23,13 @@ | |||
*> | |||
*> \verbatim | |||
*> | |||
* Compute the inner product of two vectors with extended | |||
* precision accumulation. | |||
* | |||
* Returns S.P. result with dot product accumulated in D.P. | |||
* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||
* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||
* defined in a similar way using INCY. | |||
*> Compute the inner product of two vectors with extended | |||
*> precision accumulation. | |||
*> | |||
*> Returns S.P. result with dot product accumulated in D.P. | |||
*> SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||
*> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||
*> defined in a similar way using INCY. | |||
*> \endverbatim | |||
* | |||
* Arguments: | |||
@@ -77,7 +77,14 @@ | |||
*> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | |||
*> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | |||
* | |||
*> \ingroup complex_blas_level1 | |||
*> \author Univ. of Tennessee | |||
*> \author Univ. of California Berkeley | |||
*> \author Univ. of Colorado Denver | |||
*> \author NAG Ltd. | |||
* | |||
*> \date November 2017 | |||
* | |||
*> \ingroup single_blas_level1 | |||
* | |||
*> \par Further Details: | |||
* ===================== | |||
@@ -102,65 +109,7 @@ | |||
*> 920501 Reformatted the REFERENCES section. (WRB) | |||
*> 070118 Reformat to LAPACK coding style | |||
*> \endverbatim | |||
* | |||
* ===================================================================== | |||
* | |||
* .. Local Scalars .. | |||
* DOUBLE PRECISION DSDOT | |||
* INTEGER I,KX,KY,NS | |||
* .. | |||
* .. Intrinsic Functions .. | |||
* INTRINSIC DBLE | |||
* .. | |||
* DSDOT = SB | |||
* IF (N.LE.0) THEN | |||
* SDSDOT = DSDOT | |||
* RETURN | |||
* END IF | |||
* IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN | |||
* | |||
* Code for equal and positive increments. | |||
* | |||
* NS = N*INCX | |||
* DO I = 1,NS,INCX | |||
* DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I)) | |||
* END DO | |||
* ELSE | |||
* | |||
* Code for unequal or nonpositive increments. | |||
* | |||
* KX = 1 | |||
* KY = 1 | |||
* IF (INCX.LT.0) KX = 1 + (1-N)*INCX | |||
* IF (INCY.LT.0) KY = 1 + (1-N)*INCY | |||
* DO I = 1,N | |||
* DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY)) | |||
* KX = KX + INCX | |||
* KY = KY + INCY | |||
* END DO | |||
* END IF | |||
* SDSDOT = DSDOT | |||
* RETURN | |||
* END | |||
* | |||
*> \par Purpose: | |||
* ============= | |||
*> | |||
*> \verbatim | |||
*> \endverbatim | |||
* | |||
* Authors: | |||
* ======== | |||
* | |||
*> \author Univ. of Tennessee | |||
*> \author Univ. of California Berkeley | |||
*> \author Univ. of Colorado Denver | |||
*> \author NAG Ltd. | |||
* | |||
*> \date November 2017 | |||
* | |||
*> \ingroup single_blas_level1 | |||
* | |||
* ===================================================================== | |||
REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY) | |||
* | |||
@@ -175,71 +124,6 @@ | |||
* .. | |||
* .. Array Arguments .. | |||
REAL SX(*),SY(*) | |||
* .. | |||
* | |||
* PURPOSE | |||
* ======= | |||
* | |||
* Compute the inner product of two vectors with extended | |||
* precision accumulation. | |||
* | |||
* Returns S.P. result with dot product accumulated in D.P. | |||
* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), | |||
* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is | |||
* defined in a similar way using INCY. | |||
* | |||
* AUTHOR | |||
* ====== | |||
* Lawson, C. L., (JPL), Hanson, R. J., (SNLA), | |||
* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL) | |||
* | |||
* ARGUMENTS | |||
* ========= | |||
* | |||
* N (input) INTEGER | |||
* number of elements in input vector(s) | |||
* | |||
* SB (input) REAL | |||
* single precision scalar to be added to inner product | |||
* | |||
* SX (input) REAL array, dimension (N) | |||
* single precision vector with N elements | |||
* | |||
* INCX (input) INTEGER | |||
* storage spacing between elements of SX | |||
* | |||
* SY (input) REAL array, dimension (N) | |||
* single precision vector with N elements | |||
* | |||
* INCY (input) INTEGER | |||
* storage spacing between elements of SY | |||
* | |||
* SDSDOT (output) REAL | |||
* single precision dot product (SB if N .LE. 0) | |||
* | |||
* Further Details | |||
* =============== | |||
* | |||
* REFERENCES | |||
* | |||
* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. | |||
* Krogh, Basic linear algebra subprograms for Fortran | |||
* usage, Algorithm No. 539, Transactions on Mathematical | |||
* Software 5, 3 (September 1979), pp. 308-323. | |||
* | |||
* REVISION HISTORY (YYMMDD) | |||
* | |||
* 791001 DATE WRITTEN | |||
* 890531 Changed all specific intrinsics to generic. (WRB) | |||
* 890831 Modified array declarations. (WRB) | |||
* 890831 REVISION DATE from Version 3.2 | |||
* 891214 Prologue converted to Version 4.0 format. (BAB) | |||
* 920310 Corrected definition of LX in DESCRIPTION. (WRB) | |||
* 920501 Reformatted the REFERENCES section. (WRB) | |||
* 070118 Reformat to LAPACK coding style | |||
* | |||
* ===================================================================== | |||
* | |||
* .. Local Scalars .. | |||
DOUBLE PRECISION DSDOT | |||
INTEGER I,KX,KY,NS | |||
@@ -1,5 +1,7 @@ | |||
include ../../make.inc | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
.PHONY: all single double complex complex16 | |||
all: single double complex complex16 | |||
single: xblat1s xblat2s xblat3s | |||
double: xblat1d xblat2d xblat3d | |||
@@ -7,32 +9,33 @@ complex: xblat1c xblat2c xblat3c | |||
complex16: xblat1z xblat2z xblat3z | |||
xblat1s: sblat1.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat1d: dblat1.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat1c: cblat1.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat1z: zblat1.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat2s: sblat2.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat2d: dblat2.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat2c: cblat2.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat2z: zblat2.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat3s: sblat3.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat3d: dblat3.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat3c: cblat3.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xblat3z: zblat3.o $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
.PHONY: run | |||
run: all | |||
./xblat1s > sblat1.out | |||
./xblat1d > dblat1.out | |||
@@ -47,6 +50,7 @@ run: all | |||
./xblat3c < cblat3.in | |||
./xblat3z < zblat3.in | |||
.PHONY: clean cleanobj cleanexe cleantest | |||
clean: cleanobj cleanexe cleantest | |||
cleanobj: | |||
rm -f *.o | |||
@@ -54,6 +58,3 @@ cleanexe: | |||
rm -f xblat* | |||
cleantest: | |||
rm -f *.out core | |||
.f.o: | |||
$(FORTRAN) $(OPTS) -c -o $@ $< |
@@ -619,7 +619,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -991,7 +991,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -946,7 +946,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -619,7 +619,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -12,8 +12,10 @@ FortranCInterface_HEADER(${LAPACK_BINARY_DIR}/include/cblas_mangling.h | |||
SYMBOL_NAMESPACE "F77_") | |||
if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | |||
message(WARNING "Reverting to pre-defined include/lapacke_mangling.h") | |||
configure_file(include/lapacke_mangling_with_flags.h.in | |||
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||
configure_file(include/lapacke_mangling_with_flags.h.in | |||
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||
configure_file(include/cblas_mangling_with_flags.h.in | |||
${LAPACK_BINARY_DIR}/include/cblas_mangling.h) | |||
endif() | |||
include_directories(include ${LAPACK_BINARY_DIR}/include) | |||
@@ -28,7 +30,10 @@ endforeach() | |||
endmacro() | |||
append_subdir_files(CBLAS_INCLUDE "include") | |||
install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h | |||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
COMPONENT Development | |||
) | |||
# -------------------------------------------------- | |||
if(BUILD_TESTING) | |||
@@ -45,7 +50,9 @@ endif() | |||
set(_cblas_config_install_guard_target "") | |||
if(ALL_TARGETS) | |||
install(EXPORT cblas-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
# Choose one of the cblas targets to use as a guard for | |||
# cblas-config.cmake to load targets from the install tree. | |||
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target) | |||
@@ -82,4 +89,6 @@ install(FILES | |||
) | |||
#install(EXPORT cblas-targets | |||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}) | |||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
# COMPONENT Development | |||
# ) |
@@ -1,19 +1,25 @@ | |||
include ../make.inc | |||
TOPSRCDIR = .. | |||
include $(TOPSRCDIR)/make.inc | |||
.PHONY: all | |||
all: cblas | |||
.PHONY: cblas | |||
cblas: include/cblas_mangling.h | |||
$(MAKE) -C src | |||
include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in | |||
cp $< $@ | |||
cp include/cblas_mangling_with_flags.h.in $@ | |||
.PHONY: cblas_testing | |||
cblas_testing: cblas | |||
$(MAKE) -C testing run | |||
.PHONY: cblas_example | |||
cblas_example: cblas | |||
$(MAKE) -C examples | |||
.PHONY: clean cleanobj cleanlib cleanexe cleantest | |||
clean: | |||
$(MAKE) -C src clean | |||
$(MAKE) -C testing clean | |||
@@ -1,17 +1,21 @@ | |||
include ../../make.inc | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
.SUFFIXES: .c .o | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
.PHONY: all | |||
all: cblas_ex1 cblas_ex2 | |||
cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
.PHONY: clean cleanobj cleanexe | |||
clean: cleanobj cleanexe | |||
cleanobj: | |||
rm -f *.o | |||
cleanexe: | |||
rm -f cblas_ex1 cblas_ex2 | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< |
@@ -47,7 +47,7 @@ int main ( ) | |||
a[m*3+1] = 6; | |||
a[m*3+2] = 7; | |||
a[m*3+3] = 8; | |||
/* The elemetns of x and y */ | |||
/* The elements of x and y */ | |||
x[0] = 1; | |||
x[1] = 2; | |||
x[2] = 1; | |||
@@ -1,7 +1,13 @@ | |||
# This Makefile compiles the CBLAS routines | |||
include ../../make.inc | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
.SUFFIXES: .c .o | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
.PHONY: all | |||
all: $(CBLASLIB) | |||
# Error handling routines for level 2 & 3 | |||
@@ -43,24 +49,25 @@ zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \ | |||
# Common files for level 1 single precision | |||
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o | |||
.PHONY: slib1 dlib1 clib1 zlib1 | |||
# Single precision real | |||
slib1: $(slev1) $(sclev1) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Double precision real | |||
dlib1: $(dlev1) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Single precision complex | |||
clib1: $(clev1) $(sclev1) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Double precision complex | |||
zlib1: $(zlev1) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# | |||
@@ -95,24 +102,25 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \ | |||
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \ | |||
cblas_zhpr.o cblas_zhpr2.o | |||
.PHONY: slib2 dlib2 clib2 zlib2 | |||
# Single precision real | |||
slib2: $(slev2) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Double precision real | |||
dlib2: $(dlev2) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Single precision complex | |||
clib2: $(clev2) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Double precision complex | |||
zlib2: $(zlev2) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# | |||
@@ -141,24 +149,25 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \ | |||
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \ | |||
cblas_zsyr2k.o | |||
.PHONY: slib3 dlib3 clib3 zlib3 | |||
# Single precision real | |||
slib3: $(slev3) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Double precision real | |||
dlib3: $(dlev3) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Single precision complex | |||
clib3: $(clev3) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# Double precision complex | |||
zlib3: $(zlev3) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
@@ -166,36 +175,33 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) | |||
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) | |||
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) | |||
.PHONY: all1 all2 all3 | |||
# All level 1 | |||
all1: $(alev1) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# All level 2 | |||
all2: $(alev2) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# All level 3 | |||
all3: $(alev3) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^ | |||
$(AR) $(ARFLAGS) $(CBLASLIB) $^ | |||
$(RANLIB) $(CBLASLIB) | |||
# All levels and precisions | |||
$(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand) | |||
$(ARCH) $(ARCHFLAGS) $@ $^ | |||
$(AR) $(ARFLAGS) $@ $^ | |||
$(RANLIB) $@ | |||
FRC: | |||
@FRC=$(FRC) | |||
.PHONY: clean cleanobj cleanlib | |||
clean: cleanobj cleanlib | |||
cleanobj: | |||
rm -f *.o | |||
cleanlib: | |||
rm -f $(CBLASLIB) | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
.f.o: | |||
$(FORTRAN) $(OPTS) -c -o $@ $< |
@@ -91,7 +91,7 @@ void cblas_sgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA, | |||
else | |||
{ | |||
cblas_xerbla(2, "cblas_sgemm", | |||
"Illegal TransA setting, %d\n", TransA); | |||
"Illegal TransB setting, %d\n", TransB); | |||
CBLAS_CallFromC = 0; | |||
RowMajorStrg = 0; | |||
return; | |||
@@ -2,7 +2,12 @@ | |||
# The Makefile compiles c wrappers and testers for CBLAS. | |||
# | |||
include ../../make.inc | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
.SUFFIXES: .c .o | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
# Archive files necessary to compile | |||
LIB = $(CBLASLIB) $(BLASLIB) | |||
@@ -27,6 +32,7 @@ ztestl1o = c_zblas1.o | |||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o | |||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o | |||
.PHONY: all all1 all2 all3 | |||
all: all1 all2 all3 | |||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | |||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | |||
@@ -38,37 +44,38 @@ all3: xscblat3 xdcblat3 xccblat3 xzcblat3 | |||
# Single real | |||
xscblat1: c_sblat1.o $(stestl1o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xscblat2: c_sblat2.o $(stestl2o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xscblat3: c_sblat3.o $(stestl3o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
# Double real | |||
xdcblat1: c_dblat1.o $(dtestl1o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xdcblat2: c_dblat2.o $(dtestl2o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xdcblat3: c_dblat3.o $(dtestl3o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
# Single complex | |||
xccblat1: c_cblat1.o $(ctestl1o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xccblat2: c_cblat2.o $(ctestl2o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xccblat3: c_cblat3.o $(ctestl3o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
# Double complex | |||
xzcblat1: c_zblat1.o $(ztestl1o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xzcblat2: c_zblat2.o $(ztestl2o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
xzcblat3: c_zblat3.o $(ztestl3o) $(LIB) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
# RUN TESTS | |||
.PHONY: run | |||
run: all | |||
@echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--" | |||
@./xscblat1 > stest1.out | |||
@@ -95,6 +102,7 @@ run: all | |||
@echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--" | |||
@./xzcblat3 < zin3 > ztest3.out | |||
.PHONY: clean cleanobj cleanexe cleantest | |||
clean: cleanobj cleanexe cleantest | |||
cleanobj: | |||
rm -f *.o | |||
@@ -102,9 +110,3 @@ cleanexe: | |||
rm -f x* | |||
cleantest: | |||
rm -f *.out core | |||
.SUFFIXES: .o .f .c | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
.f.o: | |||
$(FORTRAN) $(OPTS) -c -o $@ $< |
@@ -577,7 +577,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -653,7 +653,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -653,7 +653,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -577,7 +577,7 @@ | |||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) | |||
* ************************* STEST1 ***************************** | |||
* | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN | |||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN | |||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE | |||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. | |||
* | |||
@@ -1,4 +1,4 @@ | |||
# This module checks against various known compilers and thier respective | |||
# This module checks against various known compilers and their respective | |||
# flags to determine any specific flags needing to be set. | |||
# | |||
# 1. If FPE traps are enabled either abort or disable them | |||
@@ -20,7 +20,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) | |||
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | |||
foreach (LANG ${ENABLED_LANGUAGES}) | |||
# Gcov evaluation is dependend on the used compiler. Check gcov support for | |||
# Gcov evaluation is dependent on the used compiler. Check gcov support for | |||
# each compiler that is used. If gcov binary was already found for this | |||
# compiler, do not try to find it again. | |||
if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN) | |||
@@ -42,7 +42,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY}) | |||
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) | |||
foreach (LANG ${ENABLED_LANGUAGES}) | |||
# Coverage flags are not dependend on language, but the used compiler. So | |||
# Coverage flags are not dependent on language, but the used compiler. So | |||
# instead of searching flags foreach language, search flags foreach compiler | |||
# used. | |||
set(COMPILER ${CMAKE_${LANG}_COMPILER_ID}) | |||
@@ -24,7 +24,7 @@ message(STATUS "=========") | |||
set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL | |||
"Fortran compiler option for setting executable file name.") | |||
else() | |||
# in other case, let user specify their fortran configrations. | |||
# in other case, let user specify their fortran configurations. | |||
set(F77_OPTION_COMPILE "-c" CACHE STRING | |||
"Fortran compiler option for compiling without linking.") | |||
set(F77_OUTPUT_OBJ "-o" CACHE STRING | |||
@@ -5,6 +5,10 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
endif() | |||
unset(_LAPACK_TARGET) | |||
# Hint for project building against lapack | |||
set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@") | |||
# Report the blas and lapack raw or imported libraries. | |||
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | |||
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | |||
set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES}) |
@@ -8,8 +8,12 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
endif() | |||
unset(_LAPACK_TARGET) | |||
# Hint for project building against lapack | |||
set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@") | |||
# Report the blas and lapack raw or imported libraries. | |||
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@") | |||
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@") | |||
set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES}) | |||
unset(_LAPACK_SELF_DIR) |
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.12) | |||
project(LAPACK Fortran C) | |||
set(LAPACK_MAJOR_VERSION 3) | |||
set(LAPACK_MINOR_VERSION 8) | |||
set(LAPACK_MINOR_VERSION 9) | |||
set(LAPACK_PATCH_VERSION 0) | |||
set( | |||
LAPACK_VERSION | |||
@@ -13,6 +13,9 @@ set( | |||
# Add the CMake directory for custom CMake modules | |||
set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) | |||
# Export all symbols on Windows when building shared libraries | |||
SET(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||
# Set a default build type if none was specified | |||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | |||
message(STATUS "Setting build type to 'Release' as none was specified.") | |||
@@ -21,8 +24,19 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) | |||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage") | |||
endif() | |||
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) | |||
if(${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE") | |||
# Coverage | |||
# Detect whether this is a 'Coverage' build and, if so, pull in codecov.
# _is_coverage_build is 1 only for single-configuration generators whose
# CMAKE_BUILD_TYPE equals "coverage" (compared case-insensitively); it stays 0
# for multi-configuration generators (CMAKE_CONFIGURATION_TYPES set).
set(_is_coverage_build 0)
set(_msg "Checking if build type is 'Coverage'")
message(STATUS "${_msg}")
if(NOT CMAKE_CONFIGURATION_TYPES)
  # Quote the expansion so an empty CMAKE_BUILD_TYPE yields an empty string
  # instead of a missing argument (which is a hard configure error).
  string(TOLOWER "${CMAKE_BUILD_TYPE}" _build_type_lc)
  # Pass the variable name, not its expansion: if() dereferences it exactly
  # once, so the build-type value is never re-interpreted as a variable name.
  if(_build_type_lc STREQUAL "coverage")
    set(_is_coverage_build 1)
  endif()
endif()
message(STATUS "${_msg}: ${_is_coverage_build}")
if(_is_coverage_build)
  message(STATUS "Adding coverage")
  find_package(codecov)
endif()
@@ -58,18 +72,18 @@ include(PreventInSourceBuilds) | |||
include(PreventInBuildInstalls) | |||
if(UNIX) | |||
if("${CMAKE_Fortran_COMPILER}" MATCHES "ifort") | |||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") | |||
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
  # CMAKE_Fortran_FLAGS is a space-separated command-line string, not a CMake
  # list. list(APPEND) would join with ';' (e.g. "-O2;-fp-model strict") and
  # corrupt the compiler command line, so extend it by string concatenation.
  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
endif()
if("${CMAKE_Fortran_COMPILER}" MATCHES "xlf") | |||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") | |||
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
  # CMAKE_Fortran_FLAGS is a space-separated command-line string, not a CMake
  # list. list(APPEND) would insert a ';' separator before the new flags and
  # corrupt the compiler command line, so extend it by string concatenation.
  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
endif()
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | |||
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | |||
string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}") | |||
endif() | |||
if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq") | |||
if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq) | |||
if(WIN32) | |||
if(CMAKE_GENERATOR STREQUAL "NMake Makefiles") | |||
get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE) | |||
@@ -96,24 +110,16 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq") | |||
endif() | |||
endif() | |||
# Get Python | |||
message(STATUS "Looking for Python greater than 2.6 - ${PYTHONINTERP_FOUND}") | |||
find_package(PythonInterp 2.7) # lapack_testing.py uses features from python 2.7 and greater | |||
if(PYTHONINTERP_FOUND) | |||
message(STATUS "Using Python version ${PYTHON_VERSION_STRING}") | |||
else() | |||
message(STATUS "No suitable Python version found, so skipping summary tests.") | |||
endif() | |||
# -------------------------------------------------- | |||
# -------------------------------------------------- | |||
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | |||
macro(lapack_install_library lib) | |||
install(TARGETS ${lib} | |||
EXPORT ${LAPACK_INSTALL_EXPORT_NAME} | |||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} | |||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} | |||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} | |||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT Development | |||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RuntimeLibraries | |||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT RuntimeLibraries | |||
) | |||
endmacro() | |||
@@ -121,12 +127,22 @@ set(PKG_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig) | |||
# -------------------------------------------------- | |||
# Testing | |||
option(BUILD_TESTING "Build tests" OFF) | |||
enable_testing() | |||
option(BUILD_TESTING "Build tests" ${_is_coverage_build}) | |||
include(CTest) | |||
enable_testing() | |||
message(STATUS "Build tests: ${BUILD_TESTING}") | |||
# lapack_testing.py uses features from python 2.7 and greater | |||
if(BUILD_TESTING) | |||
set(_msg "Looking for Python >= 2.7 needed for summary tests") | |||
message(STATUS "${_msg}") | |||
find_package(PythonInterp 2.7 QUIET) | |||
if(PYTHONINTERP_FOUND) | |||
message(STATUS "${_msg} - found (${PYTHON_VERSION_STRING})") | |||
else() | |||
message(STATUS "${_msg} - not found (skipping summary tests)") | |||
endif() | |||
endif() | |||
# -------------------------------------------------- | |||
# Organize output files. On Windows this also keeps .dll files next | |||
# to the .exe files that need them, making tests easy to run. | |||
@@ -299,16 +315,40 @@ if(LAPACKE) | |||
add_subdirectory(LAPACKE) | |||
endif() | |||
#------------------------------------- | |||
# BLAS++ / LAPACK++ | |||
option(BLAS++ "Build BLAS++" OFF) | |||
option(LAPACK++ "Build LAPACK++" OFF) | |||
function(_display_cpp_implementation_msg name) | |||
string(TOLOWER ${name} name_lc) | |||
message(STATUS "${name}++ enable") | |||
message(STATUS "----------------") | |||
message(STATUS "Thank you for your interest in ${name}++, a newly developed C++ API for ${name} library") | |||
message(STATUS "The objective of ${name}++ is to provide a convenient, performance oriented API for development in the C++ language, that, for the most part, preserves established conventions, while, at the same time, takes advantages of modern C++ features, such as: namespaces, templates, exceptions, etc.") | |||
message(STATUS "We are still working on integrating ${name}++ in our library. For the moment, you can download directly ${name_lc}++ from https://bitbucket.org/icl/${name_lc}pp") | |||
message(STATUS "For support ${name}++ related question, please email: slate-user@icl.utk.edu") | |||
message(STATUS "----------------") | |||
endfunction() | |||
if(BLAS++) | |||
_display_cpp_implementation_msg("BLAS") | |||
endif() | |||
if(LAPACK++) | |||
_display_cpp_implementation_msg("LAPACK") | |||
endif() | |||
# -------------------------------------------------- | |||
# CPACK Packaging | |||
set(CPACK_PACKAGE_NAME "LAPACK") | |||
set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd") | |||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package") | |||
set(CPACK_PACKAGE_VERSION_MAJOR 3) | |||
set(CPACK_PACKAGE_VERSION_MINOR 5) | |||
set(CPACK_PACKAGE_VERSION_PATCH 0) | |||
set(CPACK_PACKAGE_VERSION_MAJOR ${LAPACK_MAJOR_VERSION}) | |||
set(CPACK_PACKAGE_VERSION_MINOR ${LAPACK_MINOR_VERSION}) | |||
set(CPACK_PACKAGE_VERSION_PATCH ${LAPACK_PATCH_VERSION}) | |||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") | |||
set(CPACK_MONOLITHIC_INSTALL ON) | |||
set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK") | |||
if(WIN32 AND NOT UNIX) | |||
# There is a bug in NSI that does not handle full unix paths properly. Make | |||
@@ -347,7 +387,9 @@ endif() | |||
set(_lapack_config_install_guard_target "") | |||
if(ALL_TARGETS) | |||
install(EXPORT lapack-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}) | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
# Choose one of the lapack targets to use as a guard for | |||
# lapack-config.cmake to load targets from the install tree. | |||
@@ -382,6 +424,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_D | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
COMPONENT Development | |||
) | |||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | |||
@@ -398,4 +441,6 @@ install(FILES | |||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | |||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
@@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK | |||
# could be handy for archiving the generated documentation or if some version | |||
# control system is used. | |||
PROJECT_NUMBER = 3.8.0 | |||
PROJECT_NUMBER = 3.9.0 | |||
# Using the PROJECT_BRIEF tag one can provide an optional one line description | |||
# for a project that appears at the top of each page and should give viewer a | |||
@@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK | |||
# could be handy for archiving the generated documentation or if some version | |||
# control system is used. | |||
PROJECT_NUMBER = 3.8.0 | |||
PROJECT_NUMBER = 3.9.0 | |||
# Using the PROJECT_BRIEF tag one can provide an optional one line description | |||
# for a project that appears at the top of each page and should give viewer a | |||
@@ -439,39 +439,39 @@ SHELL = /bin/sh | |||
\end{quote} | |||
and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are | |||
installing LAPACK on an SGI architecture. | |||
Second, you will | |||
need to modify the \texttt{PLAT} definition, which is appended to all | |||
library names, to specify the architecture to which you are installing | |||
LAPACK. This features avoids confusion in library names when you are | |||
installing LAPACK on more than one architecture. Next, you will need | |||
to modify \texttt{FORTRAN}, \texttt{OPTS}, \texttt{DRVOPTS}, \texttt{NOOPT}, \texttt{LOADER}, | |||
and \texttt{LOADOPTS} to specify | |||
Next, you will need to modify \texttt{FC}, \texttt{FFLAGS}, | |||
\texttt{FFLAGS\_DRV}, \texttt{FFLAGS\_NOOPT}, and \texttt{LDFLAGS} to specify | |||
the compiler, compiler options, compiler options for the testing and | |||
timing\footnotemark[\value{footnote}] main programs, loader, loader options. | |||
Next you will have to choose which function you will use to time in the \texttt{SECOND} and \texttt{DSECND} routines. | |||
timing\footnotemark[\value{footnote}] main programs, and linker options. | |||
Next you will have to choose which function you will use to time in the | |||
\texttt{SECOND} and \texttt{DSECND} routines. | |||
\begin{verbatim} | |||
#The Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME | |||
TIMER = EXT_ETIME | |||
# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_ | |||
# TIMER = EXT_ETIME_ | |||
# For gfortran compiler: SECOND and DSECND will use the INTERNAL FUNCTION ETIME | |||
# TIMER = INT_ETIME | |||
# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...) | |||
# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME | |||
# TIMER = INT_CPU_TIME | |||
# If neither of this works...you can use the NONE value... | |||
# In that case, SECOND and DSECND will always return 0 | |||
# TIMER = NONE | |||
# Default: SECOND and DSECND will use a call to the | |||
# EXTERNAL FUNCTION ETIME | |||
#TIMER = EXT_ETIME | |||
# For RS6K: SECOND and DSECND will use a call to the | |||
# EXTERNAL FUNCTION ETIME_ | |||
#TIMER = EXT_ETIME_ | |||
# For gfortran compiler: SECOND and DSECND will use a call to the | |||
# INTERNAL FUNCTION ETIME | |||
TIMER = INT_ETIME | |||
# If your Fortran compiler does not provide etime (like Nag Fortran | |||
# Compiler, etc...) SECOND and DSECND will use a call to the | |||
# INTERNAL FUNCTION CPU_TIME | |||
#TIMER = INT_CPU_TIME | |||
# If none of these work, you can use the NONE value. | |||
# In that case, SECOND and DSECND will always return 0. | |||
#TIMER = NONE | |||
\end{verbatim} | |||
Refer to the section~\ref{second} to get more information. | |||
Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, | |||
Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, | |||
archiver options, and ranlib for your machine. If your architecture | |||
does not require \texttt{ranlib} to be run after each archive command (as | |||
is the case with CRAY computers running UNICOS, Hewlett Packard | |||
computers running HP-UX, or SUN SPARCstations running Solaris), set | |||
\texttt{ranlib=echo}. And finally, you must | |||
\texttt{RANLIB = echo}. And finally, you must | |||
modify the \texttt{BLASLIB} definition to specify the BLAS library to which | |||
you will be linking. If an optimized version of the BLAS is available | |||
on your machine, you are highly recommended to link to that library. | |||
@@ -721,24 +721,24 @@ The version that will be used depends on the value of the TIMER variable in the | |||
\begin{itemize} | |||
\item If ETIME is available as an external function, set the value of the TIMER variable in your | |||
make.inc to \texttt{EXT\_ETIME}:\texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used. | |||
make.inc to \texttt{EXT\_ETIME}: \texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used. | |||
Usually on HPPA architectures, | |||
the compiler and loader flag \texttt{+U77} should be included to access | |||
the compiler and linker flag \texttt{+U77} should be included to access | |||
the function \texttt{ETIME}. | |||
\item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc | |||
to \texttt{EXT\_ETIME\_}:\texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used. | |||
to \texttt{EXT\_ETIME\_}: \texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used. | |||
It is the case on some IBM architectures such as IBM RS/6000s. | |||
\item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc | |||
to \texttt{INT\_ETIME}:\texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used. | |||
to \texttt{INT\_ETIME}: \texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used. | |||
This is the case with gfortran. | |||
\item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc | |||
to \texttt{INT\_CPU\_TIME}:\texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used. | |||
to \texttt{INT\_CPU\_TIME}: \texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used. | |||
\item If none of these functions is available, set the value of the TIMER variable in your make.inc | |||
to \texttt{NONE:}\texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used. | |||
to \texttt{NONE}: \texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used. | |||
These routines will always return zero. | |||
\end{itemize} | |||
@@ -829,8 +829,8 @@ data type to the library if necessary. | |||
\end{itemize} | |||
\noindent | |||
The BLAS library is created in \texttt{LAPACK/blas\_PLAT.a}, where | |||
\texttt{PLAT} is the user-defined architecture suffix specified in the file | |||
The BLAS library is created in \texttt{LAPACK/librefblas.a}, | |||
or in the user-defined location specified by \texttt{BLASLIB} in the file | |||
\texttt{LAPACK/make.inc}. | |||
\subsection{Run the BLAS Test Programs}\label{testblas} | |||
@@ -882,8 +882,8 @@ data type to the library if necessary. | |||
\end{itemize} | |||
\noindent | |||
The LAPACK library is created in \texttt{LAPACK/lapack\_PLAT.a}, where | |||
\texttt{PLAT} is the user-defined architecture suffix specified in the file | |||
The LAPACK library is created in \texttt{LAPACK/liblapack.a}, | |||
or in the user-defined location specified by \texttt{LAPACKLIB} in the file | |||
\texttt{LAPACK/make.inc}. | |||
\subsection{Create the Test Matrix Generator Library} | |||
@@ -902,9 +902,9 @@ data type to the library if necessary. | |||
\end{itemize} | |||
\noindent | |||
The test matrix generator library is created in \texttt{LAPACK/tmglib\_PLAT.a}, | |||
where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||
file \texttt{LAPACK/make.inc}. | |||
The test matrix generator library is created in \texttt{LAPACK/libtmglib.a}, | |||
or in the user-defined location specified by \texttt{TMGLIB} in the file | |||
\texttt{LAPACK/make.inc}. | |||
\subsection{Run the LAPACK Test Programs} | |||
@@ -1114,9 +1114,7 @@ To make a library of the instrumented LAPACK routines, first | |||
go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed | |||
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | |||
The library of instrumented code is created in | |||
\texttt{LAPACK/TIMING/LIN/linsrc\_PLAT.a}, | |||
where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||
file \texttt{LAPACK/make.inc}. | |||
\texttt{LAPACK/TIMING/LIN/linsrc.a}. | |||
\end{sloppypar} | |||
\item[b)] | |||
@@ -1251,9 +1249,7 @@ To make a library of the instrumented LAPACK routines, first | |||
go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed | |||
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}. | |||
The library of instrumented code is created in | |||
\texttt{LAPACK/TIMING/EIG/eigsrc\_PLAT.a}, | |||
where \texttt{PLAT} is the user-defined architecture suffix specified in the | |||
file \texttt{LAPACK/make.inc}. | |||
\texttt{LAPACK/TIMING/EIG/eigsrc.a}. | |||
\end{sloppypar} | |||
\item[b)] | |||
@@ -1389,7 +1385,7 @@ installing LAPACK on an SGI architecture. | |||
\section{ETIME} | |||
On HPPA architectures, | |||
the compiler and loader flag \texttt{+U77} should be included to access | |||
the compiler and linker flag \texttt{+U77} should be included to access | |||
the function \texttt{ETIME}. | |||
\section{ILAENV and IEEE-754 compliance} | |||
@@ -1494,13 +1490,13 @@ has two options: increase your stack size, or force all local variables | |||
to be allocated statically. | |||
On HPPA architectures, the | |||
compiler and loader flag \texttt{-K} should be used when compiling these testing | |||
compiler and linker flag \texttt{-K} should be used when compiling these testing | |||
and timing main programs to avoid such a stack overflow. I.e., set | |||
\texttt{DRVOPTS = -K} in the \texttt{LAPACK/make.inc} file. | |||
\texttt{FFLAGS\_DRV = -K} in the \texttt{LAPACK/make.inc} file. | |||
For similar reasons, | |||
on SGI architectures, the compiler and loader flag \texttt{-static} should be | |||
used. I.e., set \texttt{DRVOPTS = -static} in the \texttt{LAPACK/make.inc} file. | |||
on SGI architectures, the compiler and linker flag \texttt{-static} should be | |||
used. I.e., set \texttt{FFLAGS\_DRV = -static} in the \texttt{LAPACK/make.inc} file. | |||
\section{IEEE arithmetic} | |||
@@ -1,30 +1,33 @@ | |||
include ../make.inc | |||
TOPSRCDIR = .. | |||
include $(TOPSRCDIR)/make.inc | |||
.PHONY: all testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||
all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||
testlsame: lsame.o lsametst.o | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
testslamch: slamch.o lsame.o slamchtst.o | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
testdlamch: dlamch.o lsame.o dlamchtst.o | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
testsecond: second_$(TIMER).o secondtst.o | |||
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
testdsecnd: dsecnd_$(TIMER).o dsecndtst.o | |||
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
testieee: tstiee.o | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
testversion: ilaver.o LAPACK_version.o | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
.PHONY: run | |||
run: all | |||
./testlsame | |||
./testslamch | |||
@@ -34,6 +37,7 @@ run: all | |||
./testieee | |||
./testversion | |||
.PHONY: clean cleanobj cleanexe cleantest | |||
clean: cleanobj cleanexe cleantest | |||
cleanobj: | |||
rm -f *.o | |||
@@ -42,9 +46,5 @@ cleanexe: | |||
cleantest: | |||
rm -f core | |||
.SUFFIXES: .o .f | |||
.f.o: | |||
$(FORTRAN) $(OPTS) -c -o $@ $< | |||
slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< | |||
dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< | |||
slamch.o: slamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
dlamch.o: dlamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< |
@@ -10,6 +10,10 @@ | |||
* | |||
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | |||
* | |||
* .. Scalar Arguments .. | |||
* CHARACTER CMACH | |||
* .. | |||
* | |||
* | |||
*> \par Purpose: | |||
* ============= | |||
@@ -24,6 +28,7 @@ | |||
* | |||
*> \param[in] CMACH | |||
*> \verbatim | |||
*> CMACH is CHARACTER*1 | |||
*> Specifies the value to be returned by DLAMCH: | |||
*> = 'E' or 'e', DLAMCH := eps | |||
*> = 'S' or 's', DLAMCH := sfmin | |||
@@ -10,6 +10,10 @@ | |||
* | |||
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) | |||
* | |||
* .. Scalar Arguments .. | |||
* CHARACTER CMACH | |||
* .. | |||
* | |||
* | |||
*> \par Purpose: | |||
* ============= | |||
@@ -25,12 +25,15 @@ | |||
* ========== | |||
* | |||
*> \param[out] VERS_MAJOR | |||
*> VERS_MAJOR is INTEGER | |||
*> return the lapack major version | |||
*> | |||
*> \param[out] VERS_MINOR | |||
*> VERS_MINOR is INTEGER | |||
*> return the lapack minor version from the major version | |||
*> | |||
*> \param[out] VERS_PATCH | |||
*> VERS_PATCH is INTEGER | |||
*> return the lapack patch version from the minor version | |||
* | |||
* Authors: | |||
@@ -41,24 +44,23 @@ | |||
*> \author Univ. of Colorado Denver | |||
*> \author NAG Ltd. | |||
* | |||
*> \date June 2017 | |||
*> \date November 2019 | |||
* | |||
*> \ingroup auxOTHERauxiliary | |||
* | |||
* ===================================================================== | |||
SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) | |||
* | |||
* -- LAPACK computational routine (version 3.7.1) -- | |||
* -- LAPACK computational routine -- | |||
* -- LAPACK is a software package provided by Univ. of Tennessee, -- | |||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||
* June 2017 | |||
* | |||
* ===================================================================== | |||
* | |||
INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH | |||
* ===================================================================== | |||
VERS_MAJOR = 3 | |||
VERS_MINOR = 8 | |||
VERS_MINOR = 9 | |||
VERS_PATCH = 0 | |||
* ===================================================================== | |||
* | |||
@@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = -O4 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
OPTS = -O4 -fpe1 | |||
DRVOPTS = $(OPTS) | |||
NOOPT = | |||
FC = f77 | |||
FFLAGS = -O4 -fpe1 | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = ranlib | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = ranlib | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -74,9 +72,9 @@ TIMER = EXT_ETIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
#BLASLIB = ../../librefblas.a | |||
#BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
BLASLIB = -ldxml | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
OPTS = +O4 +U77 | |||
DRVOPTS = $(OPTS) -K | |||
NOOPT = +U77 | |||
FC = f77 | |||
FFLAGS = +O4 +U77 | |||
FFLAGS_DRV = $(FFLAGS) -K | |||
FFLAGS_NOOPT = +U77 | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
LOADOPTS = -Aa +U77 | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -74,9 +72,9 @@ TIMER = EXT_ETIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
#BLASLIB = ../../librefblas.a | |||
#BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
BLASLIB = -lblas | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,33 +8,30 @@ SHELL = /sbin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = -O3 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
#OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
DRVOPTS = $(OPTS) -static | |||
NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
#NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
FC = f77 | |||
FFLAGS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
#FFLAGS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
FFLAGS_DRV = $(FFLAGS) -static | |||
FFLAGS_NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
#FFLAGS_NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON | |||
#LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -78,8 +75,8 @@ TIMER = EXT_ETIME | |||
# possible.) | |||
# | |||
#BLASLIB = -lblas | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,33 +8,30 @@ SHELL = /sbin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = -O3 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
OPTS = -O3 -64 -mips4 -r10000 | |||
#OPTS = -O3 -64 -mips4 -r10000 -mp | |||
DRVOPTS = $(OPTS) -static | |||
NOOPT = -64 -mips4 -r10000 | |||
#NOOPT = -64 -mips4 -r10000 -mp | |||
FC = f77 | |||
FFLAGS = -O3 -64 -mips4 -r10000 | |||
#FFLAGS = -O3 -64 -mips4 -r10000 -mp | |||
FFLAGS_DRV = $(FFLAGS) -static | |||
FFLAGS_NOOPT = -64 -mips4 -r10000 | |||
#FFLAGS_NOOPT = -64 -mips4 -r10000 -mp | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
LOADOPTS = -O3 -64 -mips4 -r10000 | |||
#LOADOPTS = -O3 -64 -mips4 -r10000 -mp | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -79,8 +76,8 @@ TIMER = EXT_ETIME | |||
# | |||
BLASLIB = -lblas | |||
#BLASLIB = -lblas_mp | |||
#BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
#BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,30 +8,28 @@ SHELL = /sbin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = -O4 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
OPTS = -O4 | |||
DRVOPTS = $(OPTS) -static | |||
NOOPT = | |||
FC = f77 | |||
FFLAGS = -O4 | |||
FFLAGS_DRV = $(FFLAGS) -static | |||
FFLAGS_NOOPT = | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||
# possible.) | |||
# | |||
#BLASLIB = -lblas | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = -O3 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
OPTS = -dalign -O4 -fast | |||
DRVOPTS = $(OPTS) | |||
NOOPT = | |||
FC = f77 | |||
FFLAGS = -dalign -O4 -fast | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
LOADOPTS = -dalign -O4 -fast | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = ranlib | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = ranlib | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||
# possible.) | |||
# | |||
#BLASLIB = -lblas | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,34 +8,31 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = cc | |||
CC = cc | |||
CFLAGS = -O3 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. FFLAGS_NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = f77 | |||
#OPTS = -O4 -u -f -mt | |||
#OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa | |||
OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -u -f | |||
#NOOPT = -u -f -mt | |||
FC = f77 | |||
#FFLAGS = -O4 -u -f -mt | |||
#FFLAGS = -u -f -dalign -native -xO5 -xarch=v8plusa | |||
FFLAGS = -u -f -dalign -native -xO2 -xarch=v8plusa | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = -u -f | |||
#FFLAGS_NOOPT = -u -f -mt | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = f77 | |||
#LOADOPTS = -mt | |||
LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -78,10 +75,10 @@ TIMER = EXT_ETIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
#BLASLIB = ../../librefblas.a | |||
#BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
#BLASLIB = -xlic_lib=sunperf_mt | |||
BLASLIB = -xlic_lib=sunperf | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,31 +8,29 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = xlc | |||
CC = xlc | |||
CFLAGS = -O3 -qnosave | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = xlf | |||
OPTS = -O3 -qfixed -qnosave | |||
FC = xlf | |||
FFLAGS = -O3 -qfixed -qnosave | |||
# For -O2, add -qstrict=none | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -O0 -qfixed -qnosave | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = -O0 -qfixed -qnosave | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = xlf | |||
LOADOPTS = -qnosave | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = ranlib | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = ranlib | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -75,9 +73,9 @@ TIMER = EXT_ETIME_ | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
#BLASLIB = ../../librefblas.a | |||
#BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
BLASLIB = -lessl | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,10 +8,10 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = gcc | |||
CC = gcc | |||
CFLAGS = -O3 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
@@ -19,23 +19,21 @@ CFLAGS = -O3 | |||
# and handle these quantities appropriately. As a consequence, one | |||
# should not compile LAPACK with flags such as -ffpe-trap=overflow. | |||
# | |||
FORTRAN = gfortran | |||
OPTS = -O2 -frecursive | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -O0 -frecursive | |||
FC = gfortran | |||
FFLAGS = -O2 -frecursive | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = -O0 -frecursive | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = gfortran | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = ranlib | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = ranlib | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -78,8 +76,8 @@ TIMER = INT_ETIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,10 +8,10 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = gcc | |||
CC = gcc | |||
CFLAGS = -g | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
@@ -19,23 +19,21 @@ CFLAGS = -g | |||
# and handle these quantities appropriately. As a consequence, one | |||
# should not compile LAPACK with flags such as -ffpe-trap=overflow. | |||
# | |||
FORTRAN = gfortran -fimplicit-none -g -frecursive | |||
OPTS = | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -g -O0 -frecursive | |||
FC = gfortran | |||
FFLAGS = -fimplicit-none -g -frecursive | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = $(FFLAGS) -O0 | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = gfortran -g | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = ranlib | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = ranlib | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -78,8 +76,8 @@ TIMER = INT_CPU_TIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = icc | |||
CC = icc | |||
CFLAGS = -O3 | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = ifort | |||
OPTS = -O3 -fp-model strict -assume protect_parens | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -O0 -fp-model strict -assume protect_parens | |||
FC = ifort | |||
FFLAGS = -O3 -fp-model strict -assume protect_parens | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = -O0 -fp-model strict -assume protect_parens | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = ifort | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = ranlib | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = ranlib | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -74,8 +72,8 @@ TIMER = EXT_ETIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = pgcc | |||
CC = pgcc | |||
CFLAGS = | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = pgf95 | |||
OPTS = -O3 | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -O0 | |||
FC = pgf95 | |||
FFLAGS = -O3 | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = -O0 | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = $(FORTRAN) | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -74,8 +72,8 @@ TIMER = INT_CPU_TIME | |||
# machine-specific, optimized BLAS library should be used whenever | |||
# possible.) | |||
# | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -8,30 +8,28 @@ SHELL = /bin/sh | |||
# CC is the C compiler, normally invoked with options CFLAGS. | |||
# | |||
CC = pghpc | |||
CC = pghpc | |||
CFLAGS = | |||
# Modify the FORTRAN and OPTS definitions to refer to the compiler | |||
# Modify the FC and FFLAGS definitions to the desired compiler | |||
# and desired compiler options for your machine. NOOPT refers to | |||
# the compiler options desired when NO OPTIMIZATION is selected. | |||
# | |||
FORTRAN = pghpf | |||
OPTS = -O4 -Mnohpfc -Mdclchk | |||
DRVOPTS = $(OPTS) | |||
NOOPT = -Mnohpfc -Mdclchk | |||
FC = pghpf | |||
FFLAGS = -O4 -Mnohpfc -Mdclchk | |||
FFLAGS_DRV = $(FFLAGS) | |||
FFLAGS_NOOPT = -Mnohpfc -Mdclchk | |||
# Define LOADER and LOADOPTS to refer to the loader and desired | |||
# load options for your machine. | |||
# Define LDFLAGS to the desired linker options for your machine. | |||
# | |||
LOADER = pghpf | |||
LOADOPTS = | |||
LDFLAGS = | |||
# The archiver and the flag(s) to use when building an archive | |||
# (library). If your system has no ranlib, set RANLIB = echo. | |||
# | |||
ARCH = ar | |||
ARCHFLAGS = cr | |||
RANLIB = echo | |||
AR = ar | |||
ARFLAGS = cr | |||
RANLIB = echo | |||
# Timer for the SECOND and DSECND routines | |||
# | |||
@@ -75,8 +73,8 @@ TIMER = EXT_ETIME | |||
# possible.) | |||
# | |||
#BLASLIB = -lessl | |||
BLASLIB = ../../librefblas.a | |||
CBLASLIB = ../../libcblas.a | |||
LAPACKLIB = liblapack.a | |||
TMGLIB = libtmglib.a | |||
LAPACKELIB = liblapacke.a | |||
BLASLIB = $(TOPSRCDIR)/librefblas.a | |||
CBLASLIB = $(TOPSRCDIR)/libcblas.a | |||
LAPACKLIB = $(TOPSRCDIR)/liblapack.a | |||
TMGLIB = $(TOPSRCDIR)/libtmglib.a | |||
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a |
@@ -28,6 +28,7 @@ | |||
* | |||
*> \param[in] CMACH | |||
*> \verbatim | |||
*> CMACH is CHARACTER*1 | |||
*> Specifies the value to be returned by SLAMCH: | |||
*> = 'E' or 'e', SLAMCH := eps | |||
*> = 'S' or 's , SLAMCH := sfmin | |||
@@ -16,18 +16,16 @@ if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND) | |||
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h) | |||
endif() | |||
if(WIN32 AND NOT UNIX) | |||
add_definitions(-DHAVE_LAPACK_CONFIG_H -DLAPACK_COMPLEX_STRUCTURE) | |||
message(STATUS "Windows BUILD") | |||
endif() | |||
get_directory_property(DirDefs COMPILE_DEFINITIONS) | |||
include_directories(include ${LAPACK_BINARY_DIR}/include) | |||
add_subdirectory(include) | |||
add_subdirectory(src) | |||
add_subdirectory(utils) | |||
option(LAPACKE_BUILD_SINGLE "Build LAPACKE single precision real" ON) | |||
option(LAPACKE_BUILD_DOUBLE "Build LAPACKE double precision real" ON) | |||
option(LAPACKE_BUILD_COMPLEX "Build LAPACKE single precision complex" ON) | |||
option(LAPACKE_BUILD_COMPLEX16 "Build LAPACKE double precision complex" ON) | |||
macro(append_subdir_files variable dirname) | |||
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) | |||
foreach(depfile ${holder}) | |||
@@ -35,8 +33,29 @@ macro(append_subdir_files variable dirname) | |||
endforeach() | |||
endmacro() | |||
message(STATUS "Build LAPACKE single precision real: ${LAPACKE_BUILD_SINGLE}") | |||
message(STATUS "Build LAPACKE double precision real: ${LAPACKE_BUILD_DOUBLE}") | |||
message(STATUS "Build LAPACKE single precision complex: ${LAPACKE_BUILD_COMPLEX}") | |||
message(STATUS "Build LAPACKE double precision complex: ${LAPACKE_BUILD_COMPLEX16}") | |||
append_subdir_files(LAPACKE_INCLUDE "include") | |||
append_subdir_files(SOURCES "src") | |||
if (LAPACKE_BUILD_SINGLE) | |||
append_subdir_files(SOURCES_SINGLE "src") | |||
list(APPEND SOURCES ${SOURCES_SINGLE}) | |||
endif() | |||
if (LAPACKE_BUILD_DOUBLE) | |||
append_subdir_files(SOURCES_DOUBLE "src") | |||
list(APPEND SOURCES ${SOURCES_DOUBLE}) | |||
endif() | |||
if (LAPACKE_BUILD_COMPLEX) | |||
append_subdir_files(SOURCES_COMPLEX "src") | |||
list(APPEND SOURCES ${SOURCES_COMPLEX}) | |||
endif() | |||
if (LAPACKE_BUILD_COMPLEX16) | |||
append_subdir_files(SOURCES_COMPLEX16 "src") | |||
list(APPEND SOURCES ${SOURCES_COMPLEX16}) | |||
endif() | |||
append_subdir_files(DEPRECATED "src") | |||
append_subdir_files(EXTENDED "src") | |||
append_subdir_files(MATGEN "src") | |||
@@ -61,9 +80,13 @@ set_target_properties( | |||
SOVERSION ${LAPACK_MAJOR_VERSION} | |||
) | |||
target_include_directories(lapacke PUBLIC | |||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | |||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | |||
$<INSTALL_INTERFACE:include> | |||
) | |||
if(WIN32 AND NOT UNIX) | |||
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||
message(STATUS "Windows BUILD") | |||
endif() | |||
if(LAPACKE_WITH_TMG) | |||
target_link_libraries(lapacke PRIVATE tmglib) | |||
@@ -71,7 +94,11 @@ endif() | |||
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | |||
lapack_install_library(lapacke) | |||
install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
install( | |||
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | |||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
COMPONENT Development | |||
) | |||
if(BUILD_TESTING) | |||
add_subdirectory(example) | |||
@@ -82,6 +109,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_ | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
COMPONENT Development | |||
) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | |||
@@ -95,7 +123,10 @@ install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | |||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
install(EXPORT lapacke-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}) | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) |
@@ -40,22 +40,26 @@ | |||
# To clean everything including lapacke library type | |||
# 'make cleanall' | |||
# | |||
include ../make.inc | |||
TOPSRCDIR = .. | |||
include $(TOPSRCDIR)/make.inc | |||
.PHONY: all | |||
all: lapacke | |||
.PHONY: lapacke | |||
lapacke: include/lapacke_mangling.h | |||
$(MAKE) -C src | |||
$(MAKE) -C utils | |||
include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in | |||
cp $< $@ | |||
cp include/lapacke_mangling_with_flags.h.in $@ | |||
.PHONY: lapacke_example | |||
lapacke_example: lapacke | |||
$(MAKE) -C example | |||
#clean: cleanlib | |||
clean: cleanobj | |||
.PHONY: clean cleanobj cleanlib cleanexe | |||
clean: | |||
$(MAKE) -C src clean | |||
$(MAKE) -C utils clean | |||
$(MAKE) -C example clean | |||
@@ -64,6 +68,6 @@ cleanobj: | |||
$(MAKE) -C utils cleanobj | |||
$(MAKE) -C example cleanobj | |||
cleanlib: | |||
rm -f ../$(LAPACKELIB) | |||
$(MAKE) -C src cleanlib | |||
cleanexe: | |||
$(MAKE) -C example cleanexe |
@@ -7,8 +7,11 @@ if(NOT TARGET lapacke) | |||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
endif() | |||
# Hint for project building against lapack | |||
set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
# Report lapacke header search locations from build tree. | |||
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | |||
# Report lapacke libraries. | |||
set(LAPACKE_LIBRARIES lapacke) | |||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) |
@@ -13,11 +13,14 @@ if(NOT TARGET lapacke) | |||
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | |||
endif() | |||
# Hint for project building against lapack | |||
set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
# Report lapacke header search locations. | |||
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | |||
# Report lapacke libraries. | |||
set(LAPACKE_LIBRARIES lapacke) | |||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
unset(_LAPACKE_PREFIX) | |||
unset(_LAPACKE_SELF_DIR) |
@@ -1,34 +1,38 @@ | |||
include ../../make.inc | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
.SUFFIXES: .c .o | |||
.c.o: | |||
$(CC) $(CFLAGS) -I. -I../include -c -o $@ $< | |||
.PHONY: all | |||
all: xexample_DGESV_rowmajor \ | |||
xexample_DGESV_colmajor \ | |||
xexample_DGELS_rowmajor \ | |||
xexample_DGELS_colmajor | |||
LIBRARIES = ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) | |||
LIBRARIES = $(LAPACKELIB) $(LAPACKLIB) $(BLASLIB) | |||
# Double Precision Examples | |||
xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
./$@ | |||
xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
./$@ | |||
xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
./$@ | |||
xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES) | |||
$(LOADER) $(LOADOPTS) -o $@ $^ | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
./$@ | |||
.PHONY: clean cleanobj cleanexe | |||
clean: cleanobj cleanexe | |||
cleanobj: | |||
rm -f *.o | |||
cleanexe: | |||
rm -f x* | |||
.c.o: | |||
$(CC) $(CFLAGS) -I. -I../include -c -o $@ $< |
@@ -1,3 +1,3 @@ | |||
set(LAPACKE_INCLUDE lapacke.h lapacke_config.h lapacke_utils.h) | |||
set(LAPACKE_INCLUDE lapacke.h lapack.h lapacke_config.h lapacke_utils.h) | |||
file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include) |
@@ -1,4 +1,4 @@ | |||
set(SOURCES | |||
set(SOURCES_COMPLEX | |||
lapacke_cbbcsd.c | |||
lapacke_cbbcsd_work.c | |||
lapacke_cbdsqr.c | |||
@@ -78,11 +78,11 @@ lapacke_cgeqrf_work.c | |||
lapacke_cgeqrfp.c | |||
lapacke_cgeqrfp_work.c | |||
lapacke_cgeqrt.c | |||
lapacke_cgeqrt_work.c | |||
lapacke_cgeqrt2.c | |||
lapacke_cgeqrt2_work.c | |||
lapacke_cgeqrt3.c | |||
lapacke_cgeqrt3_work.c | |||
lapacke_cgeqrt_work.c | |||
lapacke_cgerfs.c | |||
lapacke_cgerfs_work.c | |||
lapacke_cgerqf.c | |||
@@ -93,6 +93,8 @@ lapacke_cgesv.c | |||
lapacke_cgesv_work.c | |||
lapacke_cgesvd.c | |||
lapacke_cgesvd_work.c | |||
lapacke_cgesvdq.c | |||
lapacke_cgesvdq_work.c | |||
lapacke_cgesvdx.c | |||
lapacke_cgesvdx_work.c | |||
lapacke_cgesvj.c | |||
@@ -129,10 +131,10 @@ lapacke_cggevx.c | |||
lapacke_cggevx_work.c | |||
lapacke_cggglm.c | |||
lapacke_cggglm_work.c | |||
lapacke_cgghrd.c | |||
lapacke_cgghrd_work.c | |||
lapacke_cgghd3.c | |||
lapacke_cgghd3_work.c | |||
lapacke_cgghrd.c | |||
lapacke_cgghrd_work.c | |||
lapacke_cgglse.c | |||
lapacke_cgglse_work.c | |||
lapacke_cggqrf.c | |||
@@ -157,14 +159,14 @@ lapacke_cgttrs.c | |||
lapacke_cgttrs_work.c | |||
lapacke_chbev.c | |||
lapacke_chbev_work.c | |||
lapacke_chbevd.c | |||
lapacke_chbevd_work.c | |||
lapacke_chbevx.c | |||
lapacke_chbevx_work.c | |||
lapacke_chbev_2stage.c | |||
lapacke_chbev_2stage_work.c | |||
lapacke_chbevd.c | |||
lapacke_chbevd_work.c | |||
lapacke_chbevd_2stage.c | |||
lapacke_chbevd_2stage_work.c | |||
lapacke_chbevx.c | |||
lapacke_chbevx_work.c | |||
lapacke_chbevx_2stage.c | |||
lapacke_chbevx_2stage_work.c | |||
lapacke_chbgst.c | |||
@@ -185,18 +187,18 @@ lapacke_cheequb.c | |||
lapacke_cheequb_work.c | |||
lapacke_cheev.c | |||
lapacke_cheev_work.c | |||
lapacke_cheevd.c | |||
lapacke_cheevd_work.c | |||
lapacke_cheevr.c | |||
lapacke_cheevr_work.c | |||
lapacke_cheevx.c | |||
lapacke_cheevx_work.c | |||
lapacke_cheev_2stage.c | |||
lapacke_cheev_2stage_work.c | |||
lapacke_cheevd.c | |||
lapacke_cheevd_work.c | |||
lapacke_cheevd_2stage.c | |||
lapacke_cheevd_2stage_work.c | |||
lapacke_cheevr.c | |||
lapacke_cheevr_work.c | |||
lapacke_cheevr_2stage.c | |||
lapacke_cheevr_2stage_work.c | |||
lapacke_cheevx.c | |||
lapacke_cheevx_work.c | |||
lapacke_cheevx_2stage.c | |||
lapacke_cheevx_2stage_work.c | |||
lapacke_chegst.c | |||
@@ -214,8 +216,8 @@ lapacke_cherfs_work.c | |||
lapacke_chesv.c | |||
lapacke_chesv_work.c | |||
lapacke_chesv_aa.c | |||
lapacke_chesv_aa_2stage.c | |||
lapacke_chesv_aa_work.c | |||
lapacke_chesv_aa_2stage.c | |||
lapacke_chesv_aa_2stage_work.c | |||
lapacke_chesv_rk.c | |||
lapacke_chesv_rk_work.c | |||
@@ -226,35 +228,35 @@ lapacke_cheswapr_work.c | |||
lapacke_chetrd.c | |||
lapacke_chetrd_work.c | |||
lapacke_chetrf.c | |||
lapacke_chetrf_rook.c | |||
lapacke_chetrf_work.c | |||
lapacke_chetrf_rook_work.c | |||
lapacke_chetrf_aa.c | |||
lapacke_chetrf_aa_2stage.c | |||
lapacke_chetrf_aa_work.c | |||
lapacke_chetrf_aa_2stage.c | |||
lapacke_chetrf_aa_2stage_work.c | |||
lapacke_chetrf_rk.c | |||
lapacke_chetrf_rk_work.c | |||
lapacke_chetrf_rook.c | |||
lapacke_chetrf_rook_work.c | |||
lapacke_chetri.c | |||
lapacke_chetri_work.c | |||
lapacke_chetri2.c | |||
lapacke_chetri2_work.c | |||
lapacke_chetri_3.c | |||
lapacke_chetri_3_work.c | |||
lapacke_chetri2x.c | |||
lapacke_chetri2x_work.c | |||
lapacke_chetri_work.c | |||
lapacke_chetri_3.c | |||
lapacke_chetri_3_work.c | |||
lapacke_chetrs.c | |||
lapacke_chetrs_rook.c | |||
lapacke_chetrs_work.c | |||
lapacke_chetrs2.c | |||
lapacke_chetrs2_work.c | |||
lapacke_chetrs_work.c | |||
lapacke_chetrs_rook_work.c | |||
lapacke_chetrs_3.c | |||
lapacke_chetrs_3_work.c | |||
lapacke_chetrs_aa.c | |||
lapacke_chetrs_aa_2stage.c | |||
lapacke_chetrs_aa_work.c | |||
lapacke_chetrs_aa_2stage.c | |||
lapacke_chetrs_aa_2stage_work.c | |||
lapacke_chetrs_3.c | |||
lapacke_chetrs_3_work.c | |||
lapacke_chetrs_rook.c | |||
lapacke_chetrs_rook_work.c | |||
lapacke_chfrk.c | |||
lapacke_chfrk_work.c | |||
lapacke_chgeqz.c | |||
@@ -445,52 +447,54 @@ lapacke_csyconv.c | |||
lapacke_csyconv_work.c | |||
lapacke_csyequb.c | |||
lapacke_csyequb_work.c | |||
lapacke_csyr.c | |||
lapacke_csyr_work.c | |||
lapacke_csyrfs.c | |||
lapacke_csyrfs_work.c | |||
lapacke_csysv.c | |||
lapacke_csysv_rook.c | |||
lapacke_csysv_rook_work.c | |||
lapacke_csysv_work.c | |||
lapacke_csysv_aa.c | |||
lapacke_csysv_aa_2stage.c | |||
lapacke_csysv_aa_work.c | |||
lapacke_csysv_aa_2stage.c | |||
lapacke_csysv_aa_2stage_work.c | |||
lapacke_csysv_rk.c | |||
lapacke_csysv_rk_work.c | |||
lapacke_csysv_rook.c | |||
lapacke_csysv_rook_work.c | |||
lapacke_csysvx.c | |||
lapacke_csysvx_work.c | |||
lapacke_csyswapr.c | |||
lapacke_csyswapr_work.c | |||
lapacke_csytrf.c | |||
lapacke_csytrf_work.c | |||
lapacke_csytrf_rook.c | |||
lapacke_csytrf_rook_work.c | |||
lapacke_csytrf_aa.c | |||
lapacke_csytrf_aa_2stage.c | |||
lapacke_csytrf_aa_work.c | |||
lapacke_csytrf_aa_2stage.c | |||
lapacke_csytrf_aa_2stage_work.c | |||
lapacke_csytrf_rk.c | |||
lapacke_csytrf_rk_work.c | |||
lapacke_csytrf_rook.c | |||
lapacke_csytrf_rook_work.c | |||
lapacke_csytri.c | |||
lapacke_csytri_work.c | |||
lapacke_csytri2.c | |||
lapacke_csytri2_work.c | |||
lapacke_csytri_3.c | |||
lapacke_csytri_3_work.c | |||
lapacke_csytri2x.c | |||
lapacke_csytri2x_work.c | |||
lapacke_csytri_work.c | |||
lapacke_csytri_3.c | |||
lapacke_csytri_3_work.c | |||
lapacke_csytrs.c | |||
lapacke_csytrs_rook.c | |||
lapacke_csytrs_work.c | |||
lapacke_csytrs2.c | |||
lapacke_csytrs2_work.c | |||
lapacke_csytrs_work.c | |||
lapacke_csytrs_rook_work.c | |||
lapacke_csytrs_3.c | |||
lapacke_csytrs_3_work.c | |||
lapacke_csytrs_aa.c | |||
lapacke_csytrs_aa_2stage.c | |||
lapacke_csytrs_aa_work.c | |||
lapacke_csytrs_aa_2stage.c | |||
lapacke_csytrs_aa_2stage_work.c | |||
lapacke_csytrs_3.c | |||
lapacke_csytrs_3_work.c | |||
lapacke_csytrs_rook.c | |||
lapacke_csytrs_rook_work.c | |||
lapacke_ctbcon.c | |||
lapacke_ctbcon_work.c | |||
lapacke_ctbrfs.c | |||
@@ -522,9 +526,9 @@ lapacke_ctpcon_work.c | |||
lapacke_ctpmqrt.c | |||
lapacke_ctpmqrt_work.c | |||
lapacke_ctpqrt.c | |||
lapacke_ctpqrt_work.c | |||
lapacke_ctpqrt2.c | |||
lapacke_ctpqrt2_work.c | |||
lapacke_ctpqrt_work.c | |||
lapacke_ctprfb.c | |||
lapacke_ctprfb_work.c | |||
lapacke_ctprfs.c | |||
@@ -601,14 +605,16 @@ lapacke_cupgtr.c | |||
lapacke_cupgtr_work.c | |||
lapacke_cupmtr.c | |||
lapacke_cupmtr_work.c | |||
) | |||
set(SOURCES_DOUBLE | |||
lapacke_dbbcsd.c | |||
lapacke_dbbcsd_work.c | |||
lapacke_dbdsdc.c | |||
lapacke_dbdsdc_work.c | |||
lapacke_dbdsvdx.c | |||
lapacke_dbdsvdx_work.c | |||
lapacke_dbdsqr.c | |||
lapacke_dbdsqr_work.c | |||
lapacke_dbdsvdx.c | |||
lapacke_dbdsvdx_work.c | |||
lapacke_ddisna.c | |||
lapacke_ddisna_work.c | |||
lapacke_dgbbrd.c | |||
@@ -686,11 +692,11 @@ lapacke_dgeqrf_work.c | |||
lapacke_dgeqrfp.c | |||
lapacke_dgeqrfp_work.c | |||
lapacke_dgeqrt.c | |||
lapacke_dgeqrt_work.c | |||
lapacke_dgeqrt2.c | |||
lapacke_dgeqrt2_work.c | |||
lapacke_dgeqrt3.c | |||
lapacke_dgeqrt3_work.c | |||
lapacke_dgeqrt_work.c | |||
lapacke_dgerfs.c | |||
lapacke_dgerfs_work.c | |||
lapacke_dgerqf.c | |||
@@ -701,6 +707,8 @@ lapacke_dgesv.c | |||
lapacke_dgesv_work.c | |||
lapacke_dgesvd.c | |||
lapacke_dgesvd_work.c | |||
lapacke_dgesvdq.c | |||
lapacke_dgesvdq_work.c | |||
lapacke_dgesvdx.c | |||
lapacke_dgesvdx_work.c | |||
lapacke_dgesvj.c | |||
@@ -737,10 +745,10 @@ lapacke_dggevx.c | |||
lapacke_dggevx_work.c | |||
lapacke_dggglm.c | |||
lapacke_dggglm_work.c | |||
lapacke_dgghrd.c | |||
lapacke_dgghrd_work.c | |||
lapacke_dgghd3.c | |||
lapacke_dgghd3_work.c | |||
lapacke_dgghrd.c | |||
lapacke_dgghrd_work.c | |||
lapacke_dgglse.c | |||
lapacke_dgglse_work.c | |||
lapacke_dggqrf.c | |||
@@ -823,10 +831,10 @@ lapacke_dopmtr.c | |||
lapacke_dopmtr_work.c | |||
lapacke_dorbdb.c | |||
lapacke_dorbdb_work.c | |||
lapacke_dorcsd2by1.c | |||
lapacke_dorcsd2by1_work.c | |||
lapacke_dorcsd.c | |||
lapacke_dorcsd_work.c | |||
lapacke_dorcsd2by1.c | |||
lapacke_dorcsd2by1_work.c | |||
lapacke_dorgbr.c | |||
lapacke_dorgbr_work.c | |||
lapacke_dorghr.c | |||
@@ -933,14 +941,14 @@ lapacke_dpttrs.c | |||
lapacke_dpttrs_work.c | |||
lapacke_dsbev.c | |||
lapacke_dsbev_work.c | |||
lapacke_dsbevd.c | |||
lapacke_dsbevd_work.c | |||
lapacke_dsbevx.c | |||
lapacke_dsbevx_work.c | |||
lapacke_dsbev_2stage.c | |||
lapacke_dsbev_2stage_work.c | |||
lapacke_dsbevd.c | |||
lapacke_dsbevd_work.c | |||
lapacke_dsbevd_2stage.c | |||
lapacke_dsbevd_2stage_work.c | |||
lapacke_dsbevx.c | |||
lapacke_dsbevx_work.c | |||
lapacke_dsbevx_2stage.c | |||
lapacke_dsbevx_2stage_work.c | |||
lapacke_dsbgst.c | |||
@@ -1021,18 +1029,18 @@ lapacke_dsyequb.c | |||
lapacke_dsyequb_work.c | |||
lapacke_dsyev.c | |||
lapacke_dsyev_work.c | |||
lapacke_dsyevd.c | |||
lapacke_dsyevd_work.c | |||
lapacke_dsyevr.c | |||
lapacke_dsyevr_work.c | |||
lapacke_dsyevx.c | |||
lapacke_dsyevx_work.c | |||
lapacke_dsyev_2stage.c | |||
lapacke_dsyev_2stage_work.c | |||
lapacke_dsyevd.c | |||
lapacke_dsyevd_work.c | |||
lapacke_dsyevd_2stage.c | |||
lapacke_dsyevd_2stage_work.c | |||
lapacke_dsyevr.c | |||
lapacke_dsyevr_work.c | |||
lapacke_dsyevr_2stage.c | |||
lapacke_dsyevr_2stage_work.c | |||
lapacke_dsyevx.c | |||
lapacke_dsyevx_work.c | |||
lapacke_dsyevx_2stage.c | |||
lapacke_dsyevx_2stage_work.c | |||
lapacke_dsygst.c | |||
@@ -1048,15 +1056,15 @@ lapacke_dsygvx_work.c | |||
lapacke_dsyrfs.c | |||
lapacke_dsyrfs_work.c | |||
lapacke_dsysv.c | |||
lapacke_dsysv_rook.c | |||
lapacke_dsysv_rook_work.c | |||
lapacke_dsysv_work.c | |||
lapacke_dsysv_aa.c | |||
lapacke_dsysv_aa_2stage.c | |||
lapacke_dsysv_aa_work.c | |||
lapacke_dsysv_aa_2stage.c | |||
lapacke_dsysv_aa_2stage_work.c | |||
lapacke_dsysv_rk.c | |||
lapacke_dsysv_rk_work.c | |||
lapacke_dsysv_rook.c | |||
lapacke_dsysv_rook_work.c | |||
lapacke_dsysvx.c | |||
lapacke_dsysvx_work.c | |||
lapacke_dsyswapr.c | |||
@@ -1065,33 +1073,33 @@ lapacke_dsytrd.c | |||
lapacke_dsytrd_work.c | |||
lapacke_dsytrf.c | |||
lapacke_dsytrf_work.c | |||
lapacke_dsytrf_rook.c | |||
lapacke_dsytrf_rook_work.c | |||
lapacke_dsytrf_aa.c | |||
lapacke_dsytrf_aa_2stage.c | |||
lapacke_dsytrf_aa_work.c | |||
lapacke_dsytrf_aa_2stage.c | |||
lapacke_dsytrf_aa_2stage_work.c | |||
lapacke_dsytrf_rk.c | |||
lapacke_dsytrf_rk_work.c | |||
lapacke_dsytrf_rook.c | |||
lapacke_dsytrf_rook_work.c | |||
lapacke_dsytri.c | |||
lapacke_dsytri_work.c | |||
lapacke_dsytri2.c | |||
lapacke_dsytri2_work.c | |||
lapacke_dsytri_3.c | |||
lapacke_dsytri_3_work.c | |||
lapacke_dsytri2x.c | |||
lapacke_dsytri2x_work.c | |||
lapacke_dsytri_work.c | |||
lapacke_dsytri_3.c | |||
lapacke_dsytri_3_work.c | |||
lapacke_dsytrs.c | |||
lapacke_dsytrs_rook.c | |||
lapacke_dsytrs_work.c | |||
lapacke_dsytrs2.c | |||
lapacke_dsytrs2_work.c | |||
lapacke_dsytrs_3.c | |||
lapacke_dsytrs_3_work.c | |||
lapacke_dsytrs_aa.c | |||
lapacke_dsytrs_aa_2stage.c | |||
lapacke_dsytrs_aa_work.c | |||
lapacke_dsytrs_aa_2stage.c | |||
lapacke_dsytrs_aa_2stage_work.c | |||
lapacke_dsytrs_3.c | |||
lapacke_dsytrs_3_work.c | |||
lapacke_dsytrs_work.c | |||
lapacke_dsytrs_rook.c | |||
lapacke_dsytrs_rook_work.c | |||
lapacke_dtbcon.c | |||
lapacke_dtbcon_work.c | |||
@@ -1124,9 +1132,9 @@ lapacke_dtpcon_work.c | |||
lapacke_dtpmqrt.c | |||
lapacke_dtpmqrt_work.c | |||
lapacke_dtpqrt.c | |||
lapacke_dtpqrt_work.c | |||
lapacke_dtpqrt2.c | |||
lapacke_dtpqrt2_work.c | |||
lapacke_dtpqrt_work.c | |||
lapacke_dtprfb.c | |||
lapacke_dtprfb_work.c | |||
lapacke_dtprfs.c | |||
@@ -1163,15 +1171,21 @@ lapacke_dtrttp.c | |||
lapacke_dtrttp_work.c | |||
lapacke_dtzrzf.c | |||
lapacke_dtzrzf_work.c | |||
) | |||
set(SOURCES | |||
lapacke_nancheck.c | |||
lapacke_ilaver.c | |||
) | |||
set(SOURCES_SINGLE | |||
lapacke_sbbcsd.c | |||
lapacke_sbbcsd_work.c | |||
lapacke_sbdsdc.c | |||
lapacke_sbdsdc_work.c | |||
lapacke_sbdsvdx.c | |||
lapacke_sbdsvdx_work.c | |||
lapacke_sbdsqr.c | |||
lapacke_sbdsqr_work.c | |||
lapacke_sbdsvdx.c | |||
lapacke_sbdsvdx_work.c | |||
lapacke_sdisna.c | |||
lapacke_sdisna_work.c | |||
lapacke_sgbbrd.c | |||
@@ -1249,11 +1263,11 @@ lapacke_sgeqrf_work.c | |||
lapacke_sgeqrfp.c | |||
lapacke_sgeqrfp_work.c | |||
lapacke_sgeqrt.c | |||
lapacke_sgeqrt_work.c | |||
lapacke_sgeqrt2.c | |||
lapacke_sgeqrt2_work.c | |||
lapacke_sgeqrt3.c | |||
lapacke_sgeqrt3_work.c | |||
lapacke_sgeqrt_work.c | |||
lapacke_sgerfs.c | |||
lapacke_sgerfs_work.c | |||
lapacke_sgerqf.c | |||
@@ -1264,6 +1278,8 @@ lapacke_sgesv.c | |||
lapacke_sgesv_work.c | |||
lapacke_sgesvd.c | |||
lapacke_sgesvd_work.c | |||
lapacke_sgesvdq.c | |||
lapacke_sgesvdq_work.c | |||
lapacke_sgesvdx.c | |||
lapacke_sgesvdx_work.c | |||
lapacke_sgesvj.c | |||
@@ -1300,10 +1316,10 @@ lapacke_sggevx.c | |||
lapacke_sggevx_work.c | |||
lapacke_sggglm.c | |||
lapacke_sggglm_work.c | |||
lapacke_sgghrd.c | |||
lapacke_sgghrd_work.c | |||
lapacke_sgghd3.c | |||
lapacke_sgghd3_work.c | |||
lapacke_sgghrd.c | |||
lapacke_sgghrd_work.c | |||
lapacke_sgglse.c | |||
lapacke_sgglse_work.c | |||
lapacke_sggqrf.c | |||
@@ -1496,14 +1512,14 @@ lapacke_spttrs.c | |||
lapacke_spttrs_work.c | |||
lapacke_ssbev.c | |||
lapacke_ssbev_work.c | |||
lapacke_ssbevd.c | |||
lapacke_ssbevd_work.c | |||
lapacke_ssbevx.c | |||
lapacke_ssbevx_work.c | |||
lapacke_ssbev_2stage.c | |||
lapacke_ssbev_2stage_work.c | |||
lapacke_ssbevd.c | |||
lapacke_ssbevd_work.c | |||
lapacke_ssbevd_2stage.c | |||
lapacke_ssbevd_2stage_work.c | |||
lapacke_ssbevx.c | |||
lapacke_ssbevx_work.c | |||
lapacke_ssbevx_2stage.c | |||
lapacke_ssbevx_2stage_work.c | |||
lapacke_ssbgst.c | |||
@@ -1580,18 +1596,18 @@ lapacke_ssyequb.c | |||
lapacke_ssyequb_work.c | |||
lapacke_ssyev.c | |||
lapacke_ssyev_work.c | |||
lapacke_ssyevd.c | |||
lapacke_ssyevd_work.c | |||
lapacke_ssyevr.c | |||
lapacke_ssyevr_work.c | |||
lapacke_ssyevx.c | |||
lapacke_ssyevx_work.c | |||
lapacke_ssyev_2stage.c | |||
lapacke_ssyev_2stage_work.c | |||
lapacke_ssyevd.c | |||
lapacke_ssyevd_work.c | |||
lapacke_ssyevd_2stage.c | |||
lapacke_ssyevd_2stage_work.c | |||
lapacke_ssyevr.c | |||
lapacke_ssyevr_work.c | |||
lapacke_ssyevr_2stage.c | |||
lapacke_ssyevr_2stage_work.c | |||
lapacke_ssyevx.c | |||
lapacke_ssyevx_work.c | |||
lapacke_ssyevx_2stage.c | |||
lapacke_ssyevx_2stage_work.c | |||
lapacke_ssygst.c | |||
@@ -1607,8 +1623,6 @@ lapacke_ssygvx_work.c | |||
lapacke_ssyrfs.c | |||
lapacke_ssyrfs_work.c | |||
lapacke_ssysv.c | |||
lapacke_ssysv_rook.c | |||
lapacke_ssysv_rook_work.c | |||
lapacke_ssysv_work.c | |||
lapacke_ssysv_aa.c | |||
lapacke_ssysv_aa_work.c | |||
@@ -1616,6 +1630,8 @@ lapacke_ssysv_aa_2stage.c | |||
lapacke_ssysv_aa_2stage_work.c | |||
lapacke_ssysv_rk.c | |||
lapacke_ssysv_rk_work.c | |||
lapacke_ssysv_rook.c | |||
lapacke_ssysv_rook_work.c | |||
lapacke_ssysvx.c | |||
lapacke_ssysvx_work.c | |||
lapacke_ssyswapr.c | |||
@@ -1624,33 +1640,33 @@ lapacke_ssytrd.c | |||
lapacke_ssytrd_work.c | |||
lapacke_ssytrf.c | |||
lapacke_ssytrf_work.c | |||
lapacke_ssytrf_rook.c | |||
lapacke_ssytrf_rook_work.c | |||
lapacke_ssytrf_aa.c | |||
lapacke_ssytrf_aa_2stage.c | |||
lapacke_ssytrf_aa_work.c | |||
lapacke_ssytrf_aa_2stage.c | |||
lapacke_ssytrf_aa_2stage_work.c | |||
lapacke_ssytrf_rk.c | |||
lapacke_ssytrf_rk_work.c | |||
lapacke_ssytrf_rook.c | |||
lapacke_ssytrf_rook_work.c | |||
lapacke_ssytri.c | |||
lapacke_ssytri_work.c | |||
lapacke_ssytri2.c | |||
lapacke_ssytri2_work.c | |||
lapacke_ssytri_3.c | |||
lapacke_ssytri_3_work.c | |||
lapacke_ssytri2x.c | |||
lapacke_ssytri2x_work.c | |||
lapacke_ssytri_work.c | |||
lapacke_ssytri_3.c | |||
lapacke_ssytri_3_work.c | |||
lapacke_ssytrs.c | |||
lapacke_ssytrs_rook.c | |||
lapacke_ssytrs_work.c | |||
lapacke_ssytrs2.c | |||
lapacke_ssytrs2_work.c | |||
lapacke_ssytrs_3.c | |||
lapacke_ssytrs_3_work.c | |||
lapacke_ssytrs_aa.c | |||
lapacke_ssytrs_aa_2stage.c | |||
lapacke_ssytrs_aa_work.c | |||
lapacke_ssytrs_aa_2stage.c | |||
lapacke_ssytrs_aa_2stage_work.c | |||
lapacke_ssytrs_3.c | |||
lapacke_ssytrs_3_work.c | |||
lapacke_ssytrs_work.c | |||
lapacke_ssytrs_rook.c | |||
lapacke_ssytrs_rook_work.c | |||
lapacke_stbcon.c | |||
lapacke_stbcon_work.c | |||
@@ -1722,6 +1738,8 @@ lapacke_strttp.c | |||
lapacke_strttp_work.c | |||
lapacke_stzrzf.c | |||
lapacke_stzrzf_work.c | |||
) | |||
set(SOURCES_COMPLEX16 | |||
lapacke_zbbcsd.c | |||
lapacke_zbbcsd_work.c | |||
lapacke_zbdsqr.c | |||
@@ -1805,11 +1823,11 @@ lapacke_zgeqrf_work.c | |||
lapacke_zgeqrfp.c | |||
lapacke_zgeqrfp_work.c | |||
lapacke_zgeqrt.c | |||
lapacke_zgeqrt_work.c | |||
lapacke_zgeqrt2.c | |||
lapacke_zgeqrt2_work.c | |||
lapacke_zgeqrt3.c | |||
lapacke_zgeqrt3_work.c | |||
lapacke_zgeqrt_work.c | |||
lapacke_zgerfs.c | |||
lapacke_zgerfs_work.c | |||
lapacke_zgerqf.c | |||
@@ -1820,6 +1838,8 @@ lapacke_zgesv.c | |||
lapacke_zgesv_work.c | |||
lapacke_zgesvd.c | |||
lapacke_zgesvd_work.c | |||
lapacke_zgesvdq.c | |||
lapacke_zgesvdq_work.c | |||
lapacke_zgesvdx.c | |||
lapacke_zgesvdx_work.c | |||
lapacke_zgesvj.c | |||
@@ -1856,10 +1876,10 @@ lapacke_zggevx.c | |||
lapacke_zggevx_work.c | |||
lapacke_zggglm.c | |||
lapacke_zggglm_work.c | |||
lapacke_zgghrd.c | |||
lapacke_zgghrd_work.c | |||
lapacke_zgghd3.c | |||
lapacke_zgghd3_work.c | |||
lapacke_zgghrd.c | |||
lapacke_zgghrd_work.c | |||
lapacke_zgglse.c | |||
lapacke_zgglse_work.c | |||
lapacke_zggqrf.c | |||
@@ -1884,14 +1904,14 @@ lapacke_zgttrs.c | |||
lapacke_zgttrs_work.c | |||
lapacke_zhbev.c | |||
lapacke_zhbev_work.c | |||
lapacke_zhbevd.c | |||
lapacke_zhbevd_work.c | |||
lapacke_zhbevx.c | |||
lapacke_zhbevx_work.c | |||
lapacke_zhbev_2stage.c | |||
lapacke_zhbev_2stage_work.c | |||
lapacke_zhbevd.c | |||
lapacke_zhbevd_work.c | |||
lapacke_zhbevd_2stage.c | |||
lapacke_zhbevd_2stage_work.c | |||
lapacke_zhbevx.c | |||
lapacke_zhbevx_work.c | |||
lapacke_zhbevx_2stage.c | |||
lapacke_zhbevx_2stage_work.c | |||
lapacke_zhbgst.c | |||
@@ -1912,18 +1932,18 @@ lapacke_zheequb.c | |||
lapacke_zheequb_work.c | |||
lapacke_zheev.c | |||
lapacke_zheev_work.c | |||
lapacke_zheevd.c | |||
lapacke_zheevd_work.c | |||
lapacke_zheevr.c | |||
lapacke_zheevr_work.c | |||
lapacke_zheevx.c | |||
lapacke_zheevx_work.c | |||
lapacke_zheev_2stage.c | |||
lapacke_zheev_2stage_work.c | |||
lapacke_zheevd.c | |||
lapacke_zheevd_work.c | |||
lapacke_zheevd_2stage.c | |||
lapacke_zheevd_2stage_work.c | |||
lapacke_zheevr.c | |||
lapacke_zheevr_work.c | |||
lapacke_zheevr_2stage.c | |||
lapacke_zheevr_2stage_work.c | |||
lapacke_zheevx.c | |||
lapacke_zheevx_work.c | |||
lapacke_zheevx_2stage.c | |||
lapacke_zheevx_2stage_work.c | |||
lapacke_zhegst.c | |||
@@ -1941,8 +1961,8 @@ lapacke_zherfs_work.c | |||
lapacke_zhesv.c | |||
lapacke_zhesv_work.c | |||
lapacke_zhesv_aa.c | |||
lapacke_zhesv_aa_2stage.c | |||
lapacke_zhesv_aa_work.c | |||
lapacke_zhesv_aa_2stage.c | |||
lapacke_zhesv_aa_2stage_work.c | |||
lapacke_zhesv_rk.c | |||
lapacke_zhesv_rk_work.c | |||
@@ -1953,34 +1973,34 @@ lapacke_zheswapr_work.c | |||
lapacke_zhetrd.c | |||
lapacke_zhetrd_work.c | |||
lapacke_zhetrf.c | |||
lapacke_zhetrf_rook.c | |||
lapacke_zhetrf_work.c | |||
lapacke_zhetrf_rook_work.c | |||
lapacke_zhetrf_aa.c | |||
lapacke_zhetrf_aa_2stage.c | |||
lapacke_zhetrf_aa_work.c | |||
lapacke_zhetrf_aa_2stage.c | |||
lapacke_zhetrf_aa_2stage_work.c | |||
lapacke_zhetrf_rk.c | |||
lapacke_zhetrf_rk_work.c | |||
lapacke_zhetrf_rook.c | |||
lapacke_zhetrf_rook_work.c | |||
lapacke_zhetri.c | |||
lapacke_zhetri_work.c | |||
lapacke_zhetri2.c | |||
lapacke_zhetri2_work.c | |||
lapacke_zhetri_3.c | |||
lapacke_zhetri_3_work.c | |||
lapacke_zhetri2x.c | |||
lapacke_zhetri2x_work.c | |||
lapacke_zhetri_work.c | |||
lapacke_zhetri_3.c | |||
lapacke_zhetri_3_work.c | |||
lapacke_zhetrs.c | |||
lapacke_zhetrs_rook.c | |||
lapacke_zhetrs_work.c | |||
lapacke_zhetrs2.c | |||
lapacke_zhetrs2_work.c | |||
lapacke_zhetrs_work.c | |||
lapacke_zhetrs_3.c | |||
lapacke_zhetrs_3_work.c | |||
lapacke_zhetrs_aa.c | |||
lapacke_zhetrs_aa_2stage.c | |||
lapacke_zhetrs_aa_work.c | |||
lapacke_zhetrs_aa_2stage.c | |||
lapacke_zhetrs_aa_2stage_work.c | |||
lapacke_zhetrs_3.c | |||
lapacke_zhetrs_3_work.c | |||
lapacke_zhetrs_rook.c | |||
lapacke_zhetrs_rook_work.c | |||
lapacke_zhfrk.c | |||
lapacke_zhfrk_work.c | |||
@@ -2172,52 +2192,54 @@ lapacke_zsyconv.c | |||
lapacke_zsyconv_work.c | |||
lapacke_zsyequb.c | |||
lapacke_zsyequb_work.c | |||
lapacke_zsyr.c | |||
lapacke_zsyr_work.c | |||
lapacke_zsyrfs.c | |||
lapacke_zsyrfs_work.c | |||
lapacke_zsysv.c | |||
lapacke_zsysv_rook.c | |||
lapacke_zsysv_rook_work.c | |||
lapacke_zsysv_work.c | |||
lapacke_zsysv_aa.c | |||
lapacke_zsysv_aa_2stage.c | |||
lapacke_zsysv_aa_work.c | |||
lapacke_zsysv_aa_2stage.c | |||
lapacke_zsysv_aa_2stage_work.c | |||
lapacke_zsysv_rk.c | |||
lapacke_zsysv_rk_work.c | |||
lapacke_zsysv_rook.c | |||
lapacke_zsysv_rook_work.c | |||
lapacke_zsysvx.c | |||
lapacke_zsysvx_work.c | |||
lapacke_zsyswapr.c | |||
lapacke_zsyswapr_work.c | |||
lapacke_zsytrf.c | |||
lapacke_zsytrf_work.c | |||
lapacke_zsytrf_rook.c | |||
lapacke_zsytrf_rook_work.c | |||
lapacke_zsytrf_aa.c | |||
lapacke_zsytrf_aa_2stage.c | |||
lapacke_zsytrf_aa_work.c | |||
lapacke_zsytrf_aa_2stage.c | |||
lapacke_zsytrf_aa_2stage_work.c | |||
lapacke_zsytrf_rk.c | |||
lapacke_zsytrf_rk_work.c | |||
lapacke_zsytrf_rook.c | |||
lapacke_zsytrf_rook_work.c | |||
lapacke_zsytri.c | |||
lapacke_zsytri_work.c | |||
lapacke_zsytri2.c | |||
lapacke_zsytri2_work.c | |||
lapacke_zsytri_3.c | |||
lapacke_zsytri_3_work.c | |||
lapacke_zsytri2x.c | |||
lapacke_zsytri2x_work.c | |||
lapacke_zsytri_work.c | |||
lapacke_zsytri_3.c | |||
lapacke_zsytri_3_work.c | |||
lapacke_zsytrs.c | |||
lapacke_zsytrs_rook.c | |||
lapacke_zsytrs_work.c | |||
lapacke_zsytrs2.c | |||
lapacke_zsytrs2_work.c | |||
lapacke_zsytrs_work.c | |||
lapacke_zsytrs_rook_work.c | |||
lapacke_zsytrs_3.c | |||
lapacke_zsytrs_3_work.c | |||
lapacke_zsytrs_aa.c | |||
lapacke_zsytrs_aa_2stage.c | |||
lapacke_zsytrs_aa_work.c | |||
lapacke_zsytrs_aa_2stage.c | |||
lapacke_zsytrs_aa_2stage_work.c | |||
lapacke_zsytrs_3.c | |||
lapacke_zsytrs_3_work.c | |||
lapacke_zsytrs_rook.c | |||
lapacke_zsytrs_rook_work.c | |||
lapacke_ztbcon.c | |||
lapacke_ztbcon_work.c | |||
lapacke_ztbrfs.c | |||
@@ -2249,9 +2271,9 @@ lapacke_ztpcon_work.c | |||
lapacke_ztpmqrt.c | |||
lapacke_ztpmqrt_work.c | |||
lapacke_ztpqrt.c | |||
lapacke_ztpqrt_work.c | |||
lapacke_ztpqrt2.c | |||
lapacke_ztpqrt2_work.c | |||
lapacke_ztpqrt_work.c | |||
lapacke_ztprfb.c | |||
lapacke_ztprfb_work.c | |||
lapacke_ztprfs.c | |||
@@ -2328,11 +2350,6 @@ lapacke_zupgtr.c | |||
lapacke_zupgtr_work.c | |||
lapacke_zupmtr.c | |||
lapacke_zupmtr_work.c | |||
lapacke_zsyr.c | |||
lapacke_csyr.c | |||
lapacke_zsyr_work.c | |||
lapacke_csyr_work.c | |||
lapacke_ilaver.c | |||
) | |||
set(DEPRECATED | |||
@@ -32,12 +32,21 @@ | |||
############################################################################## | |||
# makefile for LAPACKE, used to build lapacke binary. | |||
# | |||
# Note: we use multiple OBJ_A, OBJ_B, etc, instead of a single OBJ | |||
# Note: we use multiple OBJ_S, OBJ_C, etc, instead of a single OBJ | |||
# to allow build with mingw (argument list too long for the msys ar) | |||
# | |||
include ../../make.inc | |||
TOPSRCDIR = ../.. | |||
include $(TOPSRCDIR)/make.inc | |||
OBJ_A = \ | |||
.SUFFIXES: .c .o | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
OBJ = \ | |||
lapacke_ilaver.o \ | |||
lapacke_nancheck.o | |||
OBJ_C = \ | |||
lapacke_cbbcsd.o \ | |||
lapacke_cbbcsd_work.o \ | |||
lapacke_cbdsqr.o \ | |||
@@ -82,12 +91,12 @@ lapacke_cgeevx.o \ | |||
lapacke_cgeevx_work.o \ | |||
lapacke_cgehrd.o \ | |||
lapacke_cgehrd_work.o \ | |||
lapacke_cgejsv.o \ | |||
lapacke_cgejsv_work.o \ | |||
lapacke_cgelq.o \ | |||
lapacke_cgelq_work.o \ | |||
lapacke_cgelq2.o \ | |||
lapacke_cgelq2_work.o \ | |||
lapacke_cgejsv.o \ | |||
lapacke_cgejsv_work.o \ | |||
lapacke_cgelqf.o \ | |||
lapacke_cgelqf_work.o \ | |||
lapacke_cgels.o \ | |||
@@ -117,11 +126,11 @@ lapacke_cgeqrf_work.o \ | |||
lapacke_cgeqrfp.o \ | |||
lapacke_cgeqrfp_work.o \ | |||
lapacke_cgeqrt.o \ | |||
lapacke_cgeqrt_work.o \ | |||
lapacke_cgeqrt2.o \ | |||
lapacke_cgeqrt2_work.o \ | |||
lapacke_cgeqrt3.o \ | |||
lapacke_cgeqrt3_work.o \ | |||
lapacke_cgeqrt_work.o \ | |||
lapacke_cgerfs.o \ | |||
lapacke_cgerfs_work.o \ | |||
lapacke_cgerqf.o \ | |||
@@ -132,6 +141,8 @@ lapacke_cgesv.o \ | |||
lapacke_cgesv_work.o \ | |||
lapacke_cgesvd.o \ | |||
lapacke_cgesvd_work.o \ | |||
lapacke_cgesvdq.o \ | |||
lapacke_cgesvdq_work.o \ | |||
lapacke_cgesvdx.o \ | |||
lapacke_cgesvdx_work.o \ | |||
lapacke_cgesvj.o \ | |||
@@ -168,10 +179,10 @@ lapacke_cggevx.o \ | |||
lapacke_cggevx_work.o \ | |||
lapacke_cggglm.o \ | |||
lapacke_cggglm_work.o \ | |||
lapacke_cgghrd.o \ | |||
lapacke_cgghrd_work.o \ | |||
lapacke_cgghd3.o \ | |||
lapacke_cgghd3_work.o \ | |||
lapacke_cgghrd.o \ | |||
lapacke_cgghrd_work.o \ | |||
lapacke_cgglse.o \ | |||
lapacke_cgglse_work.o \ | |||
lapacke_cggqrf.o \ | |||
@@ -196,14 +207,14 @@ lapacke_cgttrs.o \ | |||
lapacke_cgttrs_work.o \ | |||
lapacke_chbev.o \ | |||
lapacke_chbev_work.o \ | |||
lapacke_chbevd.o \ | |||
lapacke_chbevd_work.o \ | |||
lapacke_chbevx.o \ | |||
lapacke_chbevx_work.o \ | |||
lapacke_chbev_2stage.o \ | |||
lapacke_chbev_2stage_work.o \ | |||
lapacke_chbevd.o \ | |||
lapacke_chbevd_work.o \ | |||
lapacke_chbevd_2stage.o \ | |||
lapacke_chbevd_2stage_work.o \ | |||
lapacke_chbevx.o \ | |||
lapacke_chbevx_work.o \ | |||
lapacke_chbevx_2stage.o \ | |||
lapacke_chbevx_2stage_work.o \ | |||
lapacke_chbgst.o \ | |||
@@ -224,18 +235,18 @@ lapacke_cheequb.o \ | |||
lapacke_cheequb_work.o \ | |||
lapacke_cheev.o \ | |||
lapacke_cheev_work.o \ | |||
lapacke_cheevd.o \ | |||
lapacke_cheevd_work.o \ | |||
lapacke_cheevr.o \ | |||
lapacke_cheevr_work.o \ | |||
lapacke_cheevx.o \ | |||
lapacke_cheevx_work.o \ | |||
lapacke_cheev_2stage.o \ | |||
lapacke_cheev_2stage_work.o \ | |||
lapacke_cheevd.o \ | |||
lapacke_cheevd_work.o \ | |||
lapacke_cheevd_2stage.o \ | |||
lapacke_cheevd_2stage_work.o \ | |||
lapacke_cheevr.o \ | |||
lapacke_cheevr_work.o \ | |||
lapacke_cheevr_2stage.o \ | |||
lapacke_cheevr_2stage_work.o \ | |||
lapacke_cheevx.o \ | |||
lapacke_cheevx_work.o \ | |||
lapacke_cheevx_2stage.o \ | |||
lapacke_cheevx_2stage_work.o \ | |||
lapacke_chegst.o \ | |||
@@ -265,35 +276,35 @@ lapacke_cheswapr_work.o \ | |||
lapacke_chetrd.o \ | |||
lapacke_chetrd_work.o \ | |||
lapacke_chetrf.o \ | |||
lapacke_chetrf_rook.o \ | |||
lapacke_chetrf_work.o \ | |||
lapacke_chetrf_rook_work.o \ | |||
lapacke_chetrf_aa.o \ | |||
lapacke_chetrf_aa_2stage.o \ | |||
lapacke_chetrf_aa_work.o \ | |||
lapacke_chetrf_aa_2stage.o \ | |||
lapacke_chetrf_aa_2stage_work.o \ | |||
lapacke_chetrf_rk.o \ | |||
lapacke_chetrf_rk_work.o \ | |||
lapacke_chetrf_rook.o \ | |||
lapacke_chetrf_rook_work.o \ | |||
lapacke_chetri.o \ | |||
lapacke_chetri_work.o \ | |||
lapacke_chetri2.o \ | |||
lapacke_chetri2_work.o \ | |||
lapacke_chetri_3.o \ | |||
lapacke_chetri_3_work.o \ | |||
lapacke_chetri2x.o \ | |||
lapacke_chetri2x_work.o \ | |||
lapacke_chetri_work.o \ | |||
lapacke_chetri_3.o \ | |||
lapacke_chetri_3_work.o \ | |||
lapacke_chetrs.o \ | |||
lapacke_chetrs_rook.o \ | |||
lapacke_chetrs_work.o \ | |||
lapacke_chetrs2.o \ | |||
lapacke_chetrs2_work.o \ | |||
lapacke_chetrs_work.o \ | |||
lapacke_chetrs_rook_work.o \ | |||
lapacke_chetrs_3.o \ | |||
lapacke_chetrs_3_work.o \ | |||
lapacke_chetrs_aa.o \ | |||
lapacke_chetrs_aa_2stage.o \ | |||
lapacke_chetrs_aa_work.o \ | |||
lapacke_chetrs_aa_2stage.o \ | |||
lapacke_chetrs_aa_2stage_work.o \ | |||
lapacke_chetrs_3.o \ | |||
lapacke_chetrs_3_work.o \ | |||
lapacke_chetrs_rook.o \ | |||
lapacke_chetrs_rook_work.o \ | |||
lapacke_chfrk.o \ | |||
lapacke_chfrk_work.o \ | |||
lapacke_chgeqz.o \ | |||
@@ -484,11 +495,11 @@ lapacke_csyconv.o \ | |||
lapacke_csyconv_work.o \ | |||
lapacke_csyequb.o \ | |||
lapacke_csyequb_work.o \ | |||
lapacke_csyr.o \ | |||
lapacke_csyr_work.o \ | |||
lapacke_csyrfs.o \ | |||
lapacke_csyrfs_work.o \ | |||
lapacke_csysv.o \ | |||
lapacke_csysv_rook.o \ | |||
lapacke_csysv_rook_work.o \ | |||
lapacke_csysv_work.o \ | |||
lapacke_csysv_aa.o \ | |||
lapacke_csysv_aa_work.o \ | |||
@@ -496,40 +507,42 @@ lapacke_csysv_aa_2stage.o \ | |||
lapacke_csysv_aa_2stage_work.o \ | |||
lapacke_csysv_rk.o \ | |||
lapacke_csysv_rk_work.o \ | |||
lapacke_csysv_rook.o \ | |||
lapacke_csysv_rook_work.o \ | |||
lapacke_csysvx.o \ | |||
lapacke_csysvx_work.o \ | |||
lapacke_csyswapr.o \ | |||
lapacke_csyswapr_work.o \ | |||
lapacke_csytrf.o \ | |||
lapacke_csytrf_work.o \ | |||
lapacke_csytrf_rook.o \ | |||
lapacke_csytrf_rook_work.o \ | |||
lapacke_csytrf_aa.o \ | |||
lapacke_csytrf_aa_2stage.o \ | |||
lapacke_csytrf_aa_work.o \ | |||
lapacke_csytrf_aa_2stage.o \ | |||
lapacke_csytrf_aa_2stage_work.o \ | |||
lapacke_csytrf_rk.o \ | |||
lapacke_csytrf_rk_work.o \ | |||
lapacke_csytrf_rook.o \ | |||
lapacke_csytrf_rook_work.o \ | |||
lapacke_csytri.o \ | |||
lapacke_csytri_work.o \ | |||
lapacke_csytri2.o \ | |||
lapacke_csytri2_work.o \ | |||
lapacke_csytri_3.o \ | |||
lapacke_csytri_3_work.o \ | |||
lapacke_csytri2x.o \ | |||
lapacke_csytri2x_work.o \ | |||
lapacke_csytri_work.o \ | |||
lapacke_csytri_3.o \ | |||
lapacke_csytri_3_work.o \ | |||
lapacke_csytrs.o \ | |||
lapacke_csytrs_rook.o \ | |||
lapacke_csytrs_work.o \ | |||
lapacke_csytrs2.o \ | |||
lapacke_csytrs2_work.o \ | |||
lapacke_csytrs_work.o \ | |||
lapacke_csytrs_rook_work.o \ | |||
lapacke_csytrs_3.o \ | |||
lapacke_csytrs_3_work.o \ | |||
lapacke_csytrs_aa.o \ | |||
lapacke_csytrs_aa_2stage.o \ | |||
lapacke_csytrs_aa_work.o \ | |||
lapacke_csytrs_aa_2stage.o \ | |||
lapacke_csytrs_aa_2stage_work.o \ | |||
lapacke_csytrs_3.o \ | |||
lapacke_csytrs_3_work.o \ | |||
lapacke_csytrs_rook.o \ | |||
lapacke_csytrs_rook_work.o \ | |||
lapacke_ctbcon.o \ | |||
lapacke_ctbcon_work.o \ | |||
lapacke_ctbrfs.o \ | |||
@@ -561,9 +574,9 @@ lapacke_ctpcon_work.o \ | |||
lapacke_ctpmqrt.o \ | |||
lapacke_ctpmqrt_work.o \ | |||
lapacke_ctpqrt.o \ | |||
lapacke_ctpqrt_work.o \ | |||
lapacke_ctpqrt2.o \ | |||
lapacke_ctpqrt2_work.o \ | |||
lapacke_ctpqrt_work.o \ | |||
lapacke_ctprfb.o \ | |||
lapacke_ctprfb_work.o \ | |||
lapacke_ctprfs.o \ | |||
@@ -639,15 +652,17 @@ lapacke_cunmtr_work.o \ | |||
lapacke_cupgtr.o \ | |||
lapacke_cupgtr_work.o \ | |||
lapacke_cupmtr.o \ | |||
lapacke_cupmtr_work.o \ | |||
lapacke_cupmtr_work.o | |||
OBJ_D = \ | |||
lapacke_dbbcsd.o \ | |||
lapacke_dbbcsd_work.o \ | |||
lapacke_dbdsdc.o \ | |||
lapacke_dbdsdc_work.o \ | |||
lapacke_dbdsvdx.o \ | |||
lapacke_dbdsvdx_work.o \ | |||
lapacke_dbdsqr.o \ | |||
lapacke_dbdsqr_work.o \ | |||
lapacke_dbdsvdx.o \ | |||
lapacke_dbdsvdx_work.o \ | |||
lapacke_ddisna.o \ | |||
lapacke_ddisna_work.o \ | |||
lapacke_dgbbrd.o \ | |||
@@ -725,11 +740,11 @@ lapacke_dgeqrf_work.o \ | |||
lapacke_dgeqrfp.o \ | |||
lapacke_dgeqrfp_work.o \ | |||
lapacke_dgeqrt.o \ | |||
lapacke_dgeqrt_work.o \ | |||
lapacke_dgeqrt2.o \ | |||
lapacke_dgeqrt2_work.o \ | |||
lapacke_dgeqrt3.o \ | |||
lapacke_dgeqrt3_work.o \ | |||
lapacke_dgeqrt_work.o \ | |||
lapacke_dgerfs.o \ | |||
lapacke_dgerfs_work.o \ | |||
lapacke_dgerqf.o \ | |||
@@ -740,6 +755,8 @@ lapacke_dgesv.o \ | |||
lapacke_dgesv_work.o \ | |||
lapacke_dgesvd.o \ | |||
lapacke_dgesvd_work.o \ | |||
lapacke_dgesvdq.o \ | |||
lapacke_dgesvdq_work.o \ | |||
lapacke_dgesvdx.o \ | |||
lapacke_dgesvdx_work.o \ | |||
lapacke_dgesvj.o \ | |||
@@ -776,10 +793,10 @@ lapacke_dggevx.o \ | |||
lapacke_dggevx_work.o \ | |||
lapacke_dggglm.o \ | |||
lapacke_dggglm_work.o \ | |||
lapacke_dgghrd.o \ | |||
lapacke_dgghrd_work.o \ | |||
lapacke_dgghd3.o \ | |||
lapacke_dgghd3_work.o \ | |||
lapacke_dgghrd.o \ | |||
lapacke_dgghrd_work.o \ | |||
lapacke_dgglse.o \ | |||
lapacke_dgglse_work.o \ | |||
lapacke_dggqrf.o \ | |||
@@ -972,14 +989,14 @@ lapacke_dpttrs.o \ | |||
lapacke_dpttrs_work.o \ | |||
lapacke_dsbev.o \ | |||
lapacke_dsbev_work.o \ | |||
lapacke_dsbevd.o \ | |||
lapacke_dsbevd_work.o \ | |||
lapacke_dsbevx.o \ | |||
lapacke_dsbevx_work.o \ | |||
lapacke_dsbev_2stage.o \ | |||
lapacke_dsbev_2stage_work.o \ | |||
lapacke_dsbevd.o \ | |||
lapacke_dsbevd_work.o \ | |||
lapacke_dsbevd_2stage.o \ | |||
lapacke_dsbevd_2stage_work.o \ | |||
lapacke_dsbevx.o \ | |||
lapacke_dsbevx_work.o \ | |||
lapacke_dsbevx_2stage.o \ | |||
lapacke_dsbevx_2stage_work.o \ | |||
lapacke_dsbgst.o \ | |||
@@ -1060,18 +1077,18 @@ lapacke_dsyequb.o \ | |||
lapacke_dsyequb_work.o \ | |||
lapacke_dsyev.o \ | |||
lapacke_dsyev_work.o \ | |||
lapacke_dsyevd.o \ | |||
lapacke_dsyevd_work.o \ | |||
lapacke_dsyevr.o \ | |||
lapacke_dsyevr_work.o \ | |||
lapacke_dsyevx.o \ | |||
lapacke_dsyevx_work.o \ | |||
lapacke_dsyev_2stage.o \ | |||
lapacke_dsyev_2stage_work.o \ | |||
lapacke_dsyevd.o \ | |||
lapacke_dsyevd_work.o \ | |||
lapacke_dsyevd_2stage.o \ | |||
lapacke_dsyevd_2stage_work.o \ | |||
lapacke_dsyevr.o \ | |||
lapacke_dsyevr_work.o \ | |||
lapacke_dsyevr_2stage.o \ | |||
lapacke_dsyevr_2stage_work.o \ | |||
lapacke_dsyevx.o \ | |||
lapacke_dsyevx_work.o \ | |||
lapacke_dsyevx_2stage.o \ | |||
lapacke_dsyevx_2stage_work.o \ | |||
lapacke_dsygst.o \ | |||
@@ -1087,8 +1104,6 @@ lapacke_dsygvx_work.o \ | |||
lapacke_dsyrfs.o \ | |||
lapacke_dsyrfs_work.o \ | |||
lapacke_dsysv.o \ | |||
lapacke_dsysv_rook.o \ | |||
lapacke_dsysv_rook_work.o \ | |||
lapacke_dsysv_work.o \ | |||
lapacke_dsysv_aa.o \ | |||
lapacke_dsysv_aa_work.o \ | |||
@@ -1096,6 +1111,8 @@ lapacke_dsysv_aa_2stage.o \ | |||
lapacke_dsysv_aa_2stage_work.o \ | |||
lapacke_dsysv_rk.o \ | |||
lapacke_dsysv_rk_work.o \ | |||
lapacke_dsysv_rook.o \ | |||
lapacke_dsysv_rook_work.o \ | |||
lapacke_dsysvx.o \ | |||
lapacke_dsysvx_work.o \ | |||
lapacke_dsyswapr.o \ | |||
@@ -1104,36 +1121,34 @@ lapacke_dsytrd.o \ | |||
lapacke_dsytrd_work.o \ | |||
lapacke_dsytrf.o \ | |||
lapacke_dsytrf_work.o \ | |||
lapacke_dsytrf_rook.o \ | |||
lapacke_dsytrf_rook_work.o \ | |||
lapacke_dsytrf_aa.o \ | |||
lapacke_dsytrf_aa_work.o \ | |||
lapacke_dsytrf_aa_2stage.o \ | |||
lapacke_dsytrf_aa_2stage_work.o \ | |||
lapacke_dsytrf_rk.o \ | |||
lapacke_dsytrf_rk_work.o \ | |||
lapacke_dsytrf_rook.o \ | |||
lapacke_dsytrf_rook_work.o \ | |||
lapacke_dsytri.o \ | |||
lapacke_dsytri_work.o \ | |||
lapacke_dsytri2.o \ | |||
lapacke_dsytri2_work.o \ | |||
lapacke_dsytri_3.o \ | |||
lapacke_dsytri_3_work.o \ | |||
lapacke_dsytri2x.o \ | |||
lapacke_dsytri2x_work.o \ | |||
lapacke_dsytri_work.o | |||
OBJ_B = \ | |||
lapacke_dsytri_3.o \ | |||
lapacke_dsytri_3_work.o \ | |||
lapacke_dsytrs.o \ | |||
lapacke_dsytrs_rook.o \ | |||
lapacke_dsytrs_work.o \ | |||
lapacke_dsytrs2.o \ | |||
lapacke_dsytrs2_work.o \ | |||
lapacke_dsytrs_work.o \ | |||
lapacke_dsytrs_rook_work.o \ | |||
lapacke_dsytrs_3.o \ | |||
lapacke_dsytrs_3_work.o \ | |||
lapacke_dsytrs_aa.o \ | |||
lapacke_dsytrs_aa_2stage.o \ | |||
lapacke_dsytrs_aa_work.o \ | |||
lapacke_dsytrs_aa_2stage.o \ | |||
lapacke_dsytrs_aa_2stage_work.o \ | |||
lapacke_dsytrs_3.o \ | |||
lapacke_dsytrs_3_work.o \ | |||
lapacke_dsytrs_rook.o \ | |||
lapacke_dsytrs_rook_work.o \ | |||
lapacke_dtbcon.o \ | |||
lapacke_dtbcon_work.o \ | |||
lapacke_dtbrfs.o \ | |||
@@ -1165,9 +1180,9 @@ lapacke_dtpcon_work.o \ | |||
lapacke_dtpmqrt.o \ | |||
lapacke_dtpmqrt_work.o \ | |||
lapacke_dtpqrt.o \ | |||
lapacke_dtpqrt_work.o \ | |||
lapacke_dtpqrt2.o \ | |||
lapacke_dtpqrt2_work.o \ | |||
lapacke_dtpqrt_work.o \ | |||
lapacke_dtprfb.o \ | |||
lapacke_dtprfb_work.o \ | |||
lapacke_dtprfs.o \ | |||
@@ -1203,16 +1218,17 @@ lapacke_dtrttf_work.o \ | |||
lapacke_dtrttp.o \ | |||
lapacke_dtrttp_work.o \ | |||
lapacke_dtzrzf.o \ | |||
lapacke_dtzrzf_work.o \ | |||
lapacke_nancheck.o \ | |||
lapacke_dtzrzf_work.o | |||
OBJ_S = \ | |||
lapacke_sbbcsd.o \ | |||
lapacke_sbbcsd_work.o \ | |||
lapacke_sbdsdc.o \ | |||
lapacke_sbdsdc_work.o \ | |||
lapacke_sbdsvdx.o \ | |||
lapacke_sbdsvdx_work.o \ | |||
lapacke_sbdsqr.o \ | |||
lapacke_sbdsqr_work.o \ | |||
lapacke_sbdsvdx.o \ | |||
lapacke_sbdsvdx_work.o \ | |||
lapacke_sdisna.o \ | |||
lapacke_sdisna_work.o \ | |||
lapacke_sgbbrd.o \ | |||
@@ -1290,11 +1306,11 @@ lapacke_sgeqrf_work.o \ | |||
lapacke_sgeqrfp.o \ | |||
lapacke_sgeqrfp_work.o \ | |||
lapacke_sgeqrt.o \ | |||
lapacke_sgeqrt_work.o \ | |||
lapacke_sgeqrt2.o \ | |||
lapacke_sgeqrt2_work.o \ | |||
lapacke_sgeqrt3.o \ | |||
lapacke_sgeqrt3_work.o \ | |||
lapacke_sgeqrt_work.o \ | |||
lapacke_sgerfs.o \ | |||
lapacke_sgerfs_work.o \ | |||
lapacke_sgerqf.o \ | |||
@@ -1305,6 +1321,8 @@ lapacke_sgesv.o \ | |||
lapacke_sgesv_work.o \ | |||
lapacke_sgesvd.o \ | |||
lapacke_sgesvd_work.o \ | |||
lapacke_sgesvdq.o \ | |||
lapacke_sgesvdq_work.o \ | |||
lapacke_sgesvdx.o \ | |||
lapacke_sgesvdx_work.o \ | |||
lapacke_sgesvj.o \ | |||
@@ -1341,10 +1359,10 @@ lapacke_sggevx.o \ | |||
lapacke_sggevx_work.o \ | |||
lapacke_sggglm.o \ | |||
lapacke_sggglm_work.o \ | |||
lapacke_sgghrd.o \ | |||
lapacke_sgghrd_work.o \ | |||
lapacke_sgghd3.o \ | |||
lapacke_sgghd3_work.o \ | |||
lapacke_sgghrd.o \ | |||
lapacke_sgghrd_work.o \ | |||
lapacke_sgglse.o \ | |||
lapacke_sgglse_work.o \ | |||
lapacke_sggqrf.o \ | |||
@@ -1537,14 +1555,14 @@ lapacke_spttrs.o \ | |||
lapacke_spttrs_work.o \ | |||
lapacke_ssbev.o \ | |||
lapacke_ssbev_work.o \ | |||
lapacke_ssbevd.o \ | |||
lapacke_ssbevd_work.o \ | |||
lapacke_ssbevx.o \ | |||
lapacke_ssbevx_work.o \ | |||
lapacke_ssbev_2stage.o \ | |||
lapacke_ssbev_2stage_work.o \ | |||
lapacke_ssbevd.o \ | |||
lapacke_ssbevd_work.o \ | |||
lapacke_ssbevd_2stage.o \ | |||
lapacke_ssbevd_2stage_work.o \ | |||
lapacke_ssbevx.o \ | |||
lapacke_ssbevx_work.o \ | |||
lapacke_ssbevx_2stage.o \ | |||
lapacke_ssbevx_2stage_work.o \ | |||
lapacke_ssbgst.o \ | |||
@@ -1621,18 +1639,18 @@ lapacke_ssyequb.o \ | |||
lapacke_ssyequb_work.o \ | |||
lapacke_ssyev.o \ | |||
lapacke_ssyev_work.o \ | |||
lapacke_ssyevd.o \ | |||
lapacke_ssyevd_work.o \ | |||
lapacke_ssyevr.o \ | |||
lapacke_ssyevr_work.o \ | |||
lapacke_ssyevx.o \ | |||
lapacke_ssyevx_work.o \ | |||
lapacke_ssyev_2stage.o \ | |||
lapacke_ssyev_2stage_work.o \ | |||
lapacke_ssyevd.o \ | |||
lapacke_ssyevd_work.o \ | |||
lapacke_ssyevd_2stage.o \ | |||
lapacke_ssyevd_2stage_work.o \ | |||
lapacke_ssyevr.o \ | |||
lapacke_ssyevr_work.o \ | |||
lapacke_ssyevr_2stage.o \ | |||
lapacke_ssyevr_2stage_work.o \ | |||
lapacke_ssyevx.o \ | |||
lapacke_ssyevx_work.o \ | |||
lapacke_ssyevx_2stage.o \ | |||
lapacke_ssyevx_2stage_work.o \ | |||
lapacke_ssygst.o \ | |||
@@ -1648,8 +1666,6 @@ lapacke_ssygvx_work.o \ | |||
lapacke_ssyrfs.o \ | |||
lapacke_ssyrfs_work.o \ | |||
lapacke_ssysv.o \ | |||
lapacke_ssysv_rook.o \ | |||
lapacke_ssysv_rook_work.o \ | |||
lapacke_ssysv_work.o \ | |||
lapacke_ssysv_aa.o \ | |||
lapacke_ssysv_aa_work.o \ | |||
@@ -1657,6 +1673,8 @@ lapacke_ssysv_aa_2stage.o \ | |||
lapacke_ssysv_aa_2stage_work.o \ | |||
lapacke_ssysv_rk.o \ | |||
lapacke_ssysv_rk_work.o \ | |||
lapacke_ssysv_rook.o \ | |||
lapacke_ssysv_rook_work.o \ | |||
lapacke_ssysvx.o \ | |||
lapacke_ssysvx_work.o \ | |||
lapacke_ssyswapr.o \ | |||
@@ -1665,34 +1683,34 @@ lapacke_ssytrd.o \ | |||
lapacke_ssytrd_work.o \ | |||
lapacke_ssytrf.o \ | |||
lapacke_ssytrf_work.o \ | |||
lapacke_ssytrf_rook.o \ | |||
lapacke_ssytrf_rook_work.o \ | |||
lapacke_ssytrf_aa.o \ | |||
lapacke_ssytrf_aa_work.o \ | |||
lapacke_ssytrf_aa_2stage.o \ | |||
lapacke_ssytrf_aa_2stage_work.o \ | |||
lapacke_ssytrf_rk.o \ | |||
lapacke_ssytrf_rk_work.o \ | |||
lapacke_ssytrf_rook.o \ | |||
lapacke_ssytrf_rook_work.o \ | |||
lapacke_ssytri.o \ | |||
lapacke_ssytri_work.o \ | |||
lapacke_ssytri2.o \ | |||
lapacke_ssytri2_work.o \ | |||
lapacke_ssytri_3.o \ | |||
lapacke_ssytri_3_work.o \ | |||
lapacke_ssytri2x.o \ | |||
lapacke_ssytri2x_work.o \ | |||
lapacke_ssytri_work.o \ | |||
lapacke_ssytri_3.o \ | |||
lapacke_ssytri_3_work.o \ | |||
lapacke_ssytrs.o \ | |||
lapacke_ssytrs_rook.o \ | |||
lapacke_ssytrs_work.o \ | |||
lapacke_ssytrs2.o \ | |||
lapacke_ssytrs2_work.o \ | |||
lapacke_ssytrs_work.o \ | |||
lapacke_ssytrs_rook_work.o \ | |||
lapacke_ssytrs_3.o \ | |||
lapacke_ssytrs_3_work.o \ | |||
lapacke_ssytrs_aa.o \ | |||
lapacke_ssytrs_aa_2stage.o \ | |||
lapacke_ssytrs_aa_work.o \ | |||
lapacke_ssytrs_aa_2stage.o \ | |||
lapacke_ssytrs_aa_2stage_work.o \ | |||
lapacke_ssytrs_3.o \ | |||
lapacke_ssytrs_3_work.o \ | |||
lapacke_ssytrs_rook.o \ | |||
lapacke_ssytrs_rook_work.o \ | |||
lapacke_stbcon.o \ | |||
lapacke_stbcon_work.o \ | |||
lapacke_stbrfs.o \ | |||
@@ -1762,7 +1780,9 @@ lapacke_strttf_work.o \ | |||
lapacke_strttp.o \ | |||
lapacke_strttp_work.o \ | |||
lapacke_stzrzf.o \ | |||
lapacke_stzrzf_work.o \ | |||
lapacke_stzrzf_work.o | |||
OBJ_Z = \ | |||
lapacke_zbbcsd.o \ | |||
lapacke_zbbcsd_work.o \ | |||
lapacke_zbdsqr.o \ | |||
@@ -1846,11 +1866,11 @@ lapacke_zgeqrf_work.o \ | |||
lapacke_zgeqrfp.o \ | |||
lapacke_zgeqrfp_work.o \ | |||
lapacke_zgeqrt.o \ | |||
lapacke_zgeqrt_work.o \ | |||
lapacke_zgeqrt2.o \ | |||
lapacke_zgeqrt2_work.o \ | |||
lapacke_zgeqrt3.o \ | |||
lapacke_zgeqrt3_work.o \ | |||
lapacke_zgeqrt_work.o \ | |||
lapacke_zgerfs.o \ | |||
lapacke_zgerfs_work.o \ | |||
lapacke_zgerqf.o \ | |||
@@ -1861,6 +1881,8 @@ lapacke_zgesv.o \ | |||
lapacke_zgesv_work.o \ | |||
lapacke_zgesvd.o \ | |||
lapacke_zgesvd_work.o \ | |||
lapacke_zgesvdq.o \ | |||
lapacke_zgesvdq_work.o \ | |||
lapacke_zgesvdx.o \ | |||
lapacke_zgesvdx_work.o \ | |||
lapacke_zgesvj.o \ | |||
@@ -1897,10 +1919,10 @@ lapacke_zggevx.o \ | |||
lapacke_zggevx_work.o \ | |||
lapacke_zggglm.o \ | |||
lapacke_zggglm_work.o \ | |||
lapacke_zgghrd.o \ | |||
lapacke_zgghrd_work.o \ | |||
lapacke_zgghd3.o \ | |||
lapacke_zgghd3_work.o \ | |||
lapacke_zgghrd.o \ | |||
lapacke_zgghrd_work.o \ | |||
lapacke_zgglse.o \ | |||
lapacke_zgglse_work.o \ | |||
lapacke_zggqrf.o \ | |||
@@ -1925,14 +1947,14 @@ lapacke_zgttrs.o \ | |||
lapacke_zgttrs_work.o \ | |||
lapacke_zhbev.o \ | |||
lapacke_zhbev_work.o \ | |||
lapacke_zhbevd.o \ | |||
lapacke_zhbevd_work.o \ | |||
lapacke_zhbevx.o \ | |||
lapacke_zhbevx_work.o \ | |||
lapacke_zhbev_2stage.o \ | |||
lapacke_zhbev_2stage_work.o \ | |||
lapacke_zhbevd.o \ | |||
lapacke_zhbevd_work.o \ | |||
lapacke_zhbevd_2stage.o \ | |||
lapacke_zhbevd_2stage_work.o \ | |||
lapacke_zhbevx.o \ | |||
lapacke_zhbevx_work.o \ | |||
lapacke_zhbevx_2stage.o \ | |||
lapacke_zhbevx_2stage_work.o \ | |||
lapacke_zhbgst.o \ | |||
@@ -1953,18 +1975,18 @@ lapacke_zheequb.o \ | |||
lapacke_zheequb_work.o \ | |||
lapacke_zheev.o \ | |||
lapacke_zheev_work.o \ | |||
lapacke_zheevd.o \ | |||
lapacke_zheevd_work.o \ | |||
lapacke_zheevr.o \ | |||
lapacke_zheevr_work.o \ | |||
lapacke_zheevx.o \ | |||
lapacke_zheevx_work.o \ | |||
lapacke_zheev_2stage.o \ | |||
lapacke_zheev_2stage_work.o \ | |||
lapacke_zheevd.o \ | |||
lapacke_zheevd_work.o \ | |||
lapacke_zheevd_2stage.o \ | |||
lapacke_zheevd_2stage_work.o \ | |||
lapacke_zheevr.o \ | |||
lapacke_zheevr_work.o \ | |||
lapacke_zheevr_2stage.o \ | |||
lapacke_zheevr_2stage_work.o \ | |||
lapacke_zheevx.o \ | |||
lapacke_zheevx_work.o \ | |||
lapacke_zheevx_2stage.o \ | |||
lapacke_zheevx_2stage_work.o \ | |||
lapacke_zhegst.o \ | |||
@@ -1994,35 +2016,35 @@ lapacke_zheswapr_work.o \ | |||
lapacke_zhetrd.o \ | |||
lapacke_zhetrd_work.o \ | |||
lapacke_zhetrf.o \ | |||
lapacke_zhetrf_rook.o \ | |||
lapacke_zhetrf_work.o \ | |||
lapacke_zhetrf_rook_work.o \ | |||
lapacke_zhetrf_aa.o \ | |||
lapacke_zhetrf_aa_2stage.o \ | |||
lapacke_zhetrf_aa_work.o \ | |||
lapacke_zhetrf_aa_2stage.o \ | |||
lapacke_zhetrf_aa_2stage_work.o \ | |||
lapacke_zhetrf_rk.o \ | |||
lapacke_zhetrf_rk_work.o \ | |||
lapacke_zhetrf_rook.o \ | |||
lapacke_zhetrf_rook_work.o \ | |||
lapacke_zhetri.o \ | |||
lapacke_zhetri_work.o \ | |||
lapacke_zhetri2.o \ | |||
lapacke_zhetri2_work.o \ | |||
lapacke_zhetri_3.o \ | |||
lapacke_zhetri_3_work.o \ | |||
lapacke_zhetri2x.o \ | |||
lapacke_zhetri2x_work.o \ | |||
lapacke_zhetri_work.o \ | |||
lapacke_zhetri_3.o \ | |||
lapacke_zhetri_3_work.o \ | |||
lapacke_zhetrs.o \ | |||
lapacke_zhetrs_rook.o \ | |||
lapacke_zhetrs_work.o \ | |||
lapacke_zhetrs2.o \ | |||
lapacke_zhetrs2_work.o \ | |||
lapacke_zhetrs_work.o \ | |||
lapacke_zhetrs_rook_work.o \ | |||
lapacke_zhetrs_3.o \ | |||
lapacke_zhetrs_3_work.o \ | |||
lapacke_zhetrs_aa.o \ | |||
lapacke_zhetrs_aa_2stage.o \ | |||
lapacke_zhetrs_aa_work.o \ | |||
lapacke_zhetrs_aa_2stage.o \ | |||
lapacke_zhetrs_aa_2stage_work.o \ | |||
lapacke_zhetrs_3.o \ | |||
lapacke_zhetrs_3_work.o \ | |||
lapacke_zhetrs_rook.o \ | |||
lapacke_zhetrs_rook_work.o \ | |||
lapacke_zhfrk.o \ | |||
lapacke_zhfrk_work.o \ | |||
lapacke_zhgeqz.o \ | |||
@@ -2213,11 +2235,11 @@ lapacke_zsyconv.o \ | |||
lapacke_zsyconv_work.o \ | |||
lapacke_zsyequb.o \ | |||
lapacke_zsyequb_work.o \ | |||
lapacke_zsyr.o \ | |||
lapacke_zsyr_work.o \ | |||
lapacke_zsyrfs.o \ | |||
lapacke_zsyrfs_work.o \ | |||
lapacke_zsysv.o \ | |||
lapacke_zsysv_rook.o \ | |||
lapacke_zsysv_rook_work.o \ | |||
lapacke_zsysv_work.o \ | |||
lapacke_zsysv_aa.o \ | |||
lapacke_zsysv_aa_work.o \ | |||
@@ -2225,40 +2247,42 @@ lapacke_zsysv_aa_2stage.o \ | |||
lapacke_zsysv_aa_2stage_work.o \ | |||
lapacke_zsysv_rk.o \ | |||
lapacke_zsysv_rk_work.o \ | |||
lapacke_zsysv_rook.o \ | |||
lapacke_zsysv_rook_work.o \ | |||
lapacke_zsysvx.o \ | |||
lapacke_zsysvx_work.o \ | |||
lapacke_zsyswapr.o \ | |||
lapacke_zsyswapr_work.o \ | |||
lapacke_zsytrf.o \ | |||
lapacke_zsytrf_work.o \ | |||
lapacke_zsytrf_rook.o \ | |||
lapacke_zsytrf_rook_work.o \ | |||
lapacke_zsytrf_aa.o \ | |||
lapacke_zsytrf_aa_2stage.o \ | |||
lapacke_zsytrf_aa_work.o \ | |||
lapacke_zsytrf_aa_2stage.o \ | |||
lapacke_zsytrf_aa_2stage_work.o \ | |||
lapacke_zsytrf_rk.o \ | |||
lapacke_zsytrf_rk_work.o \ | |||
lapacke_zsytrf_rook.o \ | |||
lapacke_zsytrf_rook_work.o \ | |||
lapacke_zsytri.o \ | |||
lapacke_zsytri_work.o \ | |||
lapacke_zsytri2.o \ | |||
lapacke_zsytri2_work.o \ | |||
lapacke_zsytri_3.o \ | |||
lapacke_zsytri_3_work.o \ | |||
lapacke_zsytri2x.o \ | |||
lapacke_zsytri2x_work.o \ | |||
lapacke_zsytri_work.o \ | |||
lapacke_zsytri_3.o \ | |||
lapacke_zsytri_3_work.o \ | |||
lapacke_zsytrs.o \ | |||
lapacke_zsytrs_rook.o \ | |||
lapacke_zsytrs_work.o \ | |||
lapacke_zsytrs2.o \ | |||
lapacke_zsytrs2_work.o \ | |||
lapacke_zsytrs_work.o \ | |||
lapacke_zsytrs_rook_work.o \ | |||
lapacke_zsytrs_3.o \ | |||
lapacke_zsytrs_3_work.o \ | |||
lapacke_zsytrs_aa.o \ | |||
lapacke_zsytrs_aa_2stage.o \ | |||
lapacke_zsytrs_aa_work.o \ | |||
lapacke_zsytrs_aa_2stage.o \ | |||
lapacke_zsytrs_aa_2stage_work.o \ | |||
lapacke_zsytrs_3.o \ | |||
lapacke_zsytrs_3_work.o \ | |||
lapacke_zsytrs_rook.o \ | |||
lapacke_zsytrs_rook_work.o \ | |||
lapacke_ztbcon.o \ | |||
lapacke_ztbcon_work.o \ | |||
lapacke_ztbrfs.o \ | |||
@@ -2290,9 +2314,9 @@ lapacke_ztpcon_work.o \ | |||
lapacke_ztpmqrt.o \ | |||
lapacke_ztpmqrt_work.o \ | |||
lapacke_ztpqrt.o \ | |||
lapacke_ztpqrt_work.o \ | |||
lapacke_ztpqrt2.o \ | |||
lapacke_ztpqrt2_work.o \ | |||
lapacke_ztpqrt_work.o \ | |||
lapacke_ztprfb.o \ | |||
lapacke_ztprfb_work.o \ | |||
lapacke_ztprfs.o \ | |||
@@ -2368,12 +2392,7 @@ lapacke_zunmtr_work.o \ | |||
lapacke_zupgtr.o \ | |||
lapacke_zupgtr_work.o \ | |||
lapacke_zupmtr.o \ | |||
lapacke_zupmtr_work.o \ | |||
lapacke_zsyr.o \ | |||
lapacke_csyr.o \ | |||
lapacke_zsyr_work.o \ | |||
lapacke_csyr_work.o \ | |||
lapacke_ilaver.o | |||
lapacke_zupmtr_work.o | |||
ifdef BUILD_DEPRECATED | |||
DEPRECATED = \ | |||
@@ -2452,27 +2471,29 @@ lapacke_zlagsy.o \ | |||
lapacke_zlagsy_work.o | |||
endif | |||
all: ../../$(LAPACKELIB) | |||
.PHONY: all | |||
all: $(LAPACKELIB) | |||
.PHONY: ../../$(LAPACKELIB) | |||
../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN) | |||
$(ARCH) $(ARCHFLAGS) $@ $(OBJ_A) | |||
$(ARCH) $(ARCHFLAGS) $@ $(OBJ_B) | |||
$(LAPACKELIB): $(OBJ) $(OBJ_S) $(OBJ_C) $(OBJ_D) $(OBJ_Z) $(DEPRECATED) $(EXTENDED) $(MATGEN) | |||
$(AR) $(ARFLAGS) $@ $(OBJ) | |||
$(AR) $(ARFLAGS) $@ $(OBJ_S) | |||
$(AR) $(ARFLAGS) $@ $(OBJ_C) | |||
$(AR) $(ARFLAGS) $@ $(OBJ_D) | |||
$(AR) $(ARFLAGS) $@ $(OBJ_Z) | |||
ifdef BUILD_DEPRECATED | |||
$(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED) | |||
$(AR) $(ARFLAGS) $@ $(DEPRECATED) | |||
endif | |||
ifdef (USEXBLAS) | |||
$(ARCH) $(ARCHFLAGS) $@ $(EXTENDED) | |||
$(AR) $(ARFLAGS) $@ $(EXTENDED) | |||
endif | |||
ifdef LAPACKE_WITH_TMG | |||
$(ARCH) $(ARCHFLAGS) $@ $(MATGEN) | |||
$(AR) $(ARFLAGS) $@ $(MATGEN) | |||
endif | |||
$(RANLIB) $@ | |||
clean: cleanobj | |||
.PHONY: clean cleanobj cleanlib | |||
clean: cleanobj cleanlib | |||
cleanobj: | |||
rm -f *.o | |||
.c.o: | |||
$(CC) $(CFLAGS) -I../include -c -o $@ $< | |||
cleanlib: | |||
rm -f $(LAPACKELIB) |
@@ -124,7 +124,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, | |||
float* rwork = NULL; | |||
lapack_complex_float* cwork = NULL; | |||
lapack_int i; | |||
lapack_int nu, nv; | |||
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { | |||
LAPACKE_xerbla( "LAPACKE_cgejsv", -1 ); | |||
return -1; | |||
@@ -132,8 +131,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv, | |||
#ifndef LAPACK_DISABLE_NAN_CHECK | |||
if( LAPACKE_get_nancheck() ) { | |||
/* Optionally check input matrices for NaNs */ | |||
nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m; | |||
nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n; | |||
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) { | |||
return -10; | |||
} | |||
@@ -75,7 +75,7 @@ lapack_int LAPACKE_cgelsd( int matrix_layout, lapack_int m, lapack_int n, | |||
if( info != 0 ) { | |||
goto exit_level_0; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lrwork = (lapack_int)rwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
@@ -0,0 +1,106 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2014, Intel Corp. | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are met: | |||
* Redistributions of source code must retain the above copyright notice, | |||
this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in the | |||
documentation and/or other materials provided with the distribution. | |||
* Neither the name of Intel Corporation nor the names of its contributors | |||
may be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||
THE POSSIBILITY OF SUCH DAMAGE. | |||
***************************************************************************** | |||
* Contents: Native high-level C interface to LAPACK function cgesvdq | |||
* Author: Intel Corporation | |||
* Generated November 2018 | |||
*****************************************************************************/ | |||
#include "lapacke_utils.h" | |||
/*
 * High-level interface to cgesvdq: validates the layout, optionally checks
 * the input matrix for NaNs, asks the middle-level routine for the workspace
 * sizes, allocates the integer / complex / real work arrays, runs the
 * computation and releases the work arrays again.
 *
 * Returns:
 *   -1                        matrix_layout is neither LAPACK_COL_MAJOR nor
 *                             LAPACK_ROW_MAJOR
 *   -6                        the NaN check on `a` failed
 *   LAPACK_WORK_MEMORY_ERROR  a work array could not be allocated
 *   otherwise                 whatever LAPACKE_cgesvdq_work reports
 */
lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
                            char jobr, char jobu, char jobv,
                            lapack_int m, lapack_int n, lapack_complex_float* a,
                            lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu,
                            lapack_complex_float* v, lapack_int ldv, lapack_int* numrank)
{
    lapack_int info = 0;
    lapack_int liwork = -1;
    lapack_int* iwork = NULL;
    lapack_int iwork_query;
    lapack_int lcwork = -1;
    lapack_complex_float* cwork = NULL;
    lapack_complex_float cwork_query;
    lapack_int lrwork = -1;
    /* The middle-level routine takes a float* real workspace, so both the
     * query slot and the allocated array must be single precision. */
    float* rwork = NULL;
    float rwork_query;

    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
            return -6;
        }
    }
#endif
    /* Query optimal working array(s) size: all three lengths are -1 here */
    info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv,
                                 m, n, a, lda, s, u, ldu, v, ldv, numrank,
                                 &iwork_query, liwork, &cwork_query, lcwork,
                                 &rwork_query, lrwork );
    if( info != 0 ) {
        goto exit_level_0;
    }
    liwork = iwork_query;
    lcwork = LAPACK_C2INT(cwork_query);
    lrwork = (lapack_int)rwork_query;
    /* Allocate memory for work arrays; each failure unwinds only what has
     * already been allocated so nothing is leaked. */
    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
    if( iwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_0;
    }
    cwork = (lapack_complex_float*)
        LAPACKE_malloc( sizeof(lapack_complex_float) * lcwork );
    if( cwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_1;
    }
    rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork );
    if( rwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_2;
    }
    /* Call middle-level interface */
    info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv,
                                 m, n, a, lda, s, u, ldu, v, ldv, numrank,
                                 iwork, liwork, cwork, lcwork, rwork, lrwork );
    /* Release memory and exit */
    LAPACKE_free( rwork );
exit_level_2:
    LAPACKE_free( cwork );
exit_level_1:
    LAPACKE_free( iwork );
exit_level_0:
    if( info == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_cgesvdq", info );
    }
    return info;
}
@@ -0,0 +1,149 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2014, Intel Corp. | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are met: | |||
* Redistributions of source code must retain the above copyright notice, | |||
this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in the | |||
documentation and/or other materials provided with the distribution. | |||
* Neither the name of Intel Corporation nor the names of its contributors | |||
may be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||
THE POSSIBILITY OF SUCH DAMAGE. | |||
***************************************************************************** | |||
* Contents: Native middle-level C interface to LAPACK function cgesvdq | |||
* Author: Intel Corporation | |||
* Generated November 2015 | |||
*****************************************************************************/ | |||
#include "lapacke_utils.h" | |||
/*
 * Middle-level interface to cgesvdq: the caller supplies all work arrays.
 *
 * Column-major input is handed straight to LAPACK_cgesvdq.  Row-major input
 * is copied into column-major temporaries (a_t, and u_t / v_t when jobu /
 * jobv is 'a' or 's'), LAPACK is run on those temporaries, and the results
 * are transposed back into the caller's arrays.
 *
 * Returns:
 *   -1                             unknown matrix_layout
 *   -9 / -12 / -14                 lda / ldu / ldv too small (row-major only)
 *   LAPACK_TRANSPOSE_MEMORY_ERROR  a temporary could not be allocated
 *   otherwise                      LAPACK's info, with negative values
 *                                  decremented by one
 *
 * NOTE(review): the leading-dimension codes above do not include the usual
 * "minus one for matrix_layout" shift that is applied to LAPACK's own
 * negative info values; confirm whether that is intended before changing.
 */
lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp,
                                 char jobr, char jobu, char jobv,
                                 lapack_int m, lapack_int n, lapack_complex_float* a,
                                 lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu,
                                 lapack_complex_float* v, lapack_int ldv, lapack_int* numrank,
                                 lapack_int* iwork, lapack_int liwork,
                                 lapack_complex_float* cwork, lapack_int lcwork,
                                 float* rwork, lapack_int lrwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv,
                        numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        /* Only jobu / jobv values 'a' and 's' get a transposed buffer. */
        const int want_u = LAPACKE_lsame( jobu, 'a' ) ||
                           LAPACKE_lsame( jobu, 's' );
        const int want_v = LAPACKE_lsame( jobv, 'a' ) ||
                           LAPACKE_lsame( jobv, 's' );
        lapack_int nrows_u = want_u ? m : 1;
        lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m :
                             (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1);
        lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n :
                             ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1);
        lapack_int lda_t = MAX(1,m);
        lapack_int ldu_t = MAX(1,nrows_u);
        lapack_int ldv_t = MAX(1,nrows_v);
        lapack_complex_float* a_t = NULL;
        lapack_complex_float* u_t = NULL;
        lapack_complex_float* v_t = NULL;
        /* Check leading dimension(s) */
        if( lda < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        if( ldu < ncols_u ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        if( ldv < n ) {
            info = -14;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested.  Any of the
         * three length arguments being -1 is treated as a query, so no
         * temporaries are allocated for a call that only sizes workspace. */
        if( liwork == -1 || lcwork == -1 || lrwork == -1 ) {
            LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t,
                            s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork,
                            cwork, &lcwork, rwork, &lrwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        if( want_u ) {
            u_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) );
            if( u_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_1;
            }
        }
        if( want_v ) {
            v_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) );
            if( v_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_2;
            }
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        /* Call LAPACK function on the column-major temporaries (their
         * leading dimensions are lda_t / ldu_t / ldv_t) and adjust info */
        LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a_t, &lda_t,
                        s, u_t, &ldu_t, v_t, &ldv_t, numrank, iwork, &liwork,
                        cwork, &lcwork, rwork, &lrwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
        if( want_u ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t,
                               u, ldu );
        }
        if( want_v ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v,
                               ldv );
        }
        /* Release memory and exit */
        if( want_v ) {
            LAPACKE_free( v_t );
        }
exit_level_2:
        if( want_u ) {
            LAPACKE_free( u_t );
        }
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
    }
    return info;
}
@@ -91,7 +91,7 @@ lapack_int LAPACKE_cggesx( int matrix_layout, char jobvsl, char jobvsr, | |||
if( info != 0 ) { | |||
goto exit_level_2; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); | |||
@@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
if( info != 0 ) { | |||
goto exit_level_0; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lrwork = (lapack_int)rwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
@@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||
if( info != 0 ) { | |||
goto exit_level_0; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lrwork = (lapack_int)rwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
@@ -71,7 +71,7 @@ lapack_int LAPACKE_chbgvd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
if( info != 0 ) { | |||
goto exit_level_0; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lrwork = (lapack_int)rwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
@@ -70,7 +70,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo, | |||
goto exit_level_0; | |||
} | |||
/* Transpose input matrices */ | |||
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | |||
&info ); | |||
@@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo, | |||
info = info - 1; | |||
} | |||
/* Transpose output matrices */ | |||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||
/* Release memory and exit */ | |||
LAPACKE_free( a_t ); | |||
exit_level_0: | |||
@@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
#ifndef LAPACK_DISABLE_NAN_CHECK | |||
if( LAPACKE_get_nancheck() ) { | |||
/* Optionally check input matrices for NaNs */ | |||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { | |||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { | |||
return -5; | |||
} | |||
} | |||
@@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n | |||
if( info != 0 ) { | |||
goto exit_level_0; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lrwork = (lapack_int)rwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
@@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||
#ifndef LAPACK_DISABLE_NAN_CHECK | |||
if( LAPACKE_get_nancheck() ) { | |||
/* Optionally check input matrices for NaNs */ | |||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) { | |||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) { | |||
return -5; | |||
} | |||
} | |||
@@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac | |||
if( info != 0 ) { | |||
goto exit_level_0; | |||
} | |||
liwork = (lapack_int)iwork_query; | |||
liwork = iwork_query; | |||
lrwork = (lapack_int)rwork_query; | |||
lwork = LAPACK_C2INT( work_query ); | |||
/* Allocate memory for work arrays */ | |||
@@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo, | |||
goto exit_level_0; | |||
} | |||
/* Transpose input matrices */ | |||
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | |||
&lrwork, iwork, &liwork, &info ); | |||
@@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo, | |||
info = info - 1; | |||
} | |||
/* Transpose output matrices */ | |||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||
/* Release memory and exit */ | |||
LAPACKE_free( a_t ); | |||
exit_level_0: | |||
@@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo, | |||
goto exit_level_0; | |||
} | |||
/* Transpose input matrices */ | |||
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t ); | |||
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork, | |||
&lrwork, iwork, &liwork, &info ); | |||
@@ -79,7 +79,8 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo, | |||
info = info - 1; | |||
} | |||
/* Transpose output matrices */ | |||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda ); | |||
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda ); | |||
/* Release memory and exit */ | |||
LAPACKE_free( a_t ); | |||
exit_level_0: | |||