@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) | |||
project(OpenBLAS C ASM) | |||
set(OpenBLAS_MAJOR_VERSION 0) | |||
set(OpenBLAS_MINOR_VERSION 3) | |||
set(OpenBLAS_PATCH_VERSION 0.dev) | |||
set(OpenBLAS_PATCH_VERSION 1.dev) | |||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | |||
# Adhere to GNU filesystem layout conventions | |||
@@ -294,9 +294,10 @@ endif | |||
lapack-test : | |||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | |||
ifneq ($(CROSS), 1) | |||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \ | |||
./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||
endif | |||
@@ -308,9 +309,9 @@ lapack-runtest: | |||
blas-test: | |||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) | |||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out) | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing | |||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out) | |||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out) | |||
dummy : | |||
@@ -3,7 +3,7 @@ | |||
# | |||
# This library's version | |||
VERSION = 0.3.0.dev | |||
VERSION = 0.3.1.dev | |||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
@@ -201,6 +201,21 @@ $architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$binformat = bin32; | |||
$binformat = bin64 if ($data =~ /BINARY_64/); | |||
# Probe whether the compiler (and its assembler) accept AVX512 instructions
# by building a one-line C program that contains a zmm-register instruction.
# $no_avx512 ends up 1 when that build fails and 0 when it succeeds; on
# architectures other than x86/x86_64 the probe is skipped and it stays 0.
$no_avx512 = 0;
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
    $code = '"vaddps %zmm1, %zmm0, %zmm0"';
    # NOTE(review): assumes $tmpf is both a writable handle and stringifies
    # to its file name (e.g. a File::Temp object) - confirm where it is created.
    print $tmpf "void main(void){ __asm__ volatile($code); }\n";
    $args = " -o $tmpf.o -x c $tmpf";
    my @cmd = ("$compiler_name $args");
    # system() returns the wait status (also left in $?); any non-zero value
    # means the compiler could not be run or rejected the instruction.
    $no_avx512 = (system(@cmd) != 0) ? 1 : 0;
    # Remove the probe output that was passed to -o above.
    unlink("$tmpf.o");
}
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | |||
$data =~ /globl\s([_\.]*)(.*)/; | |||
@@ -288,6 +303,7 @@ print MAKEFILE "CROSS=1\n" if $cross != 0; | |||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | |||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | |||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | |||
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | |||
$os =~ tr/[a-z]/[A-Z]/; | |||
$architecture =~ tr/[a-z]/[A-Z]/; | |||
@@ -66,3 +66,12 @@ else() | |||
set(BINARY32 1) | |||
endif() | |||
# Probe whether the C compiler and its assembler accept AVX512 instructions.
# A one-line C file containing a zmm-register instruction is compiled; when
# that fails, -DNO_AVX512 is appended to CCOMMON_OPT.
# NO_AVX512 holds the result of the compiler run afterwards (0 on success).
if (X86_64 OR X86)
  file(WRITE "${PROJECT_BINARY_DIR}/avx512.tmp" "void main(void){ __asm__ volatile(\"vaddps %zmm1, %zmm0, %zmm0\"); }")
  # -c: compile and assemble only; the probe does not need a link step.
  execute_process(COMMAND ${CMAKE_C_COMPILER} -v -c -o "${PROJECT_BINARY_DIR}/avx512.o" -x c "${PROJECT_BINARY_DIR}/avx512.tmp" RESULT_VARIABLE NO_AVX512)
  # RESULT_VARIABLE is the exit code, or an error string when the process
  # could not be started, so anything other than 0 counts as failure.
  if (NOT "${NO_AVX512}" EQUAL 0)
    set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
  endif()
  # file(REMOVE) resolves relative paths against the current source
  # directory, so name the probe files by their full binary-dir paths.
  file(REMOVE "${PROJECT_BINARY_DIR}/avx512.tmp" "${PROJECT_BINARY_DIR}/avx512.o")
endif()
@@ -642,6 +642,7 @@ void gotoblas_profile_init(void); | |||
void gotoblas_profile_quit(void); | |||
#ifdef USE_OPENMP | |||
#ifndef C_MSVC | |||
int omp_in_parallel(void); | |||
int omp_get_num_procs(void); | |||
@@ -663,7 +664,6 @@ __declspec(dllimport) int __cdecl omp_get_num_procs(void); | |||
#define _Atomic volatile | |||
#endif | |||
#else | |||
#ifdef __ELF__ | |||
int omp_in_parallel (void) __attribute__ ((weak)); | |||
@@ -362,7 +362,7 @@ cgemm_ct.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm_cr.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm_cc.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -410,7 +410,7 @@ zgemm_ct.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm_cr.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm_cc.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -458,7 +458,7 @@ xgemm_ct.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm_cr.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm_cc.$(SUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -558,7 +558,7 @@ cgemm_thread_ct.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm_thread_cr.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm_thread_cc.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -606,7 +606,7 @@ zgemm_thread_ct.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm_thread_cr.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm_thread_cc.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -654,7 +654,7 @@ xgemm_thread_ct.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm_thread_cr.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm_thread_cc.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -1821,7 +1821,7 @@ cgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -1869,7 +1869,7 @@ zgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -1917,7 +1917,7 @@ xgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -1974,7 +1974,7 @@ cgemm3m_thread_ct.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm3m_thread_cr.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm3m_thread_cc.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2022,7 +2022,7 @@ zgemm3m_thread_ct.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm3m_thread_cr.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm3m_thread_cc.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2070,7 +2070,7 @@ xgemm3m_thread_ct.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm3m_thread_cr.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm3m_thread_cc.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2731,7 +2731,7 @@ cgemm_ct.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm_cr.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm_cc.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2779,7 +2779,7 @@ zgemm_ct.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm_cr.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm_cc.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2827,7 +2827,7 @@ xgemm_ct.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm_cr.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm_cc.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2927,7 +2927,7 @@ cgemm_thread_ct.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm_thread_cr.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm_thread_cc.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -2975,7 +2975,7 @@ zgemm_thread_ct.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm_thread_cr.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm_thread_cc.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -3023,7 +3023,7 @@ xgemm_thread_ct.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm_thread_cr.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm_thread_cc.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -4190,7 +4190,7 @@ cgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -4238,7 +4238,7 @@ zgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -4286,7 +4286,7 @@ xgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -4343,7 +4343,7 @@ cgemm3m_thread_ct.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
cgemm3m_thread_cr.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
cgemm3m_thread_cc.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -4391,7 +4391,7 @@ zgemm3m_thread_ct.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
zgemm3m_thread_cr.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
zgemm3m_thread_cc.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -4439,7 +4439,7 @@ xgemm3m_thread_ct.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) | |||
xgemm3m_thread_cr.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) | |||
xgemm3m_thread_cc.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h | |||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) | |||
@@ -180,7 +180,7 @@ int get_num_procs(void) { | |||
cpu_set_t *cpusetp; | |||
size_t size; | |||
int ret; | |||
// int i,n; | |||
int i,n; | |||
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); | |||
#if !defined(OS_LINUX) | |||
@@ -128,6 +128,8 @@ so : ../$(LIBSONAME) | |||
ifeq ($(OSNAME), Android) | |||
INTERNALNAME = $(LIBPREFIX).so | |||
FEXTRALIB += -lm | |||
EXTRALIB += -lm | |||
else | |||
INTERNALNAME = $(LIBPREFIX).so.$(MAJOR_VERSION) | |||
endif | |||
@@ -133,7 +133,7 @@ ZNRM2KERNEL = ../arm/znrm2.c | |||
# | |||
SROTKERNEL = srot.c | |||
DROTKERNEL = drot.c | |||
#CROTKERNEL = ../arm/zrot.c | |||
CROTKERNEL = zrot.c | |||
ZROTKERNEL = zrot.c | |||
# | |||
SSCALKERNEL = sscal.c | |||
@@ -1,3 +1 @@ | |||
include $(KERNELDIR)/KERNEL.PENRYN | |||
SSWAPKERNEL = ../arm/swap.c | |||
DSWAPKERNEL = ../arm/swap.c |
@@ -138,6 +138,14 @@ | |||
/* INCX != 1 or INCY != 1 */ | |||
.L14: | |||
cmpl $0, %ebx | |||
jne .L141 | |||
cmpl $0, %ecx | |||
jne .L141 | |||
/* INCX == 0 and INCY == 0 */ | |||
jmp .L27 | |||
.L141: | |||
movl %edx, %eax | |||
sarl $2, %eax | |||
jle .L28 | |||
@@ -41,7 +41,7 @@ lapack_int LAPACKE_chetrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
lapack_int info = 0; | |||
if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_chetrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
LAPACK_chetrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -56,7 +56,7 @@ lapack_int LAPACKE_chetrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
} | |||
/* Query optimal working array(s) size if requested */ | |||
if( lwork == -1 ) { | |||
LAPACK_chetrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_chetrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
return (info < 0) ? (info - 1) : info; | |||
} | |||
/* Allocate memory for temporary array(s) */ | |||
@@ -69,7 +69,7 @@ lapack_int LAPACKE_chetrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
/* Transpose input matrices */ | |||
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_chetrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_chetrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -41,7 +41,7 @@ lapack_int LAPACKE_csytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
lapack_int info = 0; | |||
if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_csytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
LAPACK_csytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -56,7 +56,7 @@ lapack_int LAPACKE_csytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
} | |||
/* Query optimal working array(s) size if requested */ | |||
if( lwork == -1 ) { | |||
LAPACK_csytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_csytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
return (info < 0) ? (info - 1) : info; | |||
} | |||
/* Allocate memory for temporary array(s) */ | |||
@@ -69,7 +69,7 @@ lapack_int LAPACKE_csytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
/* Transpose input matrices */ | |||
LAPACKE_csy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_csytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_csytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -40,7 +40,7 @@ lapack_int LAPACKE_dsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
lapack_int info = 0; | |||
if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_dsytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
LAPACK_dsytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -55,7 +55,7 @@ lapack_int LAPACKE_dsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
} | |||
/* Query optimal working array(s) size if requested */ | |||
if( lwork == -1 ) { | |||
LAPACK_dsytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_dsytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
return (info < 0) ? (info - 1) : info; | |||
} | |||
/* Allocate memory for temporary array(s) */ | |||
@@ -67,7 +67,7 @@ lapack_int LAPACKE_dsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
/* Transpose input matrices */ | |||
LAPACKE_dsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_dsytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_dsytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -40,7 +40,7 @@ lapack_int LAPACKE_ssytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
lapack_int info = 0; | |||
if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_ssytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
LAPACK_ssytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -55,7 +55,7 @@ lapack_int LAPACKE_ssytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
} | |||
/* Query optimal working array(s) size if requested */ | |||
if( lwork == -1 ) { | |||
LAPACK_ssytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_ssytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
return (info < 0) ? (info - 1) : info; | |||
} | |||
/* Allocate memory for temporary array(s) */ | |||
@@ -67,7 +67,7 @@ lapack_int LAPACKE_ssytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
/* Transpose input matrices */ | |||
LAPACKE_ssy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_ssytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_ssytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -41,7 +41,7 @@ lapack_int LAPACKE_zhetrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
lapack_int info = 0; | |||
if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_zhetrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
LAPACK_zhetrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -56,7 +56,7 @@ lapack_int LAPACKE_zhetrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
} | |||
/* Query optimal working array(s) size if requested */ | |||
if( lwork == -1 ) { | |||
LAPACK_zhetrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_zhetrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
return (info < 0) ? (info - 1) : info; | |||
} | |||
/* Allocate memory for temporary array(s) */ | |||
@@ -69,7 +69,7 @@ lapack_int LAPACKE_zhetrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
/* Transpose input matrices */ | |||
LAPACKE_zhe_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_zhetrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_zhetrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -41,7 +41,7 @@ lapack_int LAPACKE_zsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
lapack_int info = 0; | |||
if( matrix_layout == LAPACK_COL_MAJOR ) { | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_zsytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
LAPACK_zsytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -56,7 +56,7 @@ lapack_int LAPACKE_zsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
} | |||
/* Query optimal working array(s) size if requested */ | |||
if( lwork == -1 ) { | |||
LAPACK_zsytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_zsytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); | |||
return (info < 0) ? (info - 1) : info; | |||
} | |||
/* Allocate memory for temporary array(s) */ | |||
@@ -69,7 +69,7 @@ lapack_int LAPACKE_zsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, | |||
/* Transpose input matrices */ | |||
LAPACKE_zsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); | |||
/* Call LAPACK function and adjust info */ | |||
LAPACK_zsytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
LAPACK_zsytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); | |||
if( info < 0 ) { | |||
info = info - 1; | |||
} | |||
@@ -701,7 +701,7 @@ | |||
LWSVDJ = MAX( 2 * N, 1 ) | |||
LWSVDJV = MAX( 2 * N, 1 ) | |||
* .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ | |||
LRWQP3 = N | |||
LRWQP3 = 2 * N | |||
LRWCON = N | |||
LRWSVDJ = N | |||
IF ( LQUERY ) THEN | |||
@@ -939,7 +939,7 @@ | |||
END IF | |||
END IF | |||
MINWRK = MAX( 2, MINWRK ) | |||
OPTWRK = MAX( 2, OPTWRK ) | |||
OPTWRK = MAX( OPTWRK, MINWRK ) | |||
IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 | |||
IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 | |||
END IF | |||
@@ -209,6 +209,8 @@ | |||
INFO = -5 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -8 | |||
ELSE IF( LWORK.LT.MAX( 2*N, 3*N-2 ) .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
@@ -219,9 +221,6 @@ | |||
LWKOPT_HETRS = INT( WORK(1) ) | |||
LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) | |||
WORK( 1 ) = LWKOPT | |||
IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -105,6 +105,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -124,7 +125,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k).
@@ -150,6 +151,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -233,19 +235,18 @@ | |||
INFO = -3 | |||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||
INFO = -5 | |||
ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -11 | |||
ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, | |||
$ IPIV2, WORK, -1, INFO ) | |||
LWKOPT = INT( WORK(1) ) | |||
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -270,6 +271,8 @@ | |||
END IF | |||
* | |||
WORK( 1 ) = LWKOPT | |||
* | |||
RETURN | |||
* | |||
* End of CHESV_AA_2STAGE | |||
* | |||
@@ -93,6 +93,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -112,7 +113,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k).
@@ -125,6 +126,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -658,6 +660,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) | |||
* | |||
* Factor the band matrix | |||
CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) | |||
* | |||
RETURN | |||
* | |||
* End of CHETRF_AA_2STAGE | |||
* | |||
@@ -87,6 +87,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N. | |||
*> \endverbatim | |||
*> | |||
@@ -241,7 +241,7 @@ | |||
INFO = 10 | |||
END IF | |||
IF( INFO.NE.0 )THEN | |||
CALL XERBLA( 'SSYMV ', INFO ) | |||
CALL XERBLA( 'CLA_SYAMV', INFO ) | |||
RETURN | |||
END IF | |||
* | |||
@@ -142,6 +142,13 @@ | |||
CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) | |||
* .. | |||
* .. Executable Statements .. | |||
* | |||
* Quick return if possible | |||
* | |||
IF( N.NE.2 .AND. N.NE.3 ) THEN | |||
RETURN | |||
END IF | |||
* | |||
IF( N.EQ.2 ) THEN | |||
S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) | |||
IF( S.EQ.RZERO ) THEN | |||
@@ -221,9 +221,6 @@ | |||
LWKOPT_SYTRS = INT( WORK(1) ) | |||
LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) | |||
WORK( 1 ) = LWKOPT | |||
IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -105,6 +105,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -124,7 +125,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k).
@@ -150,6 +151,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -233,19 +235,18 @@ | |||
INFO = -3 | |||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||
INFO = -5 | |||
ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -11 | |||
ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
CALL CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, | |||
$ IPIV2, WORK, -1, INFO ) | |||
LWKOPT = INT( WORK(1) ) | |||
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -270,6 +271,8 @@ | |||
END IF | |||
* | |||
WORK( 1 ) = LWKOPT | |||
* | |||
RETURN | |||
* | |||
* End of CSYSV_AA_2STAGE | |||
* | |||
@@ -93,6 +93,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -112,7 +113,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -125,6 +126,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -662,6 +664,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) | |||
* | |||
* Factor the band matrix | |||
CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) | |||
* | |||
RETURN | |||
* | |||
* End of CSYTRF_AA_2STAGE | |||
* | |||
@@ -96,11 +96,11 @@ | |||
*> LWORK is INTEGER | |||
*> The dimension of the array WORK. | |||
*> WORK is size >= (N+NB+1)*(NB+3) | |||
*> If LDWORK = -1, then a workspace query is assumed; the routine | |||
*> If LWORK = -1, then a workspace query is assumed; the routine | |||
*> calculates: | |||
*> - the optimal size of the WORK array, returns | |||
*> this value as the first entry of the WORK array, | |||
*> - and no error message related to LDWORK is issued by XERBLA. | |||
*> - and no error message related to LWORK is issued by XERBLA. | |||
*> \endverbatim | |||
*> | |||
*> \param[out] INFO | |||
@@ -163,7 +163,7 @@ | |||
UPPER = LSAME( UPLO, 'U' ) | |||
LQUERY = ( LWORK.EQ.-1 ) | |||
* Get blocksize | |||
NBMAX = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) | |||
NBMAX = ILAENV( 1, 'CSYTRI2', UPLO, N, -1, -1, -1 ) | |||
IF ( NBMAX .GE. N ) THEN | |||
MINSIZE = N | |||
ELSE | |||
@@ -85,6 +85,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N. | |||
*> \endverbatim | |||
*> | |||
@@ -27,8 +27,8 @@ | |||
* .. | |||
* .. Array Arguments .. | |||
* LOGICAL SELECT( * ) | |||
* REAL RWORK( * ) | |||
* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
* REAL RWORK( * ) | |||
* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
* $ WORK( * ) | |||
* .. | |||
* | |||
@@ -258,17 +258,17 @@ | |||
* .. | |||
* .. Array Arguments .. | |||
LOGICAL SELECT( * ) | |||
REAL RWORK( * ) | |||
COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
REAL RWORK( * ) | |||
COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
$ WORK( * ) | |||
* .. | |||
* | |||
* ===================================================================== | |||
* | |||
* .. Parameters .. | |||
REAL ZERO, ONE | |||
REAL ZERO, ONE | |||
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) | |||
COMPLEX CZERO, CONE | |||
COMPLEX CZERO, CONE | |||
PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), | |||
$ CONE = ( 1.0E+0, 0.0E+0 ) ) | |||
INTEGER NBMIN, NBMAX | |||
@@ -277,13 +277,13 @@ | |||
* .. Local Scalars .. | |||
LOGICAL ALLV, BOTHV, LEFTV, LQUERY, OVER, RIGHTV, SOMEV | |||
INTEGER I, II, IS, J, K, KI, IV, MAXWRK, NB | |||
REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL | |||
COMPLEX CDUM | |||
REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL | |||
COMPLEX CDUM | |||
* .. | |||
* .. External Functions .. | |||
LOGICAL LSAME | |||
INTEGER ILAENV, ICAMAX | |||
REAL SLAMCH, SCASUM | |||
REAL SLAMCH, SCASUM | |||
EXTERNAL LSAME, ILAENV, ICAMAX, SLAMCH, SCASUM | |||
* .. | |||
* .. External Subroutines .. | |||
@@ -158,7 +158,7 @@ | |||
INTEGER I, IB, IINFO, K | |||
* .. | |||
* .. External Subroutines .. | |||
EXTERNAL DGEQRT2, DGELQT3, DGEQRT3, DLARFB, XERBLA | |||
EXTERNAL DGELQT3, DLARFB, XERBLA | |||
* .. | |||
* .. Executable Statements .. | |||
* | |||
@@ -230,7 +230,7 @@ | |||
INFO = 10 | |||
END IF | |||
IF( INFO.NE.0 )THEN | |||
CALL XERBLA( 'DSYMV ', INFO ) | |||
CALL XERBLA( 'DLA_SYAMV', INFO ) | |||
RETURN | |||
END IF | |||
* | |||
@@ -147,6 +147,13 @@ | |||
INTRINSIC ABS | |||
* .. | |||
* .. Executable Statements .. | |||
* | |||
* Quick return if possible | |||
* | |||
IF( N.NE.2 .AND. N.NE.3 ) THEN | |||
RETURN | |||
END IF | |||
* | |||
IF( N.EQ.2 ) THEN | |||
S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) | |||
IF( S.EQ.ZERO ) THEN | |||
@@ -221,9 +221,6 @@ | |||
LWKOPT_SYTRS = INT( WORK(1) ) | |||
LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) | |||
WORK( 1 ) = LWKOPT | |||
IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -107,6 +107,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -126,7 +127,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -152,6 +153,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -235,19 +237,18 @@ | |||
INFO = -3 | |||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||
INFO = -5 | |||
ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -11 | |||
ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
CALL DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, | |||
$ IPIV2, WORK, -1, INFO ) | |||
LWKOPT = INT( WORK(1) ) | |||
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -93,6 +93,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -109,6 +110,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -128,10 +130,10 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV(k). | |||
*> row and column IPIV2(k). | |||
*> \endverbatim | |||
*> | |||
*> \param[out] INFO | |||
@@ -641,6 +643,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) | |||
* | |||
* Factor the band matrix | |||
CALL DGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) | |||
* | |||
RETURN | |||
* | |||
* End of DSYTRF_AA_2STAGE | |||
* | |||
@@ -96,11 +96,11 @@ | |||
*> LWORK is INTEGER | |||
*> The dimension of the array WORK. | |||
*> WORK is size >= (N+NB+1)*(NB+3) | |||
*> If LDWORK = -1, then a workspace query is assumed; the routine | |||
*> If LWORK = -1, then a workspace query is assumed; the routine | |||
*> calculates: | |||
*> - the optimal size of the WORK array, returns | |||
*> this value as the first entry of the WORK array, | |||
*> - and no error message related to LDWORK is issued by XERBLA. | |||
*> - and no error message related to LWORK is issued by XERBLA. | |||
*> \endverbatim | |||
*> | |||
*> \param[out] INFO | |||
@@ -163,7 +163,7 @@ | |||
UPPER = LSAME( UPLO, 'U' ) | |||
LQUERY = ( LWORK.EQ.-1 ) | |||
* Get blocksize | |||
NBMAX = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) | |||
NBMAX = ILAENV( 1, 'DSYTRI2', UPLO, N, -1, -1, -1 ) | |||
IF ( NBMAX .GE. N ) THEN | |||
MINSIZE = N | |||
ELSE | |||
@@ -85,6 +85,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N. | |||
*> \endverbatim | |||
*> | |||
@@ -45,9 +45,9 @@ | |||
*> The right eigenvector x and the left eigenvector y of T corresponding | |||
*> to an eigenvalue w are defined by: | |||
*> | |||
*> T*x = w*x, (y**H)*T = w*(y**H) | |||
*> T*x = w*x, (y**T)*T = w*(y**T) | |||
*> | |||
*> where y**H denotes the conjugate transpose of y. | |||
*> where y**T denotes the transpose of the vector y. | |||
*> The eigenvalues are not input to this routine, but are read directly | |||
*> from the diagonal blocks of T. | |||
*> | |||
@@ -104,13 +104,13 @@ | |||
*> | |||
*> \param[in] NAME | |||
*> \verbatim | |||
*> NAME is character string | |||
*> NAME is CHARACTER string | |||
*> Name of the calling subroutine | |||
*> \endverbatim | |||
*> | |||
*> \param[in] OPTS | |||
*> \verbatim | |||
*> OPTS is character string | |||
*> OPTS is CHARACTER string | |||
*> This is a concatenation of the string arguments to | |||
*> TTQRE. | |||
*> \endverbatim | |||
@@ -230,7 +230,7 @@ | |||
INFO = 10 | |||
END IF | |||
IF( INFO.NE.0 )THEN | |||
CALL XERBLA( 'SSYMV ', INFO ) | |||
CALL XERBLA( 'SLA_SYAMV', INFO ) | |||
RETURN | |||
END IF | |||
* | |||
@@ -147,6 +147,13 @@ | |||
INTRINSIC ABS | |||
* .. | |||
* .. Executable Statements .. | |||
* | |||
* Quick return if possible | |||
* | |||
IF( N.NE.2 .AND. N.NE.3 ) THEN | |||
RETURN | |||
END IF | |||
* | |||
IF( N.EQ.2 ) THEN | |||
S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) | |||
IF( S.EQ.ZERO ) THEN | |||
@@ -220,9 +220,6 @@ | |||
LWKOPT_SYTRS = INT( WORK(1) ) | |||
LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) | |||
WORK( 1 ) = LWKOPT | |||
IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -106,6 +106,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -125,7 +126,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -151,6 +152,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -234,19 +236,18 @@ | |||
INFO = -3 | |||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||
INFO = -5 | |||
ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -11 | |||
ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
CALL SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, | |||
$ IPIV2, WORK, -1, INFO ) | |||
LWKOPT = INT( WORK(1) ) | |||
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -93,6 +93,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -112,7 +113,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -125,6 +126,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -641,6 +643,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) | |||
* | |||
* Factor the band matrix | |||
CALL SGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) | |||
* | |||
RETURN | |||
* | |||
* End of SSYTRF_AA_2STAGE | |||
* | |||
@@ -96,11 +96,11 @@ | |||
*> LWORK is INTEGER | |||
*> The dimension of the array WORK. | |||
*> WORK is size >= (N+NB+1)*(NB+3) | |||
*> If LDWORK = -1, then a workspace query is assumed; the routine | |||
*> If LWORK = -1, then a workspace query is assumed; the routine | |||
*> calculates: | |||
*> - the optimal size of the WORK array, returns | |||
*> this value as the first entry of the WORK array, | |||
*> - and no error message related to LDWORK is issued by XERBLA. | |||
*> - and no error message related to LWORK is issued by XERBLA. | |||
*> \endverbatim | |||
*> | |||
*> \param[out] INFO | |||
@@ -85,6 +85,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N. | |||
*> \endverbatim | |||
*> | |||
@@ -27,7 +27,7 @@ | |||
* .. | |||
* .. Array Arguments .. | |||
* LOGICAL SELECT( * ) | |||
* REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
* REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
* $ WORK( * ) | |||
* .. | |||
* | |||
@@ -45,9 +45,9 @@ | |||
*> The right eigenvector x and the left eigenvector y of T corresponding | |||
*> to an eigenvalue w are defined by: | |||
*> | |||
*> T*x = w*x, (y**H)*T = w*(y**H) | |||
*> T*x = w*x, (y**T)*T = w*(y**T) | |||
*> | |||
*> where y**H denotes the conjugate transpose of y. | |||
*> where y**T denotes the transpose of the vector y. | |||
*> The eigenvalues are not input to this routine, but are read directly | |||
*> from the diagonal blocks of T. | |||
*> | |||
@@ -251,14 +251,14 @@ | |||
* .. | |||
* .. Array Arguments .. | |||
LOGICAL SELECT( * ) | |||
REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), | |||
$ WORK( * ) | |||
* .. | |||
* | |||
* ===================================================================== | |||
* | |||
* .. Parameters .. | |||
REAL ZERO, ONE | |||
REAL ZERO, ONE | |||
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) | |||
INTEGER NBMIN, NBMAX | |||
PARAMETER ( NBMIN = 8, NBMAX = 128 ) | |||
@@ -268,7 +268,7 @@ | |||
$ RIGHTV, SOMEV | |||
INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, | |||
$ IV, MAXWRK, NB, KI2 | |||
REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, | |||
REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, | |||
$ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, | |||
$ XNORM | |||
* .. | |||
@@ -704,7 +704,7 @@ | |||
LWSVDJ = MAX( 2 * N, 1 ) | |||
LWSVDJV = MAX( 2 * N, 1 ) | |||
* .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ | |||
LRWQP3 = N | |||
LRWQP3 = 2 * N | |||
LRWCON = N | |||
LRWSVDJ = N | |||
IF ( LQUERY ) THEN | |||
@@ -942,7 +942,7 @@ | |||
END IF | |||
END IF | |||
MINWRK = MAX( 2, MINWRK ) | |||
OPTWRK = MAX( 2, OPTWRK ) | |||
OPTWRK = MAX( MINWRK, OPTWRK ) | |||
IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 | |||
IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 | |||
END IF | |||
@@ -209,6 +209,8 @@ | |||
INFO = -5 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -8 | |||
ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
@@ -219,9 +221,6 @@ | |||
LWKOPT_HETRS = INT( WORK(1) ) | |||
LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) | |||
WORK( 1 ) = LWKOPT | |||
IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -106,6 +106,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -125,7 +126,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -151,6 +152,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -240,19 +242,18 @@ | |||
INFO = -3 | |||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||
INFO = -5 | |||
ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -11 | |||
ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
CALL ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, | |||
$ IPIV2, WORK, -1, INFO ) | |||
LWKOPT = INT( WORK(1) ) | |||
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -93,6 +93,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -112,7 +113,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -125,6 +126,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -657,6 +659,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) | |||
* | |||
* Factor the band matrix | |||
CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) | |||
* | |||
RETURN | |||
* | |||
* End of ZHETRF_AA_2STAGE | |||
* | |||
@@ -69,7 +69,7 @@ | |||
*> | |||
*> \param[in] A | |||
*> \verbatim | |||
*> A is COMPLEX*16array, dimension (LDA,N) | |||
*> A is COMPLEX*16 array, dimension (LDA,N) | |||
*> Details of factors computed by ZHETRF_AA_2STAGE. | |||
*> \endverbatim | |||
*> | |||
@@ -81,12 +81,13 @@ | |||
*> | |||
*> \param[out] TB | |||
*> \verbatim | |||
*> TB is COMPLEX*16array, dimension (LTB) | |||
*> TB is COMPLEX*16 array, dimension (LTB) | |||
*> Details of factors computed by ZHETRF_AA_2STAGE. | |||
*> \endverbatim | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N. | |||
*> \endverbatim | |||
*> | |||
@@ -106,7 +107,7 @@ | |||
*> | |||
*> \param[in,out] B | |||
*> \verbatim | |||
*> B is COMPLEX*16array, dimension (LDB,NRHS) | |||
*> B is COMPLEX*16 array, dimension (LDB,NRHS) | |||
*> On entry, the right hand side matrix B. | |||
*> On exit, the solution matrix X. | |||
*> \endverbatim | |||
@@ -241,7 +241,7 @@ | |||
INFO = 10 | |||
END IF | |||
IF( INFO.NE.0 )THEN | |||
CALL XERBLA( 'DSYMV ', INFO ) | |||
CALL XERBLA( 'ZLA_SYAMV', INFO ) | |||
RETURN | |||
END IF | |||
* | |||
@@ -142,6 +142,13 @@ | |||
CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) | |||
* .. | |||
* .. Executable Statements .. | |||
* | |||
* Quick return if possible | |||
* | |||
IF( N.NE.2 .AND. N.NE.3 ) THEN | |||
RETURN | |||
END IF | |||
* | |||
IF( N.EQ.2 ) THEN | |||
S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) | |||
IF( S.EQ.RZERO ) THEN | |||
@@ -221,9 +221,6 @@ | |||
LWKOPT_SYTRS = INT( WORK(1) ) | |||
LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) | |||
WORK( 1 ) = LWKOPT | |||
IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN | |||
INFO = -10 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -105,6 +105,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -124,7 +125,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -150,6 +151,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -233,19 +235,18 @@ | |||
INFO = -3 | |||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||
INFO = -5 | |||
ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN | |||
INFO = -11 | |||
ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
* | |||
IF( INFO.EQ.0 ) THEN | |||
CALL ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, | |||
$ IPIV2, WORK, -1, INFO ) | |||
LWKOPT = INT( WORK(1) ) | |||
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN | |||
INFO = -7 | |||
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN | |||
INFO = -13 | |||
END IF | |||
END IF | |||
* | |||
IF( INFO.NE.0 ) THEN | |||
@@ -93,6 +93,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N, internally | |||
*> used to select NB such that LTB >= (3*NB+1)*N. | |||
*> | |||
@@ -112,7 +113,7 @@ | |||
*> | |||
*> \param[out] IPIV2 | |||
*> \verbatim | |||
*> IPIV is INTEGER array, dimension (N) | |||
*> IPIV2 is INTEGER array, dimension (N) | |||
*> On exit, it contains the details of the interchanges, i.e., | |||
*> the row and column k of T were interchanged with the | |||
*> row and column IPIV2(k). | |||
@@ -125,6 +126,7 @@ | |||
*> | |||
*> \param[in] LWORK | |||
*> \verbatim | |||
*> LWORK is INTEGER | |||
*> The size of WORK. LWORK >= N, internally used to select NB | |||
*> such that LWORK >= N*NB. | |||
*> | |||
@@ -662,6 +664,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) | |||
* | |||
* Factor the band matrix | |||
CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) | |||
* | |||
RETURN | |||
* | |||
* End of ZSYTRF_AA_2STAGE | |||
* | |||
@@ -163,7 +163,7 @@ | |||
UPPER = LSAME( UPLO, 'U' ) | |||
LQUERY = ( LWORK.EQ.-1 ) | |||
* Get blocksize | |||
NBMAX = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) | |||
NBMAX = ILAENV( 1, 'ZSYTRI2', UPLO, N, -1, -1, -1 ) | |||
IF ( NBMAX .GE. N ) THEN | |||
MINSIZE = N | |||
ELSE | |||
@@ -85,6 +85,7 @@ | |||
*> | |||
*> \param[in] LTB | |||
*> \verbatim | |||
*> LTB is INTEGER | |||
*> The size of the array TB. LTB >= 4*N. | |||
*> \endverbatim | |||
*> | |||
@@ -218,7 +218,7 @@ | |||
* .. | |||
* .. External Subroutines .. | |||
EXTERNAL ALAERH, ALAHD, ALASUM, DERRSY, DLACPY, DLARHS, | |||
$ DLATB4, DLATMS, DPOT02, DSYTRF_AA_2STAGE | |||
$ DLATB4, DLATMS, DPOT02, DSYTRF_AA_2STAGE, | |||
$ DSYTRS_AA_2STAGE, XLAENV | |||
* .. | |||
* .. Intrinsic Functions .. | |||
@@ -204,7 +204,7 @@ | |||
* .. External Subroutines .. | |||
EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, DERRVX, | |||
$ DGET04, DLACPY, DLARHS, DLATB4, DLATMS, | |||
$ DSYSV_AA_2STAGE, CHET01_AA, DPOT02, | |||
$ DSYSV_AA_2STAGE, DPOT02, | |||
$ DSYTRF_AA_2STAGE | |||
* .. | |||
* .. Scalars in Common .. | |||
@@ -203,7 +203,7 @@ | |||
* .. | |||
* .. External Subroutines .. | |||
EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, SERRVX, | |||
$ CGET04, SLACPY, SLARHS, SLATB4, SLATMS, | |||
$ SLACPY, SLARHS, SLATB4, SLATMS, | |||
$ SSYSV_AA_2STAGE, SSYT01_AA, SPOT02, | |||
$ SSYTRF_AA_2STAGE | |||
* .. | |||
@@ -217,8 +217,8 @@ | |||
DOUBLE PRECISION RESULT( NTESTS ) | |||
* .. | |||
* .. External Subroutines .. | |||
EXTERNAL ALAERH, ALAHD, ALASUM, CERRSY, ZLACPY, ZLARHS, | |||
$ CLATB4, ZLATMS, ZSYT02, ZSYT01, | |||
EXTERNAL ALAERH, ALAHD, ALASUM, ZERRSY, ZLACPY, ZLARHS, | |||
$ ZLATB4, ZLATMS, ZSYT02, ZSYT01, | |||
$ ZSYTRF_AA_2STAGE, ZSYTRS_AA_2STAGE, | |||
$ XLAENV | |||
* .. | |||
@@ -257,16 +257,16 @@ for dtype in range_prec: | |||
else: | |||
if dtest==16: | |||
# LIN TESTS | |||
cmdbase="xlintst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
cmdbase="LIN/xlintst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
elif dtest==17: | |||
# PROTO LIN TESTS | |||
cmdbase="xlintst"+letter+dtypes[0][dtype-1]+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
cmdbase="LIN/xlintst"+letter+dtypes[0][dtype-1]+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
elif dtest==18: | |||
# PROTO LIN TESTS | |||
cmdbase="xlintstrf"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
cmdbase="LIN/xlintstrf"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
else: | |||
# EIG TESTS | |||
cmdbase="xeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
cmdbase="EIG/xeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" | |||
if (not just_errors and not short_summary): | |||
print("Testing "+name+" "+dtests[1][dtest]+"-"+cmdbase, end=' ') | |||
# Run the process: either to read the file or run the LAPACK testing | |||