| @@ -769,6 +769,9 @@ else | |||
| FCOMMON_OPT += -m32 | |||
| endif | |||
| endif | |||
| ifneq ($(NO_LAPACKE), 1) | |||
| FCOMMON_OPT += -fno-second-underscore | |||
| endif | |||
| endif | |||
| endif | |||
| @@ -73,7 +73,7 @@ if (DYNAMIC_ARCH) | |||
| endif () | |||
| if (NOT NO_AVX512) | |||
| set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX) | |||
| string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) | |||
| string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") | |||
| endif () | |||
| if (DYNAMIC_LIST) | |||
| set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST}) | |||
| @@ -78,7 +78,18 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||
| #define BLAS_LOCK_DEFINED | |||
| #if !defined(OS_DARWIN) && !defined (OS_ANDROID) | |||
| static __inline BLASULONG rpcc(void){ /* Returns the current value of the cntvct_el0 system register. */ | |||
| BLASULONG ret = 0; | |||
| __asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret)); /* isb is issued first, then mrs copies cntvct_el0 into ret */ | |||
| return ret; | |||
| } | |||
| #define RPCC_DEFINED | |||
| #define RPCC64BIT | |||
| #endif | |||
| static inline int blas_quickdivide(blasint x, blasint y){ | |||
| return x / y; | |||
| @@ -194,10 +194,6 @@ int trsm_thread(int mode, BLASLONG m, BLASLONG n, | |||
| int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG); | |||
| int beta_thread(int mode, BLASLONG m, BLASLONG n, | |||
| double alpha_r, double alpha_i, | |||
| void *c, BLASLONG ldc, int (*fuction)()); | |||
| int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, | |||
| void *offsetA, BLASLONG lda, | |||
| void *offsetB, BLASLONG jb, | |||
| @@ -1197,7 +1197,11 @@ int get_cpuname(void){ | |||
| case 3: | |||
| case 5: | |||
| case 6: | |||
| #if defined(__x86_64__) || defined(__amd64__) | |||
| return CPUTYPE_CORE2; | |||
| #else | |||
| return CPUTYPE_PENTIUM2; | |||
| #endif | |||
| case 7: | |||
| case 8: | |||
| case 10: | |||
| @@ -1379,6 +1383,8 @@ int get_cpuname(void){ | |||
| break; | |||
| case 7: // family 6 exmodel 7 | |||
| switch (model) { | |||
| case 10: // Goldmont Plus | |||
| return CPUTYPE_NEHALEM; | |||
| case 14: // Ice Lake | |||
| if(support_avx512()) | |||
| return CPUTYPE_SKYLAKEX; | |||
| @@ -129,7 +129,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <unistd.h> | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #include <sys/sysctl.h> | |||
| #include <sys/resource.h> | |||
| #endif | |||
| @@ -192,7 +192,7 @@ void goto_set_num_threads(int num_threads) {}; | |||
| #else | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||
| #ifndef NO_AFFINITY | |||
| int get_num_procs(void); | |||
| #else | |||
| @@ -312,7 +312,7 @@ int get_num_procs(void) { | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) | |||
| int get_num_procs(void) { | |||
| @@ -404,7 +404,7 @@ extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| int max_num; | |||
| #endif | |||
| int blas_goto_num = 0; | |||
| @@ -412,7 +412,7 @@ int blas_get_cpu_number(void){ | |||
| if (blas_num_threads) return blas_num_threads; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| max_num = get_num_procs(); | |||
| #endif | |||
| @@ -436,7 +436,7 @@ int blas_get_cpu_number(void){ | |||
| else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | |||
| else blas_num_threads = MAX_CPU_NUMBER; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| if (blas_num_threads > max_num) blas_num_threads = max_num; | |||
| #endif | |||
| @@ -1673,7 +1673,7 @@ void gotoblas_dummy_for_PGI(void) { | |||
| #include <sys/resource.h> | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) | |||
| #include <sys/sysctl.h> | |||
| #include <sys/resource.h> | |||
| #endif | |||
| @@ -1736,7 +1736,7 @@ void goto_set_num_threads(int num_threads) {}; | |||
| #else | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) || defined(OS_NETBSD) | |||
| #if defined(OS_LINUX) || defined(OS_SUNOS) | |||
| #ifndef NO_AFFINITY | |||
| int get_num_procs(void); | |||
| #else | |||
| @@ -1855,7 +1855,7 @@ int get_num_procs(void) { | |||
| #endif | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) | |||
| #if defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) | |||
| int get_num_procs(void) { | |||
| @@ -1945,7 +1945,7 @@ extern int openblas_goto_num_threads_env(); | |||
| extern int openblas_omp_num_threads_env(); | |||
| int blas_get_cpu_number(void){ | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| int max_num; | |||
| #endif | |||
| int blas_goto_num = 0; | |||
| @@ -1953,7 +1953,7 @@ int blas_get_cpu_number(void){ | |||
| if (blas_num_threads) return blas_num_threads; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| max_num = get_num_procs(); | |||
| #endif | |||
| @@ -1977,7 +1977,7 @@ int blas_get_cpu_number(void){ | |||
| else if (blas_omp_num > 0) blas_num_threads = blas_omp_num; | |||
| else blas_num_threads = MAX_CPU_NUMBER; | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLY) || defined(OS_DARWIN) || defined(OS_ANDROID) | |||
| if (blas_num_threads > max_num) blas_num_threads = max_num; | |||
| #endif | |||
| @@ -618,19 +618,6 @@ | |||
| # functions added for lapack-3.7.0 | |||
| slarfy, | |||
| slasyf_rk, | |||
| ssyconvf_rook, | |||
| ssytf2_rk, | |||
| ssytrf_rk, | |||
| ssytrs_3, | |||
| ssytri_3, | |||
| ssytri_3x, | |||
| ssycon_3, | |||
| ssysv_rk, | |||
| slasyf_aa, | |||
| ssysv_aa, | |||
| ssytrf_aa, | |||
| ssytrs_aa, | |||
| strevc3, | |||
| sgelqt, | |||
| sgelqt3, | |||
| @@ -647,33 +634,8 @@ | |||
| stplqt, | |||
| stplqt2, | |||
| stpmlqt, | |||
| ssytrd_2stage, | |||
| ssytrd_sy2sb, | |||
| ssytrd_sb2st, | |||
| ssb2st_kernels, | |||
| ssyevd_2stage, | |||
| ssyev_2stage, | |||
| ssyevx_2stage, | |||
| ssyevr_2stage, | |||
| ssbev_2stage, | |||
| ssbevx_2stage, | |||
| ssbevd_2stage, | |||
| ssygv_2stage, | |||
| dlarfy, | |||
| dlasyf_rk, | |||
| dsyconvf, | |||
| dsyconvf_rook, | |||
| dsytf2_rk, | |||
| dsytrf_rk, | |||
| dsytrs_3, | |||
| dsytri_3, | |||
| dsytri_3x, | |||
| dsycon_3, | |||
| dsysv_rk, | |||
| dlasyf_aa, | |||
| dsysv_aa, | |||
| dsytrf_aa, | |||
| dsytrs_aa, | |||
| dtrevc3, | |||
| dgelqt, | |||
| dgelqt3, | |||
| @@ -690,45 +652,8 @@ | |||
| dtplqt, | |||
| dtplqt2, | |||
| dtpmlqt, | |||
| dsytrd_2stage, | |||
| dsytrd_sy2sb, | |||
| dsytrd_sb2st, | |||
| dsb2st_kernels, | |||
| dsyevd_2stage, | |||
| dsyev_2stage, | |||
| dsyevx_2stage, | |||
| dsyevr_2stage, | |||
| dsbev_2stage, | |||
| dsbevx_2stage, | |||
| dsbevd_2stage, | |||
| dsygv_2stage, | |||
| chetf2_rk, | |||
| chetrf_rk, | |||
| chetri_3, | |||
| chetri_3x, | |||
| chetrs_3, | |||
| checon_3, | |||
| chesv_rk, | |||
| chesv_aa, | |||
| chetrf_aa, | |||
| chetrs_aa, | |||
| clahef_aa, | |||
| clahef_rk, | |||
| clarfy, | |||
| clasyf_rk, | |||
| clasyf_aa, | |||
| csyconvf, | |||
| csyconvf_rook, | |||
| csytf2_rk, | |||
| csytrf_rk, | |||
| csytrf_aa, | |||
| csytrs_3, | |||
| csytrs_aa, | |||
| csytri_3, | |||
| csytri_3x, | |||
| csycon_3, | |||
| csysv_rk, | |||
| csysv_aa, | |||
| ctrevc3, | |||
| cgelqt, | |||
| cgelqt3, | |||
| @@ -745,45 +670,8 @@ | |||
| ctplqt, | |||
| ctplqt2, | |||
| ctpmlqt, | |||
| chetrd_2stage, | |||
| chetrd_he2hb, | |||
| chetrd_hb2st, | |||
| chb2st_kernels, | |||
| cheevd_2stage, | |||
| cheev_2stage, | |||
| cheevx_2stage, | |||
| cheevr_2stage, | |||
| chbev_2stage, | |||
| chbevx_2stage, | |||
| chbevd_2stage, | |||
| chegv_2stage, | |||
| zhetf2_rk, | |||
| zhetrf_rk, | |||
| zhetri_3, | |||
| zhetri_3x, | |||
| zhetrs_3, | |||
| zhecon_3, | |||
| zhesv_rk, | |||
| zhesv_aa, | |||
| zhetrf_aa, | |||
| zhetrs_aa, | |||
| zlahef_aa, | |||
| zlahef_rk, | |||
| zlarfy, | |||
| zlasyf_rk, | |||
| zlasyf_aa, | |||
| zsyconvf, | |||
| zsyconvf_rook, | |||
| zsytrs_aa, | |||
| zsytf2_rk, | |||
| zsytrf_rk, | |||
| zsytrf_aa, | |||
| zsytrs_3, | |||
| zsytri_3, | |||
| zsytri_3x, | |||
| zsycon_3, | |||
| zsysv_rk, | |||
| zsysv_aa, | |||
| ztrevc3, | |||
| ztplqt, | |||
| ztplqt2, | |||
| @@ -800,43 +688,13 @@ | |||
| zlaswlq, | |||
| zlamswlq, | |||
| zgemlq, | |||
| zhetrd_2stage, | |||
| zhetrd_he2hb, | |||
| zhetrd_hb2st, | |||
| zhb2st_kernels, | |||
| zheevd_2stage, | |||
| zheev_2stage, | |||
| zheevx_2stage, | |||
| zheevr_2stage, | |||
| zhbev_2stage, | |||
| zhbevx_2stage, | |||
| zhbevd_2stage, | |||
| zhegv_2stage, | |||
| sladiv1, | |||
| dladiv1, | |||
| iparam2stage, | |||
| # functions added for lapack-3.8.0 | |||
| ilaenv2stage, | |||
| ssysv_aa_2stage, | |||
| ssytrf_aa_2stage, | |||
| ssytrs_aa_2stage, | |||
| chesv_aa_2stage, | |||
| chetrf_aa_2stage, | |||
| chetrs_aa_2stage, | |||
| csysv_aa_2stage, | |||
| csytrf_aa_2stage, | |||
| csytrs_aa_2stage, | |||
| dsysv_aa_2stage, | |||
| dsytrf_aa_2stage, | |||
| dsytrs_aa_2stage, | |||
| zhesv_aa_2stage, | |||
| zhetrf_aa_2stage, | |||
| zhetrs_aa_2stage, | |||
| zsysv_aa_2stage, | |||
| zsytrf_aa_2stage, | |||
| zsytrs_aa_2stage | |||
| ilaenv2stage | |||
| ); | |||
| @lapack_extendedprecision_objs = ( | |||
| @@ -3509,6 +3367,59 @@ | |||
| zlahef_rook, zlasyf_rook, | |||
| zsytf2_rook, zsytrf_rook, zsytrs_rook, | |||
| zsytri_rook, zsycon_rook, zsysv_rook, | |||
| # 3.7.0 | |||
| slasyf_rk, ssyconvf_rook, ssytf2_rk, | |||
| ssytrf_rk, ssytrs_3, ssytri_3, | |||
| ssytri_3x, ssycon_3, ssysv_rk, | |||
| slasyf_aa, ssysv_aa, ssytrf_aa, | |||
| ssytrs_aa, ssytrd_2stage, ssytrd_sy2sb, | |||
| ssytrd_sb2st, ssb2st_kernels, ssyevd_2stage, | |||
| ssyev_2stage, ssyevx_2stage, ssyevr_2stage, | |||
| ssbev_2stage, ssbevx_2stage, ssbevd_2stage, | |||
| ssygv_2stage, dlasyf_rk, dsyconvf_rook, | |||
| dsytf2_rk, dsytrf_rk, dsytrs_3, | |||
| dsytri_3, dsytri_3x, dsycon_3, | |||
| dsysv_rk, dlasyf_aa, dsysv_aa, | |||
| dsytrf_aa, dsytrs_aa, dsytrd_2stage, | |||
| dsytrd_sy2sb, dsytrd_sb2st, dsb2st_kernels, | |||
| dsyevd_2stage, dsyev_2stage, dsyevx_2stage, | |||
| dsyevr_2stage, dsbev_2stage, dsbevx_2stage, | |||
| dsbevd_2stage, dsygv_2stage, chetf2_rk, | |||
| chetrf_rk, chetri_3, chetri_3x, | |||
| chetrs_3, checon_3, chesv_rk, | |||
| chesv_aa, chetrf_aa, chetrs_aa, | |||
| clahef_aa, clahef_rk, clasyf_rk, | |||
| clasyf_aa, csytf2_rk, csytrf_rk, | |||
| csytrf_aa, csytrs_3, csytrs_aa, | |||
| csytri_3, csytri_3x, csycon_3, | |||
| csysv_rk, csysv_aa, csyconvf_rook, | |||
| chetrd_2stage, chetrd_he2hb, chetrd_hb2st, | |||
| chb2st_kernels, cheevd_2stage, cheev_2stage, | |||
| cheevx_2stage, cheevr_2stage, chbev_2stage, | |||
| chbevx_2stage, chbevd_2stage, chegv_2stage, | |||
| zhetf2_rk, zhetrf_rk, zhetri_3, | |||
| zhetri_3x, zhetrs_3, zhecon_3, | |||
| zhesv_rk, zhesv_aa, zhetrf_aa, | |||
| zhetrs_aa, zlahef_aa, zlahef_rk, | |||
| zlasyf_rk, zlasyf_aa, zsyconvf_rook, | |||
| zsytrs_aa, zsytf2_rk, zsytrf_rk, | |||
| zsytrf_aa, zsytrs_3, zsytri_3, | |||
| zsytri_3x, zsycon_3, zsysv_rk, | |||
| zsysv_aa, zhetrd_2stage, zhetrd_he2hb, | |||
| zhetrd_hb2st, zhb2st_kernels, zheevd_2stage, | |||
| zheev_2stage, zheevx_2stage, zheevr_2stage, | |||
| zhbev_2stage, zhbevx_2stage, zhbevd_2stage, | |||
| zhegv_2stage, | |||
| # 3.8.0 | |||
| ssysv_aa_2stage, ssytrf_aa_2stage, | |||
| ssytrs_aa_2stage, chesv_aa_2stage, | |||
| chetrf_aa_2stage, chetrs_aa_2stage, | |||
| csysv_aa_2stage, csytrf_aa_2stage, | |||
| csytrs_aa_2stage, dsysv_aa_2stage, | |||
| dsytrf_aa_2stage, dsytrs_aa_2stage, | |||
| zhesv_aa_2stage, zhetrf_aa_2stage, | |||
| zhetrs_aa_2stage, zsysv_aa_2stage, | |||
| zsytrf_aa_2stage, zsytrs_aa_2stage | |||
| ); | |||
| @@ -19,7 +19,7 @@ $nofortran = 0; | |||
| $compiler = join(" ", @ARGV); | |||
| $compiler_bin = shift(@ARGV); | |||
| # f77 is too ambiguous | |||
| $compiler = "" if $compiler eq "f77"; | |||
| @@ -130,6 +130,11 @@ if ($compiler eq "") { | |||
| if ($data =~ / zho_ge__/) { | |||
| $need2bu = 1; | |||
| } | |||
| if ($vendor =~ /G95/) { | |||
| if ($ENV{NO_LAPACKE} != 1) { | |||
| $need2bu = ""; | |||
| } | |||
| } | |||
| } | |||
| if ($vendor eq "") { | |||
| @@ -277,6 +282,8 @@ $linker_a = ""; | |||
| if ($link ne "") { | |||
| $link =~ s/\-Y\sP\,/\-Y/g; | |||
| $link =~ s/\-R\s*/\-rpath\@/g; | |||
| $link =~ s/\-rpath\s+/\-rpath\@/g; | |||
| @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #ifdef OS_WINDOWS | |||
| #include <windows.h> | |||
| #endif | |||
| #if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
| #if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
| #include <sys/types.h> | |||
| #include <sys/sysctl.h> | |||
| #endif | |||
| @@ -1201,7 +1201,7 @@ static int get_num_cores(void) { | |||
| #ifdef OS_WINDOWS | |||
| SYSTEM_INFO sysinfo; | |||
| #elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
| #elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
| int m[2], count; | |||
| size_t len; | |||
| #endif | |||
| @@ -1215,7 +1215,7 @@ static int get_num_cores(void) { | |||
| GetSystemInfo(&sysinfo); | |||
| return sysinfo.dwNumberOfProcessors; | |||
| #elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
| #elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
| m[0] = CTL_HW; | |||
| m[1] = HW_NCPU; | |||
| len = sizeof(int); | |||