| @@ -91,3 +91,4 @@ benchmark/*.goto | |||
| benchmark/smallscaling | |||
| CMakeCache.txt | |||
| CMakeFiles/* | |||
| .vscode | |||
| @@ -212,7 +212,8 @@ Please note that it is not possible to combine support for different architectur | |||
| - **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>. | |||
| - **AIX**: Supported on PPC up to POWER8 | |||
| - **Haiku**: Supported by the community. We don't actively test the library on this OS. | |||
| - **SunOS**: Supported by the community. We don't actively test the library on this OS: | |||
| - **SunOS**: Supported by the community. We don't actively test the library on this OS. | |||
| - **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>. | |||
| ## Usage | |||
| @@ -30,10 +30,11 @@ environment: | |||
| CONDA_INSTALL_LOCN: C:\\Miniconda36-x64 | |||
| matrix: | |||
| - COMPILER: clang-cl | |||
| WITH_FORTRAN: yes | |||
| WITH_FORTRAN: ON | |||
| USE_OPENMP: ON | |||
| - COMPILER: clang-cl | |||
| DYNAMIC_ARCH: ON | |||
| WITH_FORTRAN: no | |||
| WITH_FORTRAN: OFF | |||
| - COMPILER: cl | |||
| - COMPILER: MinGW64-gcc-7.2.0-mingw | |||
| DYNAMIC_ARCH: OFF | |||
| @@ -47,12 +48,7 @@ environment: | |||
| install: | |||
| - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat | |||
| - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force | |||
| - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake | |||
| - if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja | |||
| - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja | |||
| - if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang | |||
| - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1 | |||
| - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 | |||
| - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%" | |||
| - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%" | |||
| @@ -68,8 +64,9 @@ before_build: | |||
| - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 .. | |||
| - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 .. | |||
| - if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 .. | |||
| - if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON .. | |||
| - if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. | |||
| - if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON .. | |||
| - if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 .. | |||
| - if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON .. | |||
| - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' .. | |||
| build_script: | |||
| @@ -148,16 +148,20 @@ endif () | |||
| include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") | |||
| if (DEFINED TARGET) | |||
| if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512) | |||
| # if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||
| if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) | |||
| if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 10.09) | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") | |||
| else() | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||
| endif() | |||
| # elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||
| # set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||
| # endif() | |||
| elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") | |||
| if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99) | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") | |||
| else() | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||
| endif() | |||
| endif() | |||
| endif() | |||
| if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) | |||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||
| @@ -233,6 +237,11 @@ if (BINARY64) | |||
| endif () | |||
| endif () | |||
| if(EMBEDDED) | |||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DOS_EMBEDDED") | |||
| set(CCOMMON_OPT "${CCOMMON_OPT} -mthumb -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16") | |||
| endif() | |||
| if (NEED_PIC) | |||
| if (${CMAKE_C_COMPILER} STREQUAL "IBM") | |||
| set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large") | |||
| @@ -122,7 +122,7 @@ extern "C" { | |||
| #define ATOM GOTO_ATOM | |||
| #undef GOTO_ATOM | |||
| #endif | |||
| #else | |||
| #elif !defined(OS_EMBEDDED) | |||
| #include <sys/mman.h> | |||
| #ifndef NO_SYSV_IPC | |||
| #include <sys/shm.h> | |||
| @@ -134,6 +134,9 @@ extern "C" { | |||
| #if defined(SMP) || defined(USE_LOCKING) | |||
| #include <pthread.h> | |||
| #endif | |||
| #else | |||
| #include <time.h> | |||
| #include <math.h> | |||
| #endif | |||
| #if defined(OS_SUNOS) | |||
| @@ -488,10 +491,12 @@ static inline unsigned long long rpcc(void){ | |||
| struct timespec ts; | |||
| clock_gettime(CLOCK_MONOTONIC, &ts); | |||
| return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec; | |||
| #else | |||
| #elif !defined(OS_EMBEDDED) | |||
| struct timeval tv; | |||
| gettimeofday(&tv,NULL); | |||
| return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000; | |||
| #else | |||
| return 0; | |||
| #endif | |||
| } | |||
| #define RPCC_DEFINED | |||
| @@ -521,6 +526,10 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||
| #include "common_linux.h" | |||
| #endif | |||
| #ifdef OS_EMBEDDED | |||
| #define DTB_DEFAULT_ENTRIES 64 | |||
| #endif | |||
| #define MMAP_ACCESS (PROT_READ | PROT_WRITE) | |||
| #ifdef __NetBSD__ | |||
| @@ -1668,16 +1668,23 @@ void gotoblas_dummy_for_PGI(void) { | |||
| #ifndef MEM_LARGE_PAGES | |||
| #define MEM_LARGE_PAGES 0x20000000 | |||
| #endif | |||
| #else | |||
| #elif !defined(OS_EMBEDDED) | |||
| #define ALLOC_MMAP | |||
| #define ALLOC_MALLOC | |||
| #else | |||
| #define ALLOC_MALLOC | |||
| inline int puts(const char *str) { return 0; } | |||
| inline int printf(const char *format, ...) { return 0; } | |||
| inline char *getenv(const char *name) { return ""; } | |||
| inline int atoi(const char *str) { return 0; } | |||
| #endif | |||
| #include <stdlib.h> | |||
| #include <stdio.h> | |||
| #include <fcntl.h> | |||
| #if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT) | |||
| #if (!defined(OS_WINDOWS) || defined(OS_CYGWIN_NT)) && !defined(OS_EMBEDDED) | |||
| #include <sys/mman.h> | |||
| #ifndef NO_SYSV_IPC | |||
| #include <sys/shm.h> | |||
| @@ -1634,10 +1634,10 @@ cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c | |||
| cblas_drotg.$(SUFFIX) cblas_drotg.$(PSUFFIX): rotg.c | |||
| $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
| cblas_crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c | |||
| cblas_crotg.$(SUFFIX) cblas_crotg.$(PSUFFIX): zrotg.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| cblas_zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c | |||
| cblas_zrotg.$(SUFFIX) cblas_zrotg.$(PSUFFIX): zrotg.c | |||
| $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) | |||
| cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c | |||
| @@ -0,0 +1,176 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2021, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #define HAVE_KERNEL_8 1 | |||
| static void zscal_kernel_8 (long n, float *x, float alpha_r, float alpha_i) | |||
| { | |||
| __vector float t0 = {-alpha_i, alpha_i, -alpha_i, alpha_i}; | |||
| __vector unsigned char mask = { 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4}; | |||
| __asm__ | |||
| ( | |||
| "dcbt 0, %2 \n\t" | |||
| "xscvdpspn 32, %x3 \n\t" | |||
| "xxspltw 32, 32, 0 \n\t" | |||
| "lxvp 40, 0(%2) \n\t" | |||
| "lxvp 42, 32(%2) \n\t" | |||
| "lxvp 44, 64(%2) \n\t" | |||
| "lxvp 46, 96(%2) \n\t" | |||
| "addic. %1, %1, -16 \n\t" | |||
| "ble two%= \n\t" | |||
| ".align 5 \n" | |||
| "one%=: \n\t" | |||
| "xvmulsp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r | |||
| "xvmulsp 49, 41, 32 \n\t" | |||
| "xvmulsp 50, 42, 32 \n\t" | |||
| "xvmulsp 51, 43, 32 \n\t" | |||
| "xvmulsp 52, 44, 32 \n\t" | |||
| "xvmulsp 53, 45, 32 \n\t" | |||
| "xvmulsp 54, 46, 32 \n\t" | |||
| "xvmulsp 55, 47, 32 \n\t" | |||
| "xxperm 34, 40, %x5 \n\t" | |||
| "xxperm 35, 41, %x5 \n\t" | |||
| "xxperm 36, 42, %x5 \n\t" | |||
| "xxperm 37, 43, %x5 \n\t" | |||
| "xxperm 38, 44, %x5 \n\t" | |||
| "xxperm 39, 45, %x5 \n\t" | |||
| "xxperm 56, 46, %x5 \n\t" | |||
| "xxperm 57, 47, %x5 \n\t" | |||
| "xvmulsp 34, 34, %x4 \n\t" // x0_i * -alpha_i, x0_r * alpha_i | |||
| "xvmulsp 35, 35, %x4 \n\t" | |||
| "lxvp 40, 128(%2) \n\t" | |||
| "xvmulsp 36, 36, %x4 \n\t" | |||
| "xvmulsp 37, 37, %x4 \n\t" | |||
| "lxvp 42, 160(%2) \n\t" | |||
| "xvmulsp 38, 38, %x4 \n\t" | |||
| "xvmulsp 39, 39, %x4 \n\t" | |||
| "lxvp 44, 192(%2) \n\t" | |||
| "xvmulsp 56, 56, %x4 \n\t" | |||
| "xvmulsp 57, 57, %x4 \n\t" | |||
| "lxvp 46, 224(%2) \n\t" | |||
| "xvaddsp 48, 48, 34 \n\t" | |||
| "xvaddsp 49, 49, 35 \n\t" | |||
| "xvaddsp 50, 50, 36 \n\t" | |||
| "xvaddsp 51, 51, 37 \n\t" | |||
| "stxvp 48, 0(%2) \n\t" | |||
| "xvaddsp 52, 52, 38 \n\t" | |||
| "xvaddsp 53, 53, 39 \n\t" | |||
| "stxvp 50, 32(%2) \n\t" | |||
| "xvaddsp 54, 54, 56 \n\t" | |||
| "xvaddsp 55, 55, 57 \n\t" | |||
| "stxvp 52, 64(%2) \n\t" | |||
| "stxvp 54, 96(%2) \n\t" | |||
| "addi %2, %2, 128 \n\t" | |||
| "addic. %1, %1, -16 \n\t" | |||
| "bgt one%= \n" | |||
| "two%=: \n\t" | |||
| "xvmulsp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r | |||
| "xvmulsp 49, 41, 32 \n\t" | |||
| "xvmulsp 50, 42, 32 \n\t" | |||
| "xvmulsp 51, 43, 32 \n\t" | |||
| "xvmulsp 52, 44, 32 \n\t" | |||
| "xvmulsp 53, 45, 32 \n\t" | |||
| "xvmulsp 54, 46, 32 \n\t" | |||
| "xvmulsp 55, 47, 32 \n\t" | |||
| "xxperm 34, 40, %x5 \n\t" | |||
| "xxperm 35, 41, %x5 \n\t" | |||
| "xxperm 36, 42, %x5 \n\t" | |||
| "xxperm 37, 43, %x5 \n\t" | |||
| "xxperm 38, 44, %x5 \n\t" | |||
| "xxperm 39, 45, %x5 \n\t" | |||
| "xxperm 56, 46, %x5 \n\t" | |||
| "xxperm 57, 47, %x5 \n\t" | |||
| "xvmulsp 34, 34, %x4 \n\t" // x0_i * -alpha_i, x0_r * alpha_i | |||
| "xvmulsp 35, 35, %x4 \n\t" | |||
| "xvmulsp 36, 36, %x4 \n\t" | |||
| "xvmulsp 37, 37, %x4 \n\t" | |||
| "xvmulsp 38, 38, %x4 \n\t" | |||
| "xvmulsp 39, 39, %x4 \n\t" | |||
| "xvmulsp 56, 56, %x4 \n\t" | |||
| "xvmulsp 57, 57, %x4 \n\t" | |||
| "xvaddsp 48, 48, 34 \n\t" | |||
| "xvaddsp 49, 49, 35 \n\t" | |||
| "xvaddsp 50, 50, 36 \n\t" | |||
| "xvaddsp 51, 51, 37 \n\t" | |||
| "stxvp 48, 0(%2) \n\t" | |||
| "xvaddsp 52, 52, 38 \n\t" | |||
| "xvaddsp 53, 53, 39 \n\t" | |||
| "stxvp 50, 32(%2) \n\t" | |||
| "xvaddsp 54, 54, 56 \n\t" | |||
| "xvaddsp 55, 55, 57 \n\t" | |||
| "stxvp 52, 64(%2) \n\t" | |||
| "stxvp 54, 96(%2) \n\t" | |||
| "#n=%1 x=%0=%2 alpha=(%3,%4)\n" | |||
| : | |||
| "+m" (*x), | |||
| "+r" (n), // 1 | |||
| "+b" (x) // 2 | |||
| : | |||
| "f" (alpha_r), // 3 | |||
| "wa" (t0), // 4 | |||
| "wa" (mask) // 5 | |||
| : | |||
| "cr0", | |||
| "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", | |||
| "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47", | |||
| "vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55", | |||
| "vs56","vs57" | |||
| ); | |||
| } | |||
| @@ -38,11 +38,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #pragma GCC optimize "O1" | |||
| #if defined(POWER8) || defined(POWER9) || defined(POWER10) | |||
| #if defined(__VEC__) || defined(__ALTIVEC__) | |||
| #if defined(POWER8) || defined(POWER9) | |||
| #if defined(DOUBLE) | |||
| #include "zscal_microk_power8.c" | |||
| #endif | |||
| #elif defined(POWER10) | |||
| #if defined(DOUBLE) | |||
| #include "zscal_microk_power8.c" | |||
| #else | |||
| #include "cscal_microk_power10.c" | |||
| #endif | |||
| #endif | |||
| #endif | |||
| @@ -145,7 +151,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F | |||
| { | |||
| #if defined(DOUBLE) | |||
| n1 = n & -8; | |||
| #else | |||
| n1 = n & -16; | |||
| #endif | |||
| if ( n1 > 0 ) | |||
| { | |||
| zscal_kernel_8(n1, x, da_r, da_i); | |||
| @@ -320,12 +320,13 @@ | |||
| $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP | |||
| COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, | |||
| $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, | |||
| $ U12, X | |||
| $ U12, X, ABI12, Y | |||
| * .. | |||
| * .. External Functions .. | |||
| COMPLEX CLADIV | |||
| LOGICAL LSAME | |||
| REAL CLANHS, SLAMCH | |||
| EXTERNAL LSAME, CLANHS, SLAMCH | |||
| EXTERNAL CLADIV, LLSAME, CLANHS, SLAMCH | |||
| * .. | |||
| * .. External Subroutines .. | |||
| EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA | |||
| @@ -729,15 +730,21 @@ | |||
| AD22 = ( ASCALE*H( ILAST, ILAST ) ) / | |||
| $ ( BSCALE*T( ILAST, ILAST ) ) | |||
| ABI22 = AD22 - U12*AD21 | |||
| ABI12 = AD12 - U12*AD11 | |||
| * | |||
| T1 = HALF*( AD11+ABI22 ) | |||
| RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 ) | |||
| TEMP = REAL( T1-ABI22 )*REAL( RTDISC ) + | |||
| $ AIMAG( T1-ABI22 )*AIMAG( RTDISC ) | |||
| IF( TEMP.LE.ZERO ) THEN | |||
| SHIFT = T1 + RTDISC | |||
| ELSE | |||
| SHIFT = T1 - RTDISC | |||
| SHIFT = ABI22 | |||
| CTEMP = SQRT( ABI12 )*SQRT( AD21 ) | |||
| TEMP = ABS1( CTEMP ) | |||
| IF( CTEMP.NE.ZERO ) THEN | |||
| X = HALF*( AD11-SHIFT ) | |||
| TEMP2 = ABS1( X ) | |||
| TEMP = MAX( TEMP, ABS1( X ) ) | |||
| Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) | |||
| IF( TEMP2.GT.ZERO ) THEN | |||
| IF( REAL( X / TEMP2 )*REAL( Y )+ | |||
| $ AIMAG( X / TEMP2 )*AIMAG( Y ).LT.ZERO )Y = -Y | |||
| END IF | |||
| SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) ) | |||
| END IF | |||
| ELSE | |||
| * | |||
| @@ -320,12 +320,13 @@ | |||
| $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP | |||
| COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, | |||
| $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, | |||
| $ U12, X | |||
| $ U12, X, ABI12, Y | |||
| * .. | |||
| * .. External Functions .. | |||
| COMPLEX*16 ZLADIV | |||
| LOGICAL LSAME | |||
| DOUBLE PRECISION DLAMCH, ZLANHS | |||
| EXTERNAL LSAME, DLAMCH, ZLANHS | |||
| EXTERNAL ZLADIV, LSAME, DLAMCH, ZLANHS | |||
| * .. | |||
| * .. External Subroutines .. | |||
| EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL | |||
| @@ -730,15 +731,21 @@ | |||
| AD22 = ( ASCALE*H( ILAST, ILAST ) ) / | |||
| $ ( BSCALE*T( ILAST, ILAST ) ) | |||
| ABI22 = AD22 - U12*AD21 | |||
| ABI12 = AD12 - U12*AD11 | |||
| * | |||
| T1 = HALF*( AD11+ABI22 ) | |||
| RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 ) | |||
| TEMP = DBLE( T1-ABI22 )*DBLE( RTDISC ) + | |||
| $ DIMAG( T1-ABI22 )*DIMAG( RTDISC ) | |||
| IF( TEMP.LE.ZERO ) THEN | |||
| SHIFT = T1 + RTDISC | |||
| ELSE | |||
| SHIFT = T1 - RTDISC | |||
| SHIFT = ABI22 | |||
| CTEMP = SQRT( ABI12 )*SQRT( AD21 ) | |||
| TEMP = ABS1( CTEMP ) | |||
| IF( CTEMP.NE.ZERO ) THEN | |||
| X = HALF*( AD11-SHIFT ) | |||
| TEMP2 = ABS1( X ) | |||
| TEMP = MAX( TEMP, ABS1( X ) ) | |||
| Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 ) | |||
| IF( TEMP2.GT.ZERO ) THEN | |||
| IF( DBLE( X / TEMP2 )*DBLE( Y )+ | |||
| $ DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y | |||
| END IF | |||
| SHIFT = SHIFT - CTEMP*ZLADIV( CTEMP, ( X+Y ) ) | |||
| END IF | |||
| ELSE | |||
| * | |||