@@ -5,27 +5,20 @@ on: [push, pull_request] | |||
jobs: | |||
build: | |||
runs-on: ${{ matrix.os }} | |||
strategy: | |||
fail-fast: false | |||
matrix: | |||
os: [ubuntu-latest, macos-latest] | |||
fortran: [gfortran, flang] | |||
build: [cmake, make] | |||
exclude: | |||
- os: macos-latest | |||
fortran: flang | |||
steps: | |||
- name: Checkout repository | |||
uses: actions/checkout@v2 | |||
- name: Compilation cache | |||
uses: actions/cache@v2 | |||
with: | |||
path: ~/.ccache | |||
# We include the commit sha in the cache key, as new cache entries are | |||
# only created if there is no existing entry for the key yet. | |||
key: ${{ runner.os }}-ccache-${{ github.sha }} | |||
# Restore any ccache cache entry, if none for | |||
# ${{ runner.os }}-ccache-${{ github.sha }} exists | |||
restore-keys: | | |||
${{ runner.os }}-ccache- | |||
uses: actions/checkout@v3 | |||
- name: Print system information | |||
run: | | |||
@@ -34,7 +27,7 @@ jobs: | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
sysctl -a | grep machdep.cpu | |||
else | |||
echo "$RUNNER_OS not supported" | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
@@ -43,61 +36,224 @@ jobs: | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
sudo apt-get install -y gfortran cmake ccache | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed. | |||
brew reinstall gcc | |||
brew install coreutils cmake ccache | |||
else | |||
echo "$RUNNER_OS not supported" | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB | |||
- name: gfortran build | |||
if: matrix.build == 'make' && matrix.fortran == 'gfortran' | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
path: ~/.ccache | |||
# We include the commit sha in the cache key, as new cache entries are | |||
# only created if there is no existing entry for the key yet. | |||
# GNU make and cmake call the compilers differently. It looks like | |||
# that causes the cache to mismatch. Keep the ccache for both build | |||
# tools separate to avoid polluting each other. | |||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} | |||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. | |||
restore-keys: | | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} | |||
ccache-${{ runner.os }}-${{ matrix.build }} | |||
- name: Configure ccache | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
export PATH="/usr/lib/ccache:${PATH}" | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}" | |||
else | |||
echo "$RUNNER_OS not supported" | |||
exit 1 | |||
if [ "${{ matrix.build }}" = "make" ]; then | |||
# Add ccache to path | |||
if [ "$RUNNER_OS" = "Linux" ]; then | |||
echo "/usr/lib/ccache" >> $GITHUB_PATH | |||
elif [ "$RUNNER_OS" = "macOS" ]; then | |||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
fi | |||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 | |||
- name: flang build | |||
if: matrix.build == 'make' && matrix.fortran == 'flang' | |||
- name: Build OpenBLAS | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
export PATH="/usr/lib/ccache:${PATH}" | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
exit 0 | |||
else | |||
echo "$RUNNER_OS not supported" | |||
exit 1 | |||
if [ "${{ matrix.fortran }}" = "flang" ]; then | |||
# download and install classic flang | |||
cd /usr/ | |||
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz | |||
sudo tar xf flang-20190329-x86-70.tgz | |||
sudo rm flang-20190329-x86-70.tgz | |||
cd - | |||
fi | |||
case "${{ matrix.build }}" in | |||
"make") | |||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" | |||
;; | |||
"cmake") | |||
mkdir build && cd build | |||
cmake -DDYNAMIC_ARCH=1 \ | |||
-DNOFORTRAN=0 \ | |||
-DBUILD_WITHOUT_LAPACK=0 \ | |||
-DCMAKE_VERBOSE_MAKEFILE=ON \ | |||
-DCMAKE_BUILD_TYPE=Release \ | |||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | |||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \ | |||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ | |||
.. | |||
cmake --build . | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac | |||
- name: Show ccache status | |||
continue-on-error: true | |||
run: ccache -s | |||
- name: Run tests | |||
timeout-minutes: 60 | |||
run: | | |||
case "${{ matrix.build }}" in | |||
"make") | |||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' | |||
echo "::group::Tests in 'test' directory" | |||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
echo "::group::Tests in 'ctest' directory" | |||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
echo "::group::Tests in 'utest' directory" | |||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
;; | |||
"cmake") | |||
cd build && ctest | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac | |||
msys2: | |||
runs-on: windows-latest | |||
strategy: | |||
fail-fast: false | |||
matrix: | |||
msystem: [MINGW64, MINGW32, CLANG64] | |||
idx: [int32, int64] | |||
include: | |||
- msystem: MINGW64 | |||
idx: int32 | |||
target-prefix: mingw-w64-x86_64 | |||
fc-pkg: mingw-w64-x86_64-gcc-fortran | |||
- msystem: MINGW32 | |||
idx: int32 | |||
target-prefix: mingw-w64-i686 | |||
fc-pkg: mingw-w64-i686-gcc-fortran | |||
- msystem: CLANG64 | |||
idx: int32 | |||
target-prefix: mingw-w64-clang-x86_64 | |||
c-lapack-flags: -DC_LAPACK=ON | |||
- msystem: MINGW64 | |||
idx: int64 | |||
idx64-flags: -DBINARY=64 -DINTERFACE64=1 | |||
target-prefix: mingw-w64-x86_64 | |||
fc-pkg: mingw-w64-x86_64-gcc-fortran | |||
- msystem: CLANG64 | |||
idx: int64 | |||
idx64-flags: -DBINARY=64 -DINTERFACE64=1 | |||
target-prefix: mingw-w64-clang-x86_64 | |||
c-lapack-flags: -DC_LAPACK=ON | |||
exclude: | |||
- msystem: MINGW32 | |||
idx: int64 | |||
cd /usr/ | |||
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz | |||
sudo tar xf flang-20190329-x86-70.tgz | |||
sudo rm flang-20190329-x86-70.tgz | |||
cd - | |||
defaults: | |||
run: | |||
# Use MSYS2 bash as default shell | |||
shell: msys2 {0} | |||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC=flang | |||
env: | |||
CHERE_INVOKING: 1 | |||
steps: | |||
- name: Get CPU name | |||
shell: pwsh | |||
run : | | |||
Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name | |||
- name: Install build dependencies | |||
uses: msys2/setup-msys2@v2 | |||
with: | |||
msystem: ${{ matrix.msystem }} | |||
update: true | |||
release: false # Use pre-installed version | |||
install: >- | |||
base-devel | |||
${{ matrix.target-prefix }}-cc | |||
${{ matrix.fc-pkg }} | |||
${{ matrix.target-prefix }}-cmake | |||
${{ matrix.target-prefix }}-ninja | |||
${{ matrix.target-prefix }}-ccache | |||
- name: CMake gfortran build | |||
if: matrix.build == 'cmake' && matrix.fortran == 'gfortran' | |||
- name: Checkout repository | |||
uses: actions/checkout@v3 | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
# It looks like this path needs to be hard-coded. | |||
path: C:/msys64/home/runneradmin/.ccache | |||
# We include the commit sha in the cache key, as new cache entries are | |||
# only created if there is no existing entry for the key yet. | |||
key: ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ github.ref }}-${{ github.sha }} | |||
# Restore a matching ccache cache entry. Prefer same branch. | |||
restore-keys: | | |||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ github.ref }} | |||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }} | |||
- name: Configure ccache | |||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota. | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
export PATH="/usr/lib/ccache:${PATH}" | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}" | |||
else | |||
echo "$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
which ccache | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 250M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
echo $HOME | |||
cygpath -w $HOME | |||
- name: Configure OpenBLAS | |||
run: | | |||
mkdir build && cd build | |||
cmake -DBUILD_SHARED_LIBS=ON \ | |||
-DBUILD_STATIC_LIBS=ON \ | |||
-DDYNAMIC_ARCH=ON \ | |||
-DUSE_THREAD=ON \ | |||
-DNUM_THREADS=64 \ | |||
-DTARGET=CORE2 \ | |||
${{ matrix.idx64-flags }} \ | |||
${{ matrix.c-lapack-flags }} \ | |||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \ | |||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ | |||
.. | |||
- name: Build OpenBLAS | |||
run: cd build && cmake --build . | |||
- name: Show ccache status | |||
continue-on-error: true | |||
run: ccache -s | |||
mkdir build | |||
cd build | |||
cmake -DDYNAMIC_ARCH=1 -DNOFORTRAN=0 -DBUILD_WITHOUT_LAPACK=0 -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_BUILD_TYPE=Release .. | |||
make -j$(nproc) | |||
- name: Run tests | |||
timeout-minutes: 60 | |||
run: cd build && ctest |
@@ -25,7 +25,8 @@ matrix: | |||
# - BTYPE="BINARY=64" | |||
# | |||
# - <<: *test-ubuntu | |||
os: linux-ppc64le | |||
os: linux | |||
arch: ppc64le | |||
before_script: &common-before | |||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32" | |||
script: | |||
@@ -43,6 +44,7 @@ matrix: | |||
arch: s390x | |||
before_script: | |||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32" | |||
- sudo apt-get install --only-upgrade binutils | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=IBMZ_LINUX | |||
@@ -55,6 +57,7 @@ matrix: | |||
compiler: clang | |||
before_script: | |||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32" | |||
- sudo apt-get install --only-upgrade binutils | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=IBMZ_LINUX | |||
@@ -269,9 +272,9 @@ matrix: | |||
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1" | |||
# - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1" | |||
- &test-graviton2 | |||
- &test-neoversen1 | |||
os: linux | |||
arch: arm64-graviton2 | |||
arch: arm64 | |||
dist: focal | |||
group: edge | |||
virt: lxd | |||
@@ -17,14 +17,12 @@ include(GNUInstallDirs) | |||
include(CMakePackageConfigHelpers) | |||
if(MSVC AND NOT DEFINED NOFORTRAN) | |||
set(NOFORTRAN ON) | |||
endif() | |||
####### | |||
if(MSVC) | |||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) | |||
endif() | |||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF) | |||
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON) | |||
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) | |||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF) | |||
@@ -36,6 +34,8 @@ option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several | |||
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF) | |||
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF) | |||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") | |||
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON) | |||
else() | |||
@@ -179,7 +179,7 @@ endforeach () | |||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. | |||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. | |||
if (NOT NOFORTRAN AND NOT NO_LAPACK) | |||
if (NOT NO_LAPACK) | |||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake") | |||
if (NOT NO_LAPACKE) | |||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake") | |||
@@ -205,8 +205,8 @@ endif () | |||
# add objects to the openblas lib | |||
if(NOT NO_LAPACK) | |||
add_library(LAPACK OBJECT ${LA_SOURCES}) | |||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK>") | |||
add_library(LAPACK_OVERRIDES OBJECT ${LA_SOURCES}) | |||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK_OVERRIDES>") | |||
endif() | |||
if(NOT NO_LAPACKE) | |||
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES}) | |||
@@ -247,7 +247,7 @@ endif() | |||
if (APPLE AND DYNAMIC_ARCH AND BUILD_SHARED_LIBS) | |||
set (CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) | |||
if (NOT NOFORTRAN) | |||
if (NOT NOFORTRAN) | |||
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1) | |||
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY | |||
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' " | |||
@@ -317,7 +317,9 @@ if (NOT NOFORTRAN) | |||
if(NOT NO_CBLAS) | |||
add_subdirectory(ctest) | |||
endif() | |||
add_subdirectory(lapack-netlib/TESTING) | |||
if (BUILD_TESTING) | |||
add_subdirectory(lapack-netlib/TESTING) | |||
endif() | |||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | |||
add_subdirectory(cpp_thread_test) | |||
endif() | |||
@@ -394,14 +396,23 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "") | |||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") | |||
endif() | |||
if (NOT DEFINED USE_PERL) | |||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD | |||
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def | |||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so | |||
COMMENT "renaming symbols" | |||
) | |||
else() | |||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD | |||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def | |||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def | |||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so | |||
COMMENT "renaming symbols" | |||
) | |||
endif() | |||
endif() | |||
# Install project | |||
# Install libraries | |||
@@ -25,11 +25,14 @@ ifeq ($(NO_FORTRAN), 1) | |||
define NOFORTRAN | |||
1 | |||
endef | |||
define NO_LAPACK | |||
ifneq ($(NO_LAPACK), 1) | |||
define C_LAPACK | |||
1 | |||
endef | |||
endif | |||
export NOFORTRAN | |||
export NO_LAPACK | |||
export C_LAPACK | |||
endif | |||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) | |||
@@ -160,7 +163,7 @@ ifeq ($(CORE), UNKNOWN) | |||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) | |||
endif | |||
ifeq ($(NOFORTRAN), 1) | |||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.) | |||
$(info OpenBLAS: Detecting fortran compiler failed. Can only compile BLAS and f2c-converted LAPACK.) | |||
endif | |||
ifeq ($(NO_STATIC), 1) | |||
ifeq ($(NO_SHARED), 1) | |||
@@ -241,19 +244,14 @@ hpl_p : | |||
fi; \ | |||
done | |||
ifeq ($(NO_LAPACK), 1) | |||
netlib : | |||
else | |||
netlib : lapack_prebuild | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
ifneq ($(NO_LAPACK), 1) | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib | |||
endif | |||
ifneq ($(NO_LAPACKE), 1) | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib | |||
endif | |||
endif | |||
ifeq ($(NO_LAPACK), 1) | |||
re_lapack : | |||
@@ -267,7 +265,7 @@ prof_lapack : lapack_prebuild | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof | |||
lapack_prebuild : | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK))) | |||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
@@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
endif | |||
endif | |||
ifeq ($(CORE), FT2000) | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
# Use a72 tunings because Neoverse-N1 is only available | |||
# in GCC>=9 | |||
ifeq ($(CORE), NEOVERSEN1) | |||
@@ -229,6 +236,43 @@ endif | |||
endif | |||
endif | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXX1) | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72 | |||
endif | |||
endif | |||
endif | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXX2) | |||
CCOMMON_OPT += -march=armv8.4-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a+sve | |||
endif | |||
endif | |||
endif | |||
#ifeq (1, $(filter 1,$(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXA510) | |||
CCOMMON_OPT += -march=armv8.4-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a+sve | |||
endif | |||
endif | |||
endif | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXA710) | |||
CCOMMON_OPT += -march=armv8.4-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a+sve | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif |
@@ -15,6 +15,12 @@ TARGET_MAKE = Makefile.conf | |||
TARGET_CONF = config.h | |||
endif | |||
ifdef USE_PERL | |||
SCRIPTSUFFIX = .pl | |||
else | |||
SCRIPTSUFFIX = | |||
endif | |||
# CPUIDEMU = ../../cpuid/table.o | |||
ifdef CPUIDEMU | |||
@@ -53,10 +59,10 @@ all: getarch_2nd | |||
./getarch_2nd 0 >> $(TARGET_MAKE) | |||
./getarch_2nd 1 >> $(TARGET_CONF) | |||
config.h : c_check f_check getarch | |||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) | |||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch | |||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS) | |||
ifneq ($(ONLY_CBLAS), 1) | |||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) | |||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) | |||
else | |||
#When we only build CBLAS, we set NOFORTRAN=2 | |||
echo "NOFORTRAN=2" >> $(TARGET_MAKE) | |||
@@ -71,9 +77,11 @@ endif | |||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU) | |||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ | |||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - $(CC) $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \ | |||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||
getarch_2nd : getarch_2nd.c config.h dummy | |||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy | |||
ifndef TARGET_CORE | |||
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c | |||
else | |||
@@ -81,3 +89,5 @@ else | |||
endif | |||
dummy: | |||
.PHONY: dummy |
@@ -352,7 +352,7 @@ OBJCONV = $(CROSS_SUFFIX)objconv | |||
# When fortran support was either not detected or actively deselected, only build BLAS. | |||
ifeq ($(NOFORTRAN), 1) | |||
NO_LAPACK = 1 | |||
C_LAPACK = 1 | |||
override FEXTRALIB = | |||
endif | |||
@@ -847,7 +847,7 @@ CCOMMON_OPT += -mabi=32 | |||
BINARY_DEFINED = 1 | |||
endif | |||
ifeq ($(CORE), $(filter $(CORE),LOONGSON3R3 LOONGSON3R4)) | |||
ifneq (, $(filter $(CORE),LOONGSON3R3 LOONGSON3R4)) | |||
CCOMMON_OPT += -march=loongson3a | |||
FCOMMON_OPT += -march=loongson3a | |||
endif | |||
@@ -1041,9 +1041,11 @@ FCOMMON_OPT += -frecursive | |||
# work around ABI problem with passing single-character arguments | |||
FCOMMON_OPT += -fno-optimize-sibling-calls | |||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | |||
ifneq ($(NOFORTRAN), 1) | |||
ifneq ($(NO_LAPACK), 1) | |||
EXTRALIB += -lgfortran | |||
endif | |||
endif | |||
ifdef NO_BINARY_MODE | |||
ifeq ($(ARCH), $(filter $(ARCH),mips64)) | |||
ifdef BINARY64 | |||
@@ -1303,6 +1305,10 @@ ifeq ($(DYNAMIC_OLDER), 1) | |||
CCOMMON_OPT += -DDYNAMIC_OLDER | |||
endif | |||
ifeq ($(C_LAPACK), 1) | |||
CCOMMON_OPT += -DC_LAPACK | |||
endif | |||
ifeq ($(NO_LAPACK), 1) | |||
CCOMMON_OPT += -DNO_LAPACK | |||
#Disable LAPACK C interface | |||
@@ -1532,7 +1538,7 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) | |||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF) | |||
#MAKEOVERRIDES = | |||
ifdef NEED_PIC | |||
ifeq ($(NEED_PIC), 1) | |||
ifeq (,$(findstring PIC,$(FFLAGS))) | |||
override FFLAGS += -fPIC | |||
endif | |||
@@ -1562,6 +1568,7 @@ endif | |||
ifdef OS_WINDOWS | |||
LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS | |||
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE | |||
endif | |||
ifeq ($(C_COMPILER), LSB) | |||
LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE | |||
@@ -1661,6 +1668,7 @@ export USE_OPENMP | |||
export CROSS | |||
export CROSS_SUFFIX | |||
export NOFORTRAN | |||
export C_LAPACK | |||
export NO_FBLAS | |||
export EXTRALIB | |||
export CEXTRALIB | |||
@@ -92,6 +92,10 @@ CORTEXA53 | |||
CORTEXA57 | |||
CORTEXA72 | |||
CORTEXA73 | |||
CORTEXA510 | |||
CORTEXA710 | |||
CORTEXX1 | |||
CORTEXX2 | |||
NEOVERSEN1 | |||
NEOVERSEV1 | |||
NEOVERSEN2 | |||
@@ -103,6 +107,9 @@ THUNDERX2T99 | |||
TSV110 | |||
THUNDERX3T110 | |||
VORTEX | |||
A64FX | |||
ARMV8SVE | |||
FT2000 | |||
9.System Z: | |||
ZARCH_GENERIC | |||
@@ -65,7 +65,7 @@ jobs: | |||
- task: CMake@1 | |||
inputs: | |||
workingDirectory: 'build' # Optional | |||
cmakeArgs: '-G "Visual Studio 16 2019" ..' | |||
cmakeArgs: '-G "Visual Studio 17 2022" ..' | |||
- task: CMake@1 | |||
inputs: | |||
cmakeArgs: '--build . --config Release' | |||
@@ -103,7 +103,7 @@ jobs: | |||
- job: Windows_flang_clang | |||
pool: | |||
vmImage: 'windows-latest' | |||
vmImage: 'windows-2022' | |||
steps: | |||
- script: | | |||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%" | |||
@@ -114,11 +114,31 @@ jobs: | |||
conda install --yes --quiet ninja flang | |||
mkdir build | |||
cd build | |||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||
cmake --build . --config Release | |||
ctest | |||
- job: Windows_cl_flang | |||
pool: | |||
vmImage: 'windows-2022' | |||
steps: | |||
- script: | | |||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%" | |||
set "LIB=C:\Miniconda\Library\lib;%LIB%" | |||
set "CPATH=C:\Miniconda\Library\include;%CPATH%" | |||
conda config --add channels conda-forge --force | |||
conda config --set auto_update_conda false | |||
conda install --yes --quiet ninja flang | |||
mkdir build | |||
cd build | |||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" | |||
cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_Fortran_COMPILER=flang -DC_LAPACK=1 -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON .. | |||
cmake --build . --config Release | |||
ctest | |||
- job: OSX_OpenMP | |||
pool: | |||
vmImage: 'macOS-10.15' | |||
@@ -178,7 +198,7 @@ jobs: | |||
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON .. | |||
cmake --build . | |||
ctest | |||
- job: OSX_Ifort_Clang | |||
pool: | |||
vmImage: 'macOS-10.15' | |||
@@ -1,426 +1,413 @@ | |||
#!/usr/bin/env perl | |||
#use File::Basename; | |||
# use File::Temp qw(tempfile); | |||
#!/bin/sh | |||
# Checking cross compile | |||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | |||
$hostarch = `uname -m | sed -e s/i.86/x86/`; | |||
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS"); | |||
chop($hostarch); | |||
$hostarch = "x86_64" if ($hostarch eq "amd64"); | |||
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); | |||
$hostarch = "arm64" if ($hostarch eq "aarch64"); | |||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); | |||
$hostarch = "zarch" if ($hostarch eq "s390x"); | |||
#$tmpf = new File::Temp( UNLINK => 1 ); | |||
$binary = $ENV{"BINARY"}; | |||
$makefile = shift(@ARGV); | |||
$config = shift(@ARGV); | |||
$compiler_name = shift(@ARGV); | |||
$flags = join(" ", @ARGV); | |||
# Identify the build host so it can later be compared with the compiler's
# target (cross-compile detection).
hostos="$(uname -s | sed -e 's/\-.*//')"

# AIX and SunOS report the processor family through `uname -p`; everywhere
# else the machine name from `uname -m` is used, with i386/i486/... folded
# into "x86".
case "$hostos" in
    AIX|SunOS) hostarch="$(uname -p)" ;;
    *)         hostarch="$(uname -m | sed -e 's/i.86/x86/')" ;;
esac

# Normalise vendor spellings to the architecture names used in this script.
case "$hostarch" in
    amd64)         hostarch=x86_64 ;;
    arm64)         ;;                    # already canonical, keep as is
    arm*)          hostarch=arm ;;
    aarch64)       hostarch=arm64 ;;
    powerpc*|ppc*) hostarch=power ;;
    s390x)         hostarch=zarch ;;
esac

# Positional arguments: <makefile> <config header> <compiler> [compiler flags...]
makefile="$1"
config="$2"
compiler_name="$3"
shift 3
flags="$*"
# First, we need to know the target OS and compiler name | |||
$data = `$compiler_name $flags -E ctest.c`; | |||
if ($?) { | |||
printf STDERR "C Compiler ($compiler_name) is something wrong.\n"; | |||
die 1; | |||
} | |||
$cross_suffix = ""; | |||
eval "use File::Basename"; | |||
if ($@){ | |||
warn "could not load PERL module File::Basename, emulating its functionality"; | |||
my $dirnam = substr($compiler_name, 0, rindex($compiler_name, "/")-1 ); | |||
if ($dirnam ne ".") { | |||
$cross_suffix .= $dirnam . "/"; | |||
} | |||
my $basnam = substr($compiler_name, rindex($compiler_name,"/")+1, length($compiler_name)-rindex($compiler_name,"/")-1); | |||
if ($basnam =~ /([^\s]*-)(.*)/) { | |||
$cross_suffix .= $1; | |||
} | |||
} else { | |||
if (dirname($compiler_name) ne ".") { | |||
$cross_suffix .= dirname($compiler_name) . "/"; | |||
} | |||
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) { | |||
$cross_suffix .= $1; | |||
} | |||
} | |||
$compiler = ""; | |||
$compiler = LSB if ($data =~ /COMPILER_LSB/); | |||
$compiler = CLANG if ($data =~ /COMPILER_CLANG/); | |||
$compiler = PGI if ($data =~ /COMPILER_PGI/); | |||
$compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/); | |||
$compiler = INTEL if ($data =~ /COMPILER_INTEL/); | |||
$compiler = OPEN64 if ($data =~ /COMPILER_OPEN64/); | |||
$compiler = SUN if ($data =~ /COMPILER_SUN/); | |||
$compiler = IBM if ($data =~ /COMPILER_IBM/); | |||
$compiler = DEC if ($data =~ /COMPILER_DEC/); | |||
$compiler = GCC if ($compiler eq ""); | |||
$os = Linux if ($data =~ /OS_LINUX/); | |||
$os = FreeBSD if ($data =~ /OS_FREEBSD/); | |||
$os = NetBSD if ($data =~ /OS_NETBSD/); | |||
$os = OpenBSD if ($data =~ /OS_OPENBSD/); | |||
$os = DragonFly if ($data =~ /OS_DRAGONFLY/); | |||
$os = Darwin if ($data =~ /OS_DARWIN/); | |||
$os = SunOS if ($data =~ /OS_SUNOS/); | |||
$os = AIX if ($data =~ /OS_AIX/); | |||
$os = osf if ($data =~ /OS_OSF/); | |||
$os = WINNT if ($data =~ /OS_WINNT/); | |||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/); | |||
$os = Interix if ($data =~ /OS_INTERIX/); | |||
$os = Android if ($data =~ /OS_ANDROID/); | |||
$os = Haiku if ($data =~ /OS_HAIKU/); | |||
$architecture = x86 if ($data =~ /ARCH_X86/); | |||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | |||
$architecture = e2k if ($data =~ /ARCH_E2K/); | |||
$architecture = power if ($data =~ /ARCH_POWER/); | |||
$architecture = mips if ($data =~ /ARCH_MIPS/); | |||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); | |||
$architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
$architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/); | |||
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
$defined = 0; | |||
if ($os eq "AIX") { | |||
$compiler_name .= " -maix32" if ($binary eq "32"); | |||
$compiler_name .= " -maix64" if ($binary eq "64"); | |||
$defined = 1; | |||
} | |||
if ($architecture eq "mips") { | |||
$compiler_name .= " -mabi=32"; | |||
$defined = 1; | |||
} | |||
if ($architecture eq "mips64") { | |||
$compiler_name .= " -mabi=n32" if ($binary eq "32"); | |||
$compiler_name .= " -mabi=64" if ($binary eq "64"); | |||
$defined = 1; | |||
} | |||
if (($architecture eq "arm") || ($architecture eq "arm64")) { | |||
$defined = 1; | |||
} | |||
if ($architecture eq "zarch") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "e2k") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "alpha") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "ia64") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { | |||
$defined = 1; | |||
$binary =32; | |||
} | |||
if ($architecture eq "riscv64") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "loongarch64") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($compiler eq "PGI") { | |||
$compiler_name .= " -tp p7" if ($binary eq "32"); | |||
$compiler_name .= " -tp p7-64" if ($binary eq "64"); | |||
$openmp = "-mp"; | |||
$defined = 1; | |||
} | |||
if ($compiler eq "IBM") { | |||
$compiler_name .= " -q32" if ($binary eq "32"); | |||
$compiler_name .= " -q64" if ($binary eq "64"); | |||
$openmp = "-qsmp=omp"; | |||
$defined = 1; | |||
} | |||
if ($compiler eq "INTEL") { | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler eq "PATHSCALE") { | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler eq "OPEN64") { | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler eq "CLANG") { | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler eq "GCC" || $compiler eq "LSB") { | |||
$openmp = "-fopenmp"; | |||
{ | |||
data=`$compiler_name $flags -E ctest.c` | |||
} || { | |||
printf '%s\n' "C Compiler ($compiler_name) is something wrong." >&2 | |||
exit 1 | |||
} | |||
if ($defined == 0) { | |||
$compiler_name .= " -m32" if ($binary eq "32"); | |||
$compiler_name .= " -m64" if ($binary eq "64"); | |||
} | |||
# Build the toolchain prefix (CROSS_SUFFIX): the compiler's directory, if it
# is not the current one, followed by everything in its file name up to and
# including the last '-' (e.g. "aarch64-linux-gnu-" for aarch64-linux-gnu-gcc).
# $compiler_name is deliberately left unquoted, as in the rest of the script.
cross_suffix=""
dn="$(dirname $compiler_name)"
if [ "$dn" != '.' ]; then
    cross_suffix="$dn/"
fi
bn="$(basename $compiler_name)"
# Stripping up to the first '-' changes the string only if a '-' is present.
if [ "${bn#*-}" != "$bn" ]; then
    cross_suffix="$cross_suffix${bn%-*}-"
fi
# Determine the compiler family from the COMPILER_* marker found in $data
# (the preprocessed ctest.c output). Candidates are tried in priority order
# and the first hit wins; GCC is the fallback when no marker is present.
compiler=GCC
for candidate in LSB CLANG PGI PATHSCALE INTEL OPEN64 SUN IBM DEC; do
    case "$data" in
        *COMPILER_"$candidate"*)
            compiler="$candidate"
            break
            ;;
    esac
done
# Determine the target OS from the OS_* markers in $data. Each entry is
# "<marker suffix>:<OS name>". Every entry is tested in order and there is
# no early exit, so when several markers are present the last one listed wins.
for os_map in \
    LINUX:Linux FREEBSD:FreeBSD NETBSD:NetBSD OPENBSD:OpenBSD \
    DRAGONFLY:DragonFly DARWIN:Darwin SUNOS:SunOS AIX:AIX OSF:osf \
    WINNT:WINNT CYGWIN_NT:CYGWIN_NT INTERIX:Interix ANDROID:Android \
    HAIKU:Haiku
do
    case "$data" in
        *OS_"${os_map%%:*}"*) os="${os_map#*:}" ;;
    esac
done
# Determine the target architecture from the ARCH_* marker in $data. Each
# entry is "<marker suffix>:<architecture name>". The first match wins, so
# the longer names (X86_64, MIPS64, ARM64) are listed before the shorter
# names they contain.
for arch_map in \
    X86_64:x86_64 X86:x86 E2K:e2k POWER:power MIPS64:mips64 MIPS:mips \
    ALPHA:alpha SPARC:sparc IA64:ia64 ARM64:arm64 ARM:arm ZARCH:zarch \
    RISCV64:riscv64 LOONGARCH64:loongarch64
do
    case "$data" in
        *ARCH_"${arch_map%%:*}"*)
            architecture="${arch_map#*:}"
            break
            ;;
    esac
done
# Pin the word size for targets that need a specific ABI switch or only exist
# in one width. "defined" is set to 1 whenever the target has been handled
# here, which suppresses the generic -m32/-m64 switch appended later.
defined=0

if [ "$os" = "AIX" ]; then
    case "$BINARY" in
        32) compiler_name="$compiler_name -maix32" ;;
        64) compiler_name="$compiler_name -maix64" ;;
    esac
    defined=1
fi

case "$architecture" in
    mips)
        compiler_name="$compiler_name -mabi=32"
        defined=1
        ;;
    mips64)
        case "$BINARY" in
            32) compiler_name="$compiler_name -mabi=n32" ;;
            64) compiler_name="$compiler_name -mabi=64" ;;
        esac
        defined=1
        ;;
    arm|arm64)
        defined=1
        ;;
    # 64-bit-only targets. "loongarch64" was previously misspelled
    # "loonarch64", so LoongArch fell through to the generic -m32/-m64 path.
    zarch|e2k|alpha|ia64|riscv64|loongarch64)
        defined=1
        BINARY=64
        ;;
    x86)
        # Darwin and SunOS are excluded so that BINARY can still select -m64 there.
        if [ "$os" != "Darwin" ] && [ "$os" != "SunOS" ]; then
            defined=1
            BINARY=32
        fi
        ;;
esac
# Per-compiler settings: the word-size switch (PGI and IBM have their own
# spelling) and the option that enables OpenMP.
if [ "$compiler" = "PGI" ]; then
    if [ "$BINARY" = "32" ]; then
        compiler_name="$compiler_name -tp p7"
    elif [ "$BINARY" = "64" ]; then
        compiler_name="$compiler_name -tp p7-64"
    fi
    openmp='-mp'
    defined=1
elif [ "$compiler" = "IBM" ]; then
    if [ "$BINARY" = "32" ]; then
        compiler_name="$compiler_name -q32"
    elif [ "$BINARY" = "64" ]; then
        compiler_name="$compiler_name -q64"
    fi
    openmp='-qsmp=omp'
    defined=1
elif [ "$compiler" = "INTEL" ]; then
    openmp='-openmp'
elif [ "$compiler" = "PATHSCALE" ] || [ "$compiler" = "OPEN64" ]; then
    openmp='-mp'
elif [ "$compiler" = "CLANG" ] || [ "$compiler" = "GCC" ] || [ "$compiler" = "LSB" ]; then
    openmp='-fopenmp'
fi

# Nothing above pinned the word size: fall back to the generic -m32/-m64.
if [ "$defined" -eq 0 ]; then
    if [ "$BINARY" = "32" ]; then
        compiler_name="$compiler_name -m32"
    elif [ "$BINARY" = "64" ]; then
        compiler_name="$compiler_name -m64"
    fi
fi
# Do again | |||
$data = `$compiler_name $flags -E ctest.c`; | |||
if ($?) { | |||
printf STDERR "C Compiler ($compiler_name) is something wrong.\n"; | |||
die 1; | |||
{ | |||
data="$($compiler_name $flags -E ctest.c)" | |||
} || { | |||
printf '%s\n' "C Compiler ($compiler_name) is something wrong." >&2 | |||
exit 1 | |||
} | |||
$have_msa = 0; | |||
if (($architecture eq "mips") || ($architecture eq "mips64")) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check MSA capatibility"; | |||
} else { | |||
$tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 ); | |||
$code = '"addvi.b $w0, $w1, 1"'; | |||
$msa_flags = "-mmsa -mfp64 -mload-store-pairs"; | |||
print $tmpf "#include <msa.h>\n\n"; | |||
print $tmpf "void main(void){ __asm__ volatile($code); }\n"; | |||
$args = "$msa_flags -o $tmpf.o $tmpf"; | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$have_msa = 0; | |||
} else { | |||
$have_msa = 1; | |||
} | |||
unlink("$tmpf.o"); | |||
have_msa=0 | |||
if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then | |||
tmpd="$(mktemp -d)" | |||
tmpf="$tmpd/a.c" | |||
code='"addvi.b $w0, $w1, 1"' | |||
msa_flags='-mmsa -mfp64 -mload-store-pairs' | |||
printf "#include <msa.h>\n\n" >> "$tmpf" | |||
printf "void main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf" | |||
args="$msa_flags -o $tmpf.o $tmpf" | |||
have_msa=1 | |||
{ | |||
$compiler_name $flags $args >/dev/null 2>&1 | |||
} || { | |||
have_msa=0 | |||
} | |||
} | |||
$architecture = x86 if ($data =~ /ARCH_X86/); | |||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | |||
$architecture = e2k if ($data =~ /ARCH_E2K/); | |||
$architecture = power if ($data =~ /ARCH_POWER/); | |||
$architecture = mips if ($data =~ /ARCH_MIPS/); | |||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); | |||
$architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
$architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
$binformat = bin32; | |||
$binformat = bin64 if ($data =~ /BINARY_64/); | |||
$no_avx512= 0; | |||
if (($architecture eq "x86") || ($architecture eq "x86_64")) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with AVX512"; | |||
$no_avx512 = 0; | |||
} else { | |||
# $tmpf = new File::Temp( UNLINK => 1 ); | |||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | |||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; | |||
if ($compiler eq "PGI") { | |||
$args = " -tp skylake -c -o $tmpf.o $tmpf"; | |||
} | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$no_avx512 = 1; | |||
} else { | |||
$no_avx512 = 0; | |||
} | |||
unlink("$tmpf.o"); | |||
rm -rf "$tmpd" | |||
fi | |||
# Re-read the architecture from the second preprocessor run (the word-size
# switches added to $compiler_name may have changed the target, e.g. x86_64
# with -m32). First match wins; when nothing matches, $architecture keeps
# the value found by the first pass.
for arch_map in \
    X86_64:x86_64 X86:x86 E2K:e2k POWER:power MIPS64:mips64 MIPS:mips \
    ALPHA:alpha SPARC:sparc IA64:ia64 ARM64:arm64 ARM:arm ZARCH:zarch \
    LOONGARCH64:loongarch64
do
    case "$data" in
        *ARCH_"${arch_map%%:*}"*)
            architecture="${arch_map#*:}"
            break
            ;;
    esac
done

# 64-bit output is flagged by a BINARY_64 marker; anything else counts as
# 32-bit. Removing a prefix up to the marker changes $data only if it is there.
if [ "${data#*BINARY_64}" != "$data" ]; then
    binformat='bin64'
else
    binformat='bin32'
fi
no_avx512=0 | |||
if [ "$architecture" = "x86" ] || [ "$architecture" = "x86_64" ]; then | |||
tmpd=`mktemp -d` | |||
tmpf="$tmpd/a.c" | |||
code='"vbroadcastss -4 * 4(%rsi), %zmm2"' | |||
printf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf" | |||
if [ "$compiler" = "PGI" ]; then | |||
args=" -tp skylake -c -o $tmpf.o $tmpf" | |||
else | |||
args=" -march=skylake-avx512 -c -o $tmpf.o $tmpf" | |||
fi | |||
no_avx512=0 | |||
{ | |||
$compiler_name $flags $args >/dev/null 2>&1 | |||
} || { | |||
no_avx512=1 | |||
} | |||
} | |||
$c11_atomics = 0; | |||
if ($data =~ /HAVE_C11/) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with C11"; | |||
$c11_atomics = 0; | |||
} else { | |||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
print $tmpf "#include <stdatomic.h>\nint main(void){}\n"; | |||
$args = " -c -o $tmpf.o $tmpf"; | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$c11_atomics = 0; | |||
} else { | |||
$c11_atomics = 1; | |||
} | |||
unlink("$tmpf.o"); | |||
rm -rf "$tmpd" | |||
fi | |||
# RISC-V only: check whether the compiler accepts a vector instruction with
# -march=rv64gv. NO_RV64GV is reported later when the probe compile fails.
no_rv64gv=0
if [ "$architecture" = "riscv64" ]; then
    tmpd="$(mktemp -d)"
    tmpf="$tmpd/a.c"
    code='"vsetvli zero, zero, e8, m1\n"'
    printf "int main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf"
    args=" -march=rv64gv -c -o $tmpf.o $tmpf"
    # Only the exit status matters; compiler diagnostics are discarded.
    if ! $compiler_name $flags $args >/dev/null 2>&1; then
        no_rv64gv=1
    fi
    rm -rf "$tmpd"
fi
# When the preprocessor output advertises HAVE_C11, confirm that
# <stdatomic.h> really compiles before HAVE_C11 is written to the config
# header.
c11_atomics=0
case "$data" in
    *HAVE_C11*)
        tmpd="$(mktemp -d)"
        tmpf="$tmpd/a.c"
        printf "#include <stdatomic.h>\nint main(void){}\n" >> "$tmpf"
        # Must be double-quoted: with single quotes the compiler was handed
        # the literal words "$tmpf.o" and "$tmpf", so the probe always failed
        # and C11 atomics were never detected.
        args=" -c -o $tmpf.o $tmpf"
        c11_atomics=1
        # Only the exit status matters; compiler diagnostics are discarded.
        if ! $compiler_name $flags $args >/dev/null 2>&1; then
            c11_atomics=0
        fi
        rm -rf "$tmpd"
        ;;
esac
# GCC 4.6 and older on x86/x86_64 is flagged as OLDGCC and as lacking AVX2.
oldgcc=0
no_avx2=0
if [ "$compiler" = "GCC" ]; then
    case "$architecture" in
        x86|x86_64)
            data="$($compiler_name -dumpversion)"
            # Reduce "major.minor.patch" to "major.minor" so that awk can
            # compare the value as a number.
            case "$data" in
                *.*.*) data="${data%.*}" ;;
            esac
            # The assignment is quoted so that unexpected -dumpversion output
            # (blanks, glob characters) cannot be split into extra awk arguments.
            if awk -v "n1=$data" -v n2=4.6 'BEGIN { exit !(n1 <= n2) }'; then
                no_avx2=1
                oldgcc=1
            fi
            ;;
    esac
fi
# Compile ctest1.c to assembly and look at its first ".globl" line to learn
# which prefix characters ('_' and/or '.') the toolchain puts in front of C
# symbol names. The prefix is stored in need_fu (empty when there is none).
data=`$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`
need_fu=''
# grep's own output is discarded: it is used purely as a test here, and
# printing the matched line would pollute stdout (which carries the results
# when the script is run with "-" as the makefile argument).
if echo "$data" | grep 'globl[[:space:]][_\.]' >/dev/null; then
    need_fu="${data##*globl[[:space:]]}"   # drop everything up to the symbol
    need_fu="${need_fu%%[!_\.]*}"          # keep only the leading '_' / '.' run
fi
# Decide whether this is a cross build by comparing the compiler's target
# with the build host.
cross=0
if [ "$architecture" != "$hostarch" ]; then
    case "$hostarch/$architecture" in
        # A 64-bit host building for its 32-bit sibling is not a cross build.
        x86_64/x86|mips64/mips) ;;
        *) cross=1 ;;
    esac
fi
if [ "$os" != "$hostos" ]; then
    cross=1
fi
# Termux: an Android target on a "Linux" host is native when TERMUX_APP_PID is set.
if [ "$os" = "Android" ] && [ "$hostos" = "Linux" ] && [ -n "$TERMUX_APP_PID" ]; then
    cross=0
fi

# The OpenMP switch is only kept when USE_OPENMP is exactly 1.
if [ "$USE_OPENMP" != 1 ]; then
    openmp=''
fi
# Harvest the library search paths (-L / -Y), libraries (-l) and static
# archives (*.a) that the compiler driver passes to the linker, by linking
# ctest2.c in verbose mode and scanning the driver's output. The results are
# later written out as CEXTRALIB.
linker_L=""
linker_l=""
linker_a=""
link=`$compiler_name $flags -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $flags $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`
# Fold "-Y P,<path>" into a single "-Y<path>" token before splitting.
link=`echo "$link" | sed 's/\-Y[[:space:]]P\,/\-Y/g'`

# Split into tokens. From here on $flags holds the tokens of the link line,
# no longer the compiler flags given on the command line.
flags=`echo $link | tr "'[[:space:]],\n" " "`
# Strip trailing quotes
old_flags="$flags"
flags=''
for flag in $old_flags; do
    f=`echo "$flag" | tr '"' ' '`
    flags="$flags $f"
done

for flag in $flags; do
    # Library search paths; -LIST:/-LANG: are compiler options, not paths.
    case "$flag" in
        -LIST:*|-LANG:*) ;;
        -L*) linker_L="$linker_L $flag" ;;
    esac

    case "$flag" in
        -Y*) linker_L="$linker_L -Wl,$flag" ;;
    esac

    case "$flag" in
        --exclude-libs*)
            linker_L="$linker_L -Wl,$flag"
            # Consume the token so the checks below cannot record it a second
            # time (e.g. as a static archive when it ends in ".a"). The
            # original cleared $flags here, which had no effect on the loop.
            flag=""
            ;;
    esac

    # Libraries, minus start-up/runtime pieces the driver adds on its own
    # and anything whose name contains a digit.
    case "$flag" in
        -l*)
            case "$flag" in
                *gfortranbegin*|*frtbegin*|*pathfstart*|*numa*|*crt[0-9]*|\
                *gcc*|*user32*|*kernel32*|*advapi32*|*shell32*|*omp*|\
                *[0-9]*) ;;
                *) linker_l="$linker_l $flag" ;;
            esac
            ;;
    esac

    case "$flag" in
        *.a) linker_a="$linker_a $flag" ;;
    esac
done
# Probe-only mode: when "-" is given as the makefile argument, print just the
# capability restrictions on stdout and stop without creating any file.
if [ "$makefile" = "-" ]; then
    if [ "$no_rv64gv" -eq 1 ]; then printf "NO_RV64GV=1\n"; fi
    if [ "$no_avx512" -eq 1 ]; then printf "NO_AVX512=1\n"; fi
    if [ "$no_avx2" -eq 1 ]; then printf "NO_AVX2=1\n"; fi
    if [ "$oldgcc" -eq 1 ]; then printf "OLDGCC=1\n"; fi
    exit 0
fi
if ($compiler eq "GCC" &&( ($architecture eq "x86") || ($architecture eq "x86_64"))) { | |||
$no_avx2 = 0; | |||
$oldgcc = 0; | |||
$data = `$compiler_name -dumpversion`; | |||
if ($data <= 4.6) { | |||
$no_avx2 = 1; | |||
$oldgcc = 1; | |||
} | |||
} | |||
$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | |||
:> "$makefile" || exit 1 | |||
:> "$config" || exit 1 | |||
$data =~ /globl\s([_\.]*)(.*)/; | |||
$need_fu = $1; | |||
$cross = 0; | |||
if ($architecture ne $hostarch) { | |||
$cross = 1; | |||
$cross = 0 if (($hostarch eq "x86_64") && ($architecture eq "x86")); | |||
$cross = 0 if (($hostarch eq "mips64") && ($architecture eq "mips")); | |||
} | |||
$cross = 1 if ($os ne $hostos); | |||
$openmp = "" if $ENV{USE_OPENMP} != 1; | |||
$linker_L = ""; | |||
$linker_l = ""; | |||
$linker_a = ""; | |||
# print $data, "\n"; | |||
{ | |||
$link = `$compiler_name $flags -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $flags $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`; | |||
$link =~ s/\-Y\sP\,/\-Y/g; | |||
@flags = split(/[\s\,\n]/, $link); | |||
# remove leading and trailing quotes from each flag. | |||
@flags = map {s/^['"]|['"]$//g; $_} @flags; | |||
foreach $flags (@flags) { | |||
if ( | |||
($flags =~ /^\-L/) | |||
&& ($flags !~ /^-LIST:/) | |||
&& ($flags !~ /^-LANG:/) | |||
) { | |||
$linker_L .= $flags . " " | |||
} | |||
if ($flags =~ /^\-Y/) { | |||
$linker_L .= "-Wl,". $flags . " " | |||
} | |||
if ($flags =~ /^\--exclude-libs/) { | |||
$linker_L .= "-Wl,". $flags . " "; | |||
$flags=""; | |||
} | |||
if ( | |||
($flags =~ /^\-l/) | |||
&& ($flags !~ /gfortranbegin/) | |||
&& ($flags !~ /frtbegin/) | |||
&& ($flags !~ /pathfstart/) | |||
&& ($flags !~ /numa/) | |||
&& ($flags !~ /crt[0-9]/) | |||
&& ($flags !~ /gcc/) | |||
&& ($flags !~ /user32/) | |||
&& ($flags !~ /kernel32/) | |||
&& ($flags !~ /advapi32/) | |||
&& ($flags !~ /shell32/) | |||
&& ($flags !~ /omp/) | |||
&& ($flags !~ /[0-9]+/) | |||
) { | |||
$linker_l .= $flags . " " | |||
} | |||
$linker_a .= $flags . " " if $flags =~ /\.a$/; | |||
printf "OSNAME=%s\n" "$os" | |||
printf "ARCH=%s\n" "$architecture" | |||
printf "C_COMPILER=%s\n" "$compiler" | |||
[ $binformat != 'bin32' ] && printf "BINARY32=\n" | |||
[ $binformat != 'bin64' ] && printf "BINARY64=\n" | |||
[ "$binformat" = "bin32" ] && printf "BINARY32=1\n" | |||
[ "$binformat" = "bin64" ] && printf "BINARY64=1\n" | |||
[ -n "$need_fu" ] && printf 'FU=%s\n' "$need_fu" | |||
[ "$cross" -ne 0 ] && [ -n "$cross_suffix" ] && \ | |||
printf "CROSS_SUFFIX=%s\n" "$cross_suffix" | |||
[ "$cross" -ne 0 ] && printf "CROSS=1\n" | |||
printf "CEXTRALIB=%s %s %s\n" "$linker_L" "$linker_l" "$linker_a" | |||
[ "$have_msa" -eq 1 ] && { | |||
printf "HAVE_MSA=1\n" | |||
printf "MSA_FLAGS=%s\n" "$msa_flags" | |||
} | |||
[ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n" | |||
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n" | |||
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n" | |||
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n" | |||
} >> "$makefile" | |||
} | |||
# Upper-case the detected names; they are used to build the OS_*, ARCH_* and
# C_* macro names written to the config header. (The original carried a stray
# "/" after the tr set on the first line and wrapped the character classes in
# an extra pair of brackets; both are removed.)
os="$(echo "$os" | tr '[:lower:]' '[:upper:]')"
architecture="$(echo "$architecture" | tr '[:lower:]' '[:upper:]')"
compiler="$(echo "$compiler" | tr '[:lower:]' '[:upper:]')"
open(MAKEFILE, "> $makefile") || die "Can't create $makefile"; | |||
open(CONFFILE, "> $config" ) || die "Can't create $config"; | |||
# Append the detected configuration to the C header named by $config:
# OS / architecture / compiler markers, the word size, the symbol prefix,
# and the optional MSA and C11-atomics capabilities.
{
    printf "#define OS_%s\t1\n" "$os"
    printf "#define ARCH_%s\t1\n" "$architecture"
    printf "#define C_%s\t1\n" "$compiler"
    case "$binformat" in
        bin32) printf "#define __32BIT__\t1\n" ;;
        bin64) printf "#define __64BIT__\t1\n" ;;
    esac
    if [ -n "$need_fu" ]; then
        printf "#define FUNDERSCORE\t%s\n" "$need_fu"
    fi
    if [ "$have_msa" -eq 1 ]; then
        printf "#define HAVE_MSA\t1\n"
    fi
    if [ "$c11_atomics" -eq 1 ]; then
        printf "#define HAVE_C11\t1\n"
    fi
} >> "$config"
# print $data, "\n"; | |||
print MAKEFILE "OSNAME=$os\n"; | |||
print MAKEFILE "ARCH=$architecture\n"; | |||
print MAKEFILE "C_COMPILER=$compiler\n"; | |||
print MAKEFILE "BINARY32=\n" if $binformat ne bin32; | |||
print MAKEFILE "BINARY64=\n" if $binformat ne bin64; | |||
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32; | |||
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64; | |||
print MAKEFILE "FU=$need_fu\n" if $need_fu ne ""; | |||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne ""; | |||
print MAKEFILE "CROSS=1\n" if $cross != 0; | |||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | |||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | |||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | |||
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | |||
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1; | |||
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1; | |||
$os =~ tr/[a-z]/[A-Z]/; | |||
$architecture =~ tr/[a-z]/[A-Z]/; | |||
$compiler =~ tr/[a-z]/[A-Z]/; | |||
print CONFFILE "#define OS_$os\t1\n"; | |||
print CONFFILE "#define ARCH_$architecture\t1\n"; | |||
print CONFFILE "#define C_$compiler\t1\n"; | |||
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; | |||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; | |||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; | |||
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1; | |||
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1; | |||
if ($os eq "LINUX") { | |||
if [ "$os" = "LINUX" ]; then | |||
# @pthread = split(/\s+/, `nm /lib/libpthread.so* | grep _pthread_create`); | |||
# if ($pthread[2] ne "") { | |||
# print CONFFILE "#define PTHREAD_CREATE_FUNC $pthread[2]\n"; | |||
# } else { | |||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n"; | |||
printf "#define PTHREAD_CREATE_FUNC pthread_create\n" >> "$config" | |||
# } | |||
} else { | |||
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n"; | |||
} | |||
close(MAKEFILE); | |||
close(CONFFILE); | |||
else | |||
printf "#define PTHREAD_CREATE_FUNC pthread_create\n" >> "$config" | |||
fi |
@@ -0,0 +1,451 @@ | |||
#!/usr/bin/env perl | |||
#use File::Basename; | |||
# use File::Temp qw(tempfile); | |||
# Checking cross compile | |||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | |||
$hostarch = `uname -m | sed -e s/i.86/x86/`; | |||
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS"); | |||
chop($hostarch); | |||
$hostarch = "x86_64" if ($hostarch eq "amd64"); | |||
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); | |||
$hostarch = "arm64" if ($hostarch eq "aarch64"); | |||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); | |||
$hostarch = "zarch" if ($hostarch eq "s390x"); | |||
#$tmpf = new File::Temp( UNLINK => 1 ); | |||
$binary = $ENV{"BINARY"}; | |||
$makefile = shift(@ARGV); | |||
$config = shift(@ARGV); | |||
$compiler_name = shift(@ARGV); | |||
$flags = join(" ", @ARGV); | |||
# First, we need to know the target OS and compiler name | |||
$data = `$compiler_name $flags -E ctest.c`; | |||
if ($?) { | |||
printf STDERR "C Compiler ($compiler_name) is something wrong.\n"; | |||
die 1; | |||
} | |||
# Build the toolchain prefix (CROSS_SUFFIX): the compiler's directory, if it
# is not the current one, followed by everything in its file name up to and
# including the last '-'.
$cross_suffix = "";

eval "use File::Basename";
if ($@){
    warn "could not load PERL module File::Basename, emulating its functionality";
    # Directory part = everything before the last "/". The previous code used
    # rindex(...)-1 as the length, which chopped the final character off the
    # directory, and it mangled names that contain no "/" at all.
    my $slash = rindex($compiler_name, "/");
    my $dirnam = ($slash >= 0) ? substr($compiler_name, 0, $slash) : ".";
    if ($dirnam ne ".") {
	$cross_suffix .= $dirnam . "/";
    }
    # File name part = everything after the last "/" (the whole string when
    # there is none, since $slash is then -1).
    my $basnam = substr($compiler_name, $slash + 1);
    if ($basnam =~ /([^\s]*-)(.*)/) {
	$cross_suffix .= $1;
    }
} else {
    if (dirname($compiler_name) ne ".") {
	$cross_suffix .= dirname($compiler_name) . "/";
    }

    if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
	$cross_suffix .= $1;
    }
}
$compiler = ""; | |||
$compiler = LSB if ($data =~ /COMPILER_LSB/); | |||
$compiler = CLANG if ($data =~ /COMPILER_CLANG/); | |||
$compiler = PGI if ($data =~ /COMPILER_PGI/); | |||
$compiler = PATHSCALE if ($data =~ /COMPILER_PATHSCALE/); | |||
$compiler = INTEL if ($data =~ /COMPILER_INTEL/); | |||
$compiler = OPEN64 if ($data =~ /COMPILER_OPEN64/); | |||
$compiler = SUN if ($data =~ /COMPILER_SUN/); | |||
$compiler = IBM if ($data =~ /COMPILER_IBM/); | |||
$compiler = DEC if ($data =~ /COMPILER_DEC/); | |||
$compiler = GCC if ($compiler eq ""); | |||
$os = Linux if ($data =~ /OS_LINUX/); | |||
$os = FreeBSD if ($data =~ /OS_FREEBSD/); | |||
$os = NetBSD if ($data =~ /OS_NETBSD/); | |||
$os = OpenBSD if ($data =~ /OS_OPENBSD/); | |||
$os = DragonFly if ($data =~ /OS_DRAGONFLY/); | |||
$os = Darwin if ($data =~ /OS_DARWIN/); | |||
$os = SunOS if ($data =~ /OS_SUNOS/); | |||
$os = AIX if ($data =~ /OS_AIX/); | |||
$os = osf if ($data =~ /OS_OSF/); | |||
$os = WINNT if ($data =~ /OS_WINNT/); | |||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/); | |||
$os = Interix if ($data =~ /OS_INTERIX/); | |||
$os = Android if ($data =~ /OS_ANDROID/); | |||
$os = Haiku if ($data =~ /OS_HAIKU/); | |||
$architecture = x86 if ($data =~ /ARCH_X86/); | |||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | |||
$architecture = e2k if ($data =~ /ARCH_E2K/); | |||
$architecture = power if ($data =~ /ARCH_POWER/); | |||
$architecture = mips if ($data =~ /ARCH_MIPS/); | |||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); | |||
$architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
$architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/); | |||
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
$defined = 0; | |||
if ($os eq "AIX") { | |||
$compiler_name .= " -maix32" if ($binary eq "32"); | |||
$compiler_name .= " -maix64" if ($binary eq "64"); | |||
$defined = 1; | |||
} | |||
if ($architecture eq "mips") { | |||
$compiler_name .= " -mabi=32"; | |||
$defined = 1; | |||
} | |||
if ($architecture eq "mips64") { | |||
$compiler_name .= " -mabi=n32" if ($binary eq "32"); | |||
$compiler_name .= " -mabi=64" if ($binary eq "64"); | |||
$defined = 1; | |||
} | |||
if (($architecture eq "arm") || ($architecture eq "arm64")) { | |||
$defined = 1; | |||
} | |||
if ($architecture eq "zarch") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "e2k") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "alpha") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "ia64") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { | |||
$defined = 1; | |||
$binary =32; | |||
} | |||
if ($architecture eq "riscv64") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($architecture eq "loongarch64") { | |||
$defined = 1; | |||
$binary = 64; | |||
} | |||
if ($compiler eq "PGI") { | |||
$compiler_name .= " -tp p7" if ($binary eq "32"); | |||
$compiler_name .= " -tp p7-64" if ($binary eq "64"); | |||
$openmp = "-mp"; | |||
$defined = 1; | |||
} | |||
if ($compiler eq "IBM") { | |||
$compiler_name .= " -q32" if ($binary eq "32"); | |||
$compiler_name .= " -q64" if ($binary eq "64"); | |||
$openmp = "-qsmp=omp"; | |||
$defined = 1; | |||
} | |||
if ($compiler eq "INTEL") { | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler eq "PATHSCALE") { | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler eq "OPEN64") { | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler eq "CLANG") { | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler eq "GCC" || $compiler eq "LSB") { | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($defined == 0) { | |||
$compiler_name .= " -m32" if ($binary eq "32"); | |||
$compiler_name .= " -m64" if ($binary eq "64"); | |||
} | |||
# Do again | |||
$data = `$compiler_name $flags -E ctest.c`; | |||
if ($?) { | |||
printf STDERR "C Compiler ($compiler_name) is something wrong.\n"; | |||
die 1; | |||
} | |||
$have_msa = 0; | |||
if (($architecture eq "mips") || ($architecture eq "mips64")) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check MSA capatibility"; | |||
} else { | |||
$tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 ); | |||
$code = '"addvi.b $w0, $w1, 1"'; | |||
$msa_flags = "-mmsa -mfp64 -mload-store-pairs"; | |||
print $tmpf "#include <msa.h>\n\n"; | |||
print $tmpf "void main(void){ __asm__ volatile($code); }\n"; | |||
$args = "$msa_flags -o $tmpf.o $tmpf"; | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$have_msa = 0; | |||
} else { | |||
$have_msa = 1; | |||
} | |||
unlink("$tmpf.o"); | |||
} | |||
} | |||
$architecture = x86 if ($data =~ /ARCH_X86/); | |||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | |||
$architecture = e2k if ($data =~ /ARCH_E2K/); | |||
$architecture = power if ($data =~ /ARCH_POWER/); | |||
$architecture = mips if ($data =~ /ARCH_MIPS/); | |||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); | |||
$architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
$architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/); | |||
$binformat = bin32; | |||
$binformat = bin64 if ($data =~ /BINARY_64/); | |||
$no_avx512= 0; | |||
if (($architecture eq "x86") || ($architecture eq "x86_64")) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with AVX512"; | |||
$no_avx512 = 0; | |||
} else { | |||
# $tmpf = new File::Temp( UNLINK => 1 ); | |||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | |||
print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||
$args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; | |||
if ($compiler eq "PGI") { | |||
$args = " -tp skylake -c -o $tmpf.o $tmpf"; | |||
} | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$no_avx512 = 1; | |||
} else { | |||
$no_avx512 = 0; | |||
} | |||
unlink("$tmpf.o"); | |||
} | |||
} | |||
$no_rv64gv= 0; | |||
if (($architecture eq "riscv64")) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with the RISCV vector extension"; | |||
$no_rv64gv = 0; | |||
} else { | |||
# $tmpf = new File::Temp( UNLINK => 1 ); | |||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
$code = '"vsetvli zero, zero, e8, m1\n"'; | |||
print $fh "int main(void){ __asm__ volatile($code); }\n"; | |||
$args = " -march=rv64gv -c -o $tmpf.o $tmpf"; | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$no_rv64gv = 1; | |||
} else { | |||
$no_rv64gv = 0; | |||
} | |||
unlink("$tmpf.o"); | |||
} | |||
} | |||
$c11_atomics = 0; | |||
if ($data =~ /HAVE_C11/) { | |||
eval "use File::Temp qw(tempfile)"; | |||
if ($@){ | |||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with C11"; | |||
$c11_atomics = 0; | |||
} else { | |||
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); | |||
print $fh "#include <stdatomic.h>\nint main(void){}\n"; | |||
$args = " -c -o $tmpf.o $tmpf"; | |||
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); | |||
system(@cmd) == 0; | |||
if ($? != 0) { | |||
$c11_atomics = 0; | |||
} else { | |||
$c11_atomics = 1; | |||
} | |||
unlink("$tmpf.o"); | |||
} | |||
} | |||
if ($compiler eq "GCC" &&( ($architecture eq "x86") || ($architecture eq "x86_64"))) { | |||
$no_avx2 = 0; | |||
$oldgcc = 0; | |||
$data = `$compiler_name -dumpversion`; | |||
if ($data <= 4.6) { | |||
$no_avx2 = 1; | |||
$oldgcc = 1; | |||
} | |||
} | |||
$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | |||
$data =~ /globl\s([_\.]*)(.*)/; | |||
$need_fu = $1; | |||
# Decide whether this is a cross build by comparing the compiler's target
# with the build host.
$cross = 0;

if ($architecture ne $hostarch) {
    $cross = 1;
    # A 64-bit host building for its 32-bit sibling is not a cross build.
    $cross = 0 if (($hostarch eq "x86_64") && ($architecture eq "x86"));
    $cross = 0 if (($hostarch eq "mips64") && ($architecture eq "mips"));
}

$cross = 1 if ($os ne $hostos);
# Termux: an Android target on a "Linux" host is native when TERMUX_APP_PID
# is set. This is a string test for "set and non-empty"; the former numeric
# `!= ""` comparison only worked because a PID happens to be a non-zero number.
$cross = 0 if (($os eq "Android") && ($hostos eq "Linux")
	       && defined($ENV{TERMUX_APP_PID}) && ($ENV{TERMUX_APP_PID} ne ""));

# The OpenMP switch is only kept when USE_OPENMP is 1.
$openmp = "" if $ENV{USE_OPENMP} != 1;
# Link ctest2 verbosely and harvest, from the words of the compiler/linker
# output, the search paths ($linker_L), the -l libraries ($linker_l) and the
# static archives ($linker_a) that go into CEXTRALIB.
$linker_L = "";
$linker_l = "";
$linker_a = "";

{
    $link = `$compiler_name $flags -c ctest2.c -o ctest2.o 2>&1 && $compiler_name $flags $openmp -v ctest2.o -o ctest2 2>&1 && rm -f ctest2.o ctest2 ctest2.exe`;

    # Fold "-Y P," into a plain "-Y" so the option survives the split below.
    $link =~ s/\-Y\sP\,/\-Y/g;

    @flags = split(/[\s\,\n]/, $link);

    # remove leading and trailing quotes from each flag.
    s/^['"]|['"]$//g foreach @flags;

    # A -l word matching any of these patterns is not recorded in $linker_l.
    my @skip_lib = (
        qr/gfortranbegin/, qr/frtbegin/, qr/pathfstart/, qr/numa/,
        qr/crt[0-9]/, qr/gcc/, qr/user32/, qr/kernel32/, qr/advapi32/,
        qr/shell32/, qr/omp/, qr/[0-9]+/,
    );

    foreach my $word (@flags) {
        # Library search paths, excluding the -LIST: / -LANG: option families.
        if ($word =~ /^\-L/ && $word !~ /^-LIST:/ && $word !~ /^-LANG:/) {
            $linker_L .= "$word ";
        }

        # -Y options are handed to the linker through -Wl,
        $linker_L .= "-Wl,$word " if $word =~ /^\-Y/;

        # --exclude-libs is forwarded the same way; the word is then blanked
        # (in @flags as well, since the loop variable aliases the element) so
        # the checks below ignore it.
        if ($word =~ /^\--exclude-libs/) {
            $linker_L .= "-Wl,$word ";
            $word = "";
        }

        if ($word =~ /^\-l/ && !grep { $word =~ $_ } @skip_lib) {
            $linker_l .= "$word ";
        }

        # Static archives named explicitly on the link line.
        $linker_a .= "$word " if $word =~ /\.a$/;
    }
}
# Write the detection results: make variables go to $makefile, C preprocessor
# defines go to $config. Both files are created (or truncated) here.
open(MAKEFILE, "> $makefile") || die "Can't create $makefile: $!";
open(CONFFILE, "> $config" )  || die "Can't create $config: $!";

print MAKEFILE "OSNAME=$os\n";
print MAKEFILE "ARCH=$architecture\n";
print MAKEFILE "C_COMPILER=$compiler\n";
# "bin32"/"bin64" are quoted: the previous barewords only worked because
# "use strict" is not in effect.
print MAKEFILE "BINARY32=\n" if $binformat ne "bin32";
print MAKEFILE "BINARY64=\n" if $binformat ne "bin64";
print MAKEFILE "BINARY32=1\n" if $binformat eq "bin32";
print MAKEFILE "BINARY64=1\n" if $binformat eq "bin64";
print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
print MAKEFILE "CROSS=1\n" if $cross != 0;
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1;
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;

# The header uses upper-case names (OS_LINUX, ARCH_X86_64, C_GCC, ...).
# Note that this upper-cases $os, $architecture and $compiler in place.
$os           =~ tr/a-z/A-Z/;
$architecture =~ tr/a-z/A-Z/;
$compiler     =~ tr/a-z/A-Z/;

print CONFFILE "#define OS_$os\t1\n";
print CONFFILE "#define ARCH_$architecture\t1\n";
print CONFFILE "#define C_$compiler\t1\n";
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq "bin32";
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq "bin64";
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;

# The Linux and non-Linux branches emitted the same definition, so it is
# written unconditionally.
print CONFFILE "#define PTHREAD_CREATE_FUNC pthread_create\n";

# Buffered write errors (e.g. a full disk) only surface on close, so check it.
close(MAKEFILE) || die "Can't write $makefile: $!";
close(CONFFILE) || die "Can't write $config: $!";
@@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE) | |||
endif () | |||
endif () | |||
# -march selection for the Cortex-A510/A710/X1/X2 cores. Nothing is added when
# DYNAMIC_ARCH is set. CORE is quoted so that an empty or undefined value
# compares as "" instead of collapsing the if() into a malformed expression.
if ("${CORE}" STREQUAL "CORTEXA510")
  if (NOT DYNAMIC_ARCH)
    set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
  endif ()
endif ()

if ("${CORE}" STREQUAL "CORTEXA710")
  if (NOT DYNAMIC_ARCH)
    set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
  endif ()
endif ()

# Unlike its neighbours, X1 is built for armv8.2-a without +sve.
if ("${CORE}" STREQUAL "CORTEXX1")
  if (NOT DYNAMIC_ARCH)
    set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
  endif ()
endif ()

if ("${CORE}" STREQUAL "CORTEXX2")
  if (NOT DYNAMIC_ARCH)
    set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
  endif ()
endif ()
if (${CORE} STREQUAL POWER10) | |||
if (NOT DYNAMIC_ARCH) | |||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||
@@ -50,6 +50,15 @@ else() | |||
set(ONLY_CBLAS_IN ${ONLY_CBLAS}) | |||
endif() | |||
# When USE_PERL is not defined, openblas.def is produced by running the
# exports/gensymbol script directly and redirecting its output into the
# build tree.
if (NOT DEFINED USE_PERL)
  add_custom_command(
    OUTPUT ${PROJECT_BINARY_DIR}/openblas.def
    #TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND "${PROJECT_SOURCE_DIR}/exports/gensymbol"
    ARGS "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)
# else needs its parentheses like every CMake command; a bare "else" is a
# parse error that aborts configuration.
else ()
add_custom_command( | |||
OUTPUT ${PROJECT_BINARY_DIR}/openblas.def | |||
#TARGET ${OpenBLAS_LIBNAME} PRE_LINK | |||
@@ -57,5 +66,5 @@ add_custom_command( | |||
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" | |||
COMMENT "Create openblas.def file" | |||
VERBATIM) | |||
endif() | |||
endif() |
@@ -25,11 +25,19 @@ check_language(Fortran) | |||
if(CMAKE_Fortran_COMPILER) | |||
enable_language(Fortran) | |||
else() | |||
set (NOFORTRAN 1) | |||
if (NOT NO_LAPACK) | |||
message(STATUS "No Fortran compiler found, can build only BLAS but not LAPACK") | |||
if (NOT XXXXX) | |||
message(STATUS "No Fortran compiler found, can build only BLAS and f2c-converted LAPACK") | |||
set(C_LAPACK 1) | |||
if (INTERFACE64) | |||
set (CCOMMON_OPT "${CCOMMON_OPT} -DLAPACK_ILP64") | |||
endif () | |||
set(TIMER "NONE") | |||
else () | |||
message(STATUS "No Fortran compiler found, can build only BLAS") | |||
endif() | |||
endif() | |||
set (NOFORTRAN 1) | |||
set (NO_LAPACK 1) | |||
endif() | |||
if (NOT ONLY_CBLAS) | |||
@@ -67,7 +67,15 @@ if (${F_COMPILER} STREQUAL "GFORTRAN") | |||
if (BINARY64) | |||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64") | |||
if (INTERFACE64) | |||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||
if (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") | |||
if (WIN32) | |||
set(FCOMMON_OPT "${FCOMMON_OPT} /integer-size:64") | |||
else () | |||
set(FCOMMON_OPT "${FCOMMON_OPT} -integer-size 64") | |||
endif () | |||
else () | |||
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") | |||
endif () | |||
endif () | |||
else () | |||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32") | |||
@@ -1,5 +1,6 @@ | |||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. | |||
if (NOT C_LAPACK) | |||
message (STATUS "fortran lapack") | |||
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F | |||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f dlaset.f | |||
../INSTALL/ilaver.f xerbla_array.f | |||
@@ -488,6 +489,499 @@ if(BUILD_COMPLEX16) | |||
message(STATUS "Building Double Precision Complex") | |||
endif() | |||
else () | |||
message (STATUS "c lapack") | |||
set(ALLAUX ilaenv.c ilaenv2stage.c ieeeck.c lsamen.c iparmq.c iparam2stage.c | |||
ilaprec.c ilatrans.c ilauplo.c iladiag.c chla_transtype.c dlaset.c | |||
../INSTALL/ilaver.c xerbla_array.c | |||
../INSTALL/slamch.c) | |||
set(SCLAUX | |||
scombssq.c sbdsvdx.c sstevx.c sstein.c | |||
sbdsdc.c | |||
sbdsqr.c sdisna.c slabad.c slacpy.c sladiv.c slae2.c slaebz.c | |||
slaed0.c slaed1.c slaed2.c slaed3.c slaed4.c slaed5.c slaed6.c | |||
slaed7.c slaed8.c slaed9.c slaeda.c slaev2.c slagtf.c | |||
slagts.c slamrg.c slanst.c | |||
slapy2.c slapy3.c slarnv.c | |||
slarra.c slarrb.c slarrc.c slarrd.c slarre.c slarrf.c slarrj.c | |||
slarrk.c slarrr.c slaneg.c | |||
slartg.c slaruv.c slas2.c slascl.c | |||
slasd0.c slasd1.c slasd2.c slasd3.c slasd4.c slasd5.c slasd6.c | |||
slasd7.c slasd8.c slasda.c slasdq.c slasdt.c | |||
slaset.c slasq1.c slasq2.c slasq3.c slasq4.c slasq5.c slasq6.c | |||
slasr.c slasrt.c slassq.c slasv2.c spttrf.c sstebz.c sstedc.c | |||
ssteqr.c ssterf.c slaisnan.c sisnan.c | |||
slartgp.c slartgs.c | |||
../INSTALL/second_${TIMER}.c) | |||
set(DZLAUX | |||
dbdsdc.c | |||
dbdsvdx.c dstevx.c dstein.c | |||
dbdsqr.c ddisna.c dlabad.c dlacpy.c dladiv.c dlae2.c dlaebz.c | |||
dlaed0.c dlaed1.c dlaed2.c dlaed3.c dlaed4.c dlaed5.c dlaed6.c | |||
dlaed7.c dlaed8.c dlaed9.c dlaeda.c dlaev2.c dlagtf.c | |||
dlagts.c dlamrg.c dlanst.c | |||
dlapy2.c dlapy3.c dlarnv.c | |||
dlarra.c dlarrb.c dlarrc.c dlarrd.c dlarre.c dlarrf.c dlarrj.c | |||
dlarrk.c dlarrr.c dlaneg.c | |||
dlartg.c dlaruv.c dlas2.c dlascl.c | |||
dlasd0.c dlasd1.c dlasd2.c dlasd3.c dlasd4.c dlasd5.c dlasd6.c | |||
dlasd7.c dlasd8.c dlasda.c dlasdq.c dlasdt.c | |||
dlasq1.c dlasq2.c dlasq3.c dlasq4.c dlasq5.c dlasq6.c | |||
dlasr.c dlasrt.c dlassq.c dlasv2.c dpttrf.c dstebz.c dstedc.c | |||
dsteqr.c dsterf.c dlaisnan.c disnan.c | |||
dlartgp.c dlartgs.c | |||
../INSTALL/dlamch.c ../INSTALL/dsecnd_${TIMER}.c) | |||
set(SLASRC | |||
sgbbrd.c sgbcon.c sgbequ.c sgbrfs.c sgbsv.c | |||
sgbsvx.c sgbtf2.c sgbtrf.c sgbtrs.c sgebak.c sgebal.c sgebd2.c | |||
sgebrd.c sgecon.c sgeequ.c sgees.c sgeesx.c sgeev.c sgeevx.c | |||
sgehd2.c sgehrd.c sgelq2.c sgelqf.c | |||
sgels.c sgelsd.c sgelss.c sgelsy.c sgeql2.c sgeqlf.c | |||
sgeqp3.c sgeqr2.c sgeqr2p.c sgeqrf.c sgeqrfp.c sgerfs.c sgerq2.c sgerqf.c | |||
sgesc2.c sgesdd.c sgesvd.c sgesvdx.c sgesvx.c sgetc2.c | |||
sgetrf2.c sgetri.c | |||
sggbak.c sggbal.c | |||
sgges.c sgges3.c sggesx.c sggev.c sggev3.c sggevx.c | |||
sggglm.c sgghrd.c sgghd3.c sgglse.c sggqrf.c | |||
sggrqf.c sggsvd3.c sggsvp3.c sgtcon.c sgtrfs.c sgtsv.c | |||
sgtsvx.c sgttrf.c sgttrs.c sgtts2.c shgeqz.c | |||
shsein.c shseqr.c slabrd.c slacon.c slacn2.c | |||
slaein.c slaexc.c slag2.c slags2.c slagtm.c slagv2.c slahqr.c | |||
slahr2.c slaic1.c slaln2.c slals0.c slalsa.c slalsd.c | |||
slangb.c slange.c slangt.c slanhs.c slansb.c slansp.c | |||
slansy.c slantb.c slantp.c slantr.c slanv2.c | |||
slapll.c slapmt.c | |||
slaqgb.c slaqge.c slaqp2.c slaqps.c slaqsb.c slaqsp.c slaqsy.c | |||
slaqr0.c slaqr1.c slaqr2.c slaqr3.c slaqr4.c slaqr5.c | |||
slaqtr.c slar1v.c slar2v.c ilaslr.c ilaslc.c | |||
slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarfx.c slarfy.c slargv.c | |||
slarrv.c slartv.c | |||
slarz.c slarzb.c slarzt.c slasy2.c | |||
slasyf.c slasyf_rook.c slasyf_rk.c slasyf_aa.c | |||
slatbs.c slatdf.c slatps.c slatrd.c slatrs.c slatrz.c | |||
sopgtr.c sopmtr.c sorg2l.c sorg2r.c | |||
sorgbr.c sorghr.c sorgl2.c sorglq.c sorgql.c sorgqr.c sorgr2.c | |||
sorgrq.c sorgtr.c sorm2l.c sorm2r.c sorm22.c | |||
sormbr.c sormhr.c sorml2.c sormlq.c sormql.c sormqr.c sormr2.c | |||
sormr3.c sormrq.c sormrz.c sormtr.c spbcon.c spbequ.c spbrfs.c | |||
spbstf.c spbsv.c spbsvx.c | |||
spbtf2.c spbtrf.c spbtrs.c spocon.c spoequ.c sporfs.c sposv.c | |||
sposvx.c spotrf2.c spotri.c spstrf.c spstf2.c | |||
sppcon.c sppequ.c | |||
spprfs.c sppsv.c sppsvx.c spptrf.c spptri.c spptrs.c sptcon.c | |||
spteqr.c sptrfs.c sptsv.c sptsvx.c spttrs.c sptts2.c srscl.c | |||
ssbev.c ssbevd.c ssbevx.c ssbgst.c ssbgv.c ssbgvd.c ssbgvx.c | |||
ssbtrd.c sspcon.c sspev.c sspevd.c sspevx.c sspgst.c | |||
sspgv.c sspgvd.c sspgvx.c ssprfs.c sspsv.c sspsvx.c ssptrd.c | |||
ssptrf.c ssptri.c ssptrs.c sstegr.c sstev.c sstevd.c sstevr.c | |||
ssycon.c ssyev.c ssyevd.c ssyevr.c ssyevx.c ssygs2.c | |||
ssygst.c ssygv.c ssygvd.c ssygvx.c ssyrfs.c ssysv.c ssysvx.c | |||
ssytd2.c ssytf2.c ssytrd.c ssytrf.c ssytri.c ssytri2.c ssytri2x.c | |||
ssyswapr.c ssytrs.c ssytrs2.c | |||
ssyconv.c ssyconvf.c ssyconvf_rook.c | |||
ssysv_aa.c ssysv_aa_2stage.c ssytrf_aa.c ssytrf_aa_2stage.c ssytrs_aa.c ssytrs_aa_2stage.c | |||
ssytf2_rook.c ssytrf_rook.c ssytrs_rook.c | |||
ssytri_rook.c ssycon_rook.c ssysv_rook.c | |||
ssytf2_rk.c ssytrf_rk.c ssytrs_3.c | |||
ssytri_3.c ssytri_3x.c ssycon_3.c ssysv_rk.c | |||
ssysv_aa.c ssytrf_aa.c ssytrs_aa.c | |||
stbcon.c | |||
stbrfs.c stbtrs.c stgevc.c stgex2.c stgexc.c stgsen.c | |||
stgsja.c stgsna.c stgsy2.c stgsyl.c stpcon.c stprfs.c stptri.c | |||
stptrs.c | |||
strcon.c strevc.c strevc3.c strexc.c strrfs.c strsen.c strsna.c strsyl.c | |||
strtrs.c stzrzf.c sstemr.c | |||
slansf.c spftrf.c spftri.c spftrs.c ssfrk.c stfsm.c stftri.c stfttp.c | |||
stfttr.c stpttf.c stpttr.c strttf.c strttp.c | |||
sgejsv.c sgesvj.c sgsvj0.c sgsvj1.c | |||
sgeequb.c ssyequb.c spoequb.c sgbequb.c | |||
sbbcsd.c slapmr.c sorbdb.c sorbdb1.c sorbdb2.c sorbdb3.c sorbdb4.c | |||
sorbdb5.c sorbdb6.c sorcsd.c sorcsd2by1.c | |||
sgeqrt.c sgeqrt2.c sgeqrt3.c sgemqrt.c | |||
stpqrt.c stpqrt2.c stpmqrt.c stprfb.c | |||
sgelqt.c sgelqt3.c sgemlqt.c | |||
sgetsls.c sgetsqrhrt.c sgeqr.c slatsqr.c slamtsqr.c sgemqr.c | |||
sgelq.c slaswlq.c slamswlq.c sgemlq.c | |||
stplqt.c stplqt2.c stpmlqt.c | |||
ssytrd_2stage.c ssytrd_sy2sb.c ssytrd_sb2st.c ssb2st_kernels.c | |||
ssyevd_2stage.c ssyev_2stage.c ssyevx_2stage.c ssyevr_2stage.c | |||
ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c | |||
sgesvdq.c slaorhr_col_getrfnp.c | |||
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c ) | |||
set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c | |||
sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c | |||
sla_syrfsx_extended.c sla_syamv.c sla_syrcond.c sla_syrpvgrw.c | |||
sposvxx.c sporfsx.c sla_porfsx_extended.c sla_porcond.c | |||
sla_porpvgrw.c sgbsvxx.c sgbrfsx.c sla_gbrfsx_extended.c | |||
sla_gbamv.c sla_gbrcond.c sla_gbrpvgrw.c sla_lin_berr.c slarscl2.c | |||
slascl2.c sla_wwaddw.c) | |||
set(CLASRC | |||
cbdsqr.c cgbbrd.c cgbcon.c cgbequ.c cgbrfs.c cgbsv.c cgbsvx.c | |||
cgbtf2.c cgbtrf.c cgbtrs.c cgebak.c cgebal.c cgebd2.c cgebrd.c | |||
cgecon.c cgeequ.c cgees.c cgeesx.c cgeev.c cgeevx.c | |||
cgehd2.c cgehrd.c cgelq2.c cgelqf.c | |||
cgels.c cgelsd.c cgelss.c cgelsy.c cgeql2.c cgeqlf.c cgeqp3.c | |||
cgeqr2.c cgeqr2p.c cgeqrf.c cgeqrfp.c cgerfs.c cgerq2.c cgerqf.c | |||
cgesc2.c cgesdd.c cgesvd.c cgesvdx.c | |||
cgesvj.c cgejsv.c cgsvj0.c cgsvj1.c | |||
cgesvx.c cgetc2.c cgetrf2.c | |||
cgetri.c | |||
cggbak.c cggbal.c | |||
cgges.c cgges3.c cggesx.c cggev.c cggev3.c cggevx.c | |||
cggglm.c cgghrd.c cgghd3.c cgglse.c cggqrf.c cggrqf.c | |||
cggsvd3.c cggsvp3.c | |||
cgtcon.c cgtrfs.c cgtsv.c cgtsvx.c cgttrf.c cgttrs.c cgtts2.c chbev.c | |||
chbevd.c chbevx.c chbgst.c chbgv.c chbgvd.c chbgvx.c chbtrd.c | |||
checon.c cheev.c cheevd.c cheevr.c cheevx.c chegs2.c chegst.c | |||
chegv.c chegvd.c chegvx.c cherfs.c chesv.c chesvx.c chetd2.c | |||
chetf2.c chetrd.c | |||
chetrf.c chetri.c chetri2.c chetri2x.c cheswapr.c | |||
chetrs.c chetrs2.c | |||
chetf2_rook.c chetrf_rook.c chetri_rook.c | |||
chetrs_rook.c checon_rook.c chesv_rook.c | |||
chetf2_rk.c chetrf_rk.c chetri_3.c chetri_3x.c | |||
chetrs_3.c checon_3.c chesv_rk.c | |||
chesv_aa.c chesv_aa_2stage.c chetrf_aa.c chetrf_aa_2stage.c chetrs_aa.c chetrs_aa_2stage.c | |||
chgeqz.c chpcon.c chpev.c chpevd.c | |||
chpevx.c chpgst.c chpgv.c chpgvd.c chpgvx.c chprfs.c chpsv.c | |||
chpsvx.c | |||
chptrd.c chptrf.c chptri.c chptrs.c chsein.c chseqr.c clabrd.c | |||
clacgv.c clacon.c clacn2.c clacp2.c clacpy.c clacrm.c clacrt.c cladiv.c | |||
claed0.c claed7.c claed8.c | |||
claein.c claesy.c claev2.c clags2.c clagtm.c | |||
clahef.c clahef_rook.c clahef_rk.c clahef_aa.c clahqr.c | |||
clahr2.c claic1.c clals0.c clalsa.c clalsd.c clangb.c clange.c clangt.c | |||
clanhb.c clanhe.c | |||
clanhp.c clanhs.c clanht.c clansb.c clansp.c clansy.c clantb.c | |||
clantp.c clantr.c clapll.c clapmt.c clarcm.c claqgb.c claqge.c | |||
claqhb.c claqhe.c claqhp.c claqp2.c claqps.c claqsb.c | |||
claqr0.c claqr1.c claqr2.c claqr3.c claqr4.c claqr5.c | |||
claqsp.c claqsy.c clar1v.c clar2v.c ilaclr.c ilaclc.c | |||
clarf.c clarfb.c clarfb_gett.c clarfg.c clarfgp.c clarft.c | |||
clarfx.c clarfy.c clargv.c clarnv.c clarrv.c clartg.c clartv.c | |||
clarz.c clarzb.c clarzt.c clascl.c claset.c clasr.c classq.c | |||
clasyf.c clasyf_rook.c clasyf_rk.c clasyf_aa.c | |||
clatbs.c clatdf.c clatps.c clatrd.c clatrs.c clatrz.c | |||
cpbcon.c cpbequ.c cpbrfs.c cpbstf.c cpbsv.c | |||
cpbsvx.c cpbtf2.c cpbtrf.c cpbtrs.c cpocon.c cpoequ.c cporfs.c | |||
cposv.c cposvx.c cpotrf2.c cpotri.c cpstrf.c cpstf2.c | |||
cppcon.c cppequ.c cpprfs.c cppsv.c cppsvx.c cpptrf.c cpptri.c cpptrs.c | |||
cptcon.c cpteqr.c cptrfs.c cptsv.c cptsvx.c cpttrf.c cpttrs.c cptts2.c | |||
crot.c cspcon.c csprfs.c cspsv.c | |||
cspsvx.c csptrf.c csptri.c csptrs.c csrscl.c cstedc.c | |||
cstegr.c cstein.c csteqr.c csycon.c | |||
csyrfs.c csysv.c csysvx.c csytf2.c csytrf.c csytri.c | |||
csytri2.c csytri2x.c csyswapr.c | |||
csytrs.c csytrs2.c | |||
csyconv.c csyconvf.c csyconvf_rook.c | |||
csytf2_rook.c csytrf_rook.c csytrs_rook.c | |||
csytri_rook.c csycon_rook.c csysv_rook.c | |||
csytf2_rk.c csytrf_rk.c csytrf_aa.c csytrf_aa_2stage.c csytrs_3.c csytrs_aa.c csytrs_aa_2stage.c | |||
csytri_3.c csytri_3x.c csycon_3.c csysv_rk.c csysv_aa.c csysv_aa_2stage.c | |||
ctbcon.c ctbrfs.c ctbtrs.c ctgevc.c ctgex2.c | |||
ctgexc.c ctgsen.c ctgsja.c ctgsna.c ctgsy2.c ctgsyl.c ctpcon.c | |||
ctprfs.c ctptri.c | |||
ctptrs.c ctrcon.c ctrevc.c ctrevc3.c ctrexc.c ctrrfs.c ctrsen.c ctrsna.c | |||
ctrsyl.c ctrtrs.c ctzrzf.c cung2l.c cung2r.c | |||
cungbr.c cunghr.c cungl2.c cunglq.c cungql.c cungqr.c cungr2.c | |||
cungrq.c cungtr.c cunm2l.c cunm2r.c cunmbr.c cunmhr.c cunml2.c cunm22.c | |||
cunmlq.c cunmql.c cunmqr.c cunmr2.c cunmr3.c cunmrq.c cunmrz.c | |||
cunmtr.c cupgtr.c cupmtr.c icmax1.c scsum1.c cstemr.c | |||
chfrk.c ctfttp.c clanhf.c cpftrf.c cpftri.c cpftrs.c ctfsm.c ctftri.c | |||
ctfttr.c ctpttf.c ctpttr.c ctrttf.c ctrttp.c | |||
cgeequb.c cgbequb.c csyequb.c cpoequb.c cheequb.c | |||
cbbcsd.c clapmr.c cunbdb.c cunbdb1.c cunbdb2.c cunbdb3.c cunbdb4.c | |||
cunbdb5.c cunbdb6.c cuncsd.c cuncsd2by1.c | |||
cgeqrt.c cgeqrt2.c cgeqrt3.c cgemqrt.c | |||
ctpqrt.c ctpqrt2.c ctpmqrt.c ctprfb.c | |||
cgelqt.c cgelqt3.c cgemlqt.c | |||
cgetsls.c cgetsqrhrt.c cgeqr.c clatsqr.c clamtsqr.c cgemqr.c | |||
cgelq.c claswlq.c clamswlq.c cgemlq.c | |||
ctplqt.c ctplqt2.c ctpmlqt.c | |||
chetrd_2stage.c chetrd_he2hb.c chetrd_hb2st.c chb2st_kernels.c | |||
cheevd_2stage.c cheev_2stage.c cheevx_2stage.c cheevr_2stage.c | |||
chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c | |||
cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c | |||
cungtsqr.c cungtsqr_row.c cunhr_col.c ) | |||
set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c | |||
cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c | |||
csysvxx.c csyrfsx.c cla_syrfsx_extended.c cla_syamv.c | |||
cla_syrcond_c.c cla_syrcond_x.c cla_syrpvgrw.c | |||
cposvxx.c cporfsx.c cla_porfsx_extended.c | |||
cla_porcond_c.c cla_porcond_x.c cla_porpvgrw.c | |||
cgbsvxx.c cgbrfsx.c cla_gbrfsx_extended.c cla_gbamv.c | |||
cla_gbrcond_c.c cla_gbrcond_x.c cla_gbrpvgrw.c | |||
chesvxx.c cherfsx.c cla_herfsx_extended.c cla_heamv.c | |||
cla_hercond_c.c cla_hercond_x.c cla_herpvgrw.c | |||
cla_lin_berr.c clarscl2.c clascl2.c cla_wwaddw.c) | |||
set(DLASRC | |||
dgbbrd.c dgbcon.c dgbequ.c dgbrfs.c dgbsv.c | |||
dgbsvx.c dgbtf2.c dgbtrf.c dgbtrs.c dgebak.c dgebal.c dgebd2.c | |||
dgebrd.c dgecon.c dgeequ.c dgees.c dgeesx.c dgeev.c dgeevx.c | |||
dgehd2.c dgehrd.c dgelq2.c dgelqf.c | |||
dgels.c dgelsd.c dgelss.c dgelsy.c dgeql2.c dgeqlf.c | |||
dgeqp3.c dgeqr2.c dgeqr2p.c dgeqrf.c dgeqrfp.c dgerfs.c dgerq2.c dgerqf.c | |||
dgesc2.c dgesdd.c dgesvd.c dgesvdx.c dgesvx.c dgetc2.c | |||
dgetrf2.c dgetri.c | |||
dggbak.c dggbal.c | |||
dgges.c dgges3.c dggesx.c dggev.c dggev3.c dggevx.c | |||
dggglm.c dgghrd.c dgghd3.c dgglse.c dggqrf.c | |||
dggrqf.c dggsvd3.c dggsvp3.c dgtcon.c dgtrfs.c dgtsv.c | |||
dgtsvx.c dgttrf.c dgttrs.c dgtts2.c dhgeqz.c | |||
dhsein.c dhseqr.c dlabrd.c dlacon.c dlacn2.c | |||
dlaein.c dlaexc.c dlag2.c dlags2.c dlagtm.c dlagv2.c dlahqr.c | |||
dlahr2.c dlaic1.c dlaln2.c dlals0.c dlalsa.c dlalsd.c | |||
dlangb.c dlange.c dlangt.c dlanhs.c dlansb.c dlansp.c | |||
dlansy.c dlantb.c dlantp.c dlantr.c dlanv2.c | |||
dlapll.c dlapmt.c | |||
dlaqgb.c dlaqge.c dlaqp2.c dlaqps.c dlaqsb.c dlaqsp.c dlaqsy.c | |||
dlaqr0.c dlaqr1.c dlaqr2.c dlaqr3.c dlaqr4.c dlaqr5.c | |||
dlaqtr.c dlar1v.c dlar2v.c iladlr.c iladlc.c | |||
dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarfx.c dlarfy.c | |||
dlargv.c dlarrv.c dlartv.c | |||
dlarz.c dlarzb.c dlarzt.c dlasy2.c | |||
dlasyf.c dlasyf_rook.c dlasyf_rk.c dlasyf_aa.c | |||
dlatbs.c dlatdf.c dlatps.c dlatrd.c dlatrs.c dlatrz.c | |||
dopgtr.c dopmtr.c dorg2l.c dorg2r.c | |||
dorgbr.c dorghr.c dorgl2.c dorglq.c dorgql.c dorgqr.c dorgr2.c | |||
dorgrq.c dorgtr.c dorm2l.c dorm2r.c dorm22.c | |||
dormbr.c dormhr.c dorml2.c dormlq.c dormql.c dormqr.c dormr2.c | |||
dormr3.c dormrq.c dormrz.c dormtr.c dpbcon.c dpbequ.c dpbrfs.c | |||
dpbstf.c dpbsv.c dpbsvx.c | |||
dpbtf2.c dpbtrf.c dpbtrs.c dpocon.c dpoequ.c dporfs.c dposv.c | |||
dposvx.c dpotrf2.c dpotri.c dpotrs.c dpstrf.c dpstf2.c | |||
dppcon.c dppequ.c | |||
dpprfs.c dppsv.c dppsvx.c dpptrf.c dpptri.c dpptrs.c dptcon.c | |||
dpteqr.c dptrfs.c dptsv.c dptsvx.c dpttrs.c dptts2.c drscl.c | |||
dsbev.c dsbevd.c dsbevx.c dsbgst.c dsbgv.c dsbgvd.c dsbgvx.c | |||
dsbtrd.c dspcon.c dspev.c dspevd.c dspevx.c dspgst.c | |||
dspgv.c dspgvd.c dspgvx.c dsprfs.c dspsv.c dspsvx.c dsptrd.c | |||
dsptrf.c dsptri.c dsptrs.c dstegr.c dstev.c dstevd.c dstevr.c | |||
dsycon.c dsyev.c dsyevd.c dsyevr.c | |||
dsyevx.c dsygs2.c dsygst.c dsygv.c dsygvd.c dsygvx.c dsyrfs.c | |||
dsysv.c dsysvx.c | |||
dsytd2.c dsytf2.c dsytrd.c dsytrf.c dsytri.c dsytrs.c dsytrs2.c | |||
dsytri2.c dsytri2x.c dsyswapr.c | |||
dsyconv.c dsyconvf.c dsyconvf_rook.c | |||
dsytf2_rook.c dsytrf_rook.c dsytrs_rook.c | |||
dsytri_rook.c dsycon_rook.c dsysv_rook.c | |||
dsytf2_rk.c dsytrf_rk.c dsytrs_3.c | |||
dsytri_3.c dsytri_3x.c dsycon_3.c dsysv_rk.c | |||
dsysv_aa.c dsysv_aa_2stage.c dsytrf_aa.c dsytrf_aa_2stage.c dsytrs_aa.c dsytrs_aa_2stage.c | |||
dtbcon.c | |||
dtbrfs.c dtbtrs.c dtgevc.c dtgex2.c dtgexc.c dtgsen.c | |||
dtgsja.c dtgsna.c dtgsy2.c dtgsyl.c dtpcon.c dtprfs.c dtptri.c | |||
dtptrs.c | |||
dtrcon.c dtrevc.c dtrevc3.c dtrexc.c dtrrfs.c dtrsen.c dtrsna.c dtrsyl.c | |||
dtrtrs.c dtzrzf.c dstemr.c | |||
dsgesv.c dsposv.c dlag2s.c slag2d.c dlat2s.c | |||
dlansf.c dpftrf.c dpftri.c dpftrs.c dsfrk.c dtfsm.c dtftri.c dtfttp.c | |||
dtfttr.c dtpttf.c dtpttr.c dtrttf.c dtrttp.c | |||
dgejsv.c dgesvj.c dgsvj0.c dgsvj1.c | |||
dgeequb.c dsyequb.c dpoequb.c dgbequb.c | |||
dbbcsd.c dlapmr.c dorbdb.c dorbdb1.c dorbdb2.c dorbdb3.c dorbdb4.c | |||
dorbdb5.c dorbdb6.c dorcsd.c dorcsd2by1.c | |||
dgeqrt.c dgeqrt2.c dgeqrt3.c dgemqrt.c | |||
dtpqrt.c dtpqrt2.c dtpmqrt.c dtprfb.c | |||
dgelqt.c dgelqt3.c dgemlqt.c | |||
dgetsls.c dgetsqrhrt.c dgeqr.c dlatsqr.c dlamtsqr.c dgemqr.c | |||
dgelq.c dlaswlq.c dlamswlq.c dgemlq.c | |||
dtplqt.c dtplqt2.c dtpmlqt.c | |||
dsytrd_2stage.c dsytrd_sy2sb.c dsytrd_sb2st.c dsb2st_kernels.c | |||
dsyevd_2stage.c dsyev_2stage.c dsyevx_2stage.c dsyevr_2stage.c | |||
dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c | |||
dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c | |||
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c ) | |||
set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c | |||
dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c | |||
dla_syrfsx_extended.c dla_syamv.c dla_syrcond.c dla_syrpvgrw.c | |||
dposvxx.c dporfsx.c dla_porfsx_extended.c dla_porcond.c | |||
dla_porpvgrw.c dgbsvxx.c dgbrfsx.c dla_gbrfsx_extended.c | |||
dla_gbamv.c dla_gbrcond.c dla_gbrpvgrw.c dla_lin_berr.c dlarscl2.c | |||
dlascl2.c dla_wwaddw.c) | |||
set(ZLASRC | |||
zbdsqr.c zgbbrd.c zgbcon.c zgbequ.c zgbrfs.c zgbsv.c zgbsvx.c | |||
zgbtf2.c zgbtrf.c zgbtrs.c zgebak.c zgebal.c zgebd2.c zgebrd.c | |||
zgecon.c zgeequ.c zgees.c zgeesx.c zgeev.c zgeevx.c | |||
zgehd2.c zgehrd.c zgelq2.c zgelqf.c | |||
zgels.c zgelsd.c zgelss.c zgelsy.c zgeql2.c zgeqlf.c zgeqp3.c | |||
zgeqr2.c zgeqr2p.c zgeqrf.c zgeqrfp.c zgerfs.c zgerq2.c zgerqf.c | |||
zgesc2.c zgesdd.c zgesvd.c zgesvdx.c zgesvx.c | |||
zgesvj.c zgejsv.c zgsvj0.c zgsvj1.c | |||
zgetc2.c zgetrf2.c | |||
zgetri.c | |||
zggbak.c zggbal.c | |||
zgges.c zgges3.c zggesx.c zggev.c zggev3.c zggevx.c | |||
zggglm.c zgghrd.c zgghd3.c zgglse.c zggqrf.c zggrqf.c | |||
zggsvd3.c zggsvp3.c | |||
zgtcon.c zgtrfs.c zgtsv.c zgtsvx.c zgttrf.c zgttrs.c zgtts2.c zhbev.c | |||
zhbevd.c zhbevx.c zhbgst.c zhbgv.c zhbgvd.c zhbgvx.c zhbtrd.c | |||
zhecon.c zheev.c zheevd.c zheevr.c zheevx.c zhegs2.c zhegst.c | |||
zhegv.c zhegvd.c zhegvx.c zherfs.c zhesv.c zhesvx.c zhetd2.c | |||
zhetf2.c zhetrd.c | |||
zhetrf.c zhetri.c zhetri2.c zhetri2x.c zheswapr.c | |||
zhetrs.c zhetrs2.c | |||
zhetf2_rook.c zhetrf_rook.c zhetri_rook.c | |||
zhetrs_rook.c zhecon_rook.c zhesv_rook.c | |||
zhetf2_rk.c zhetrf_rk.c zhetri_3.c zhetri_3x.c | |||
zhetrs_3.c zhecon_3.c zhesv_rk.c | |||
zhesv_aa.c zhesv_aa_2stage.c zhetrf_aa.c zhetrf_aa_2stage.c zhetrs_aa.c zhetrs_aa_2stage.c | |||
zhgeqz.c zhpcon.c zhpev.c zhpevd.c | |||
zhpevx.c zhpgst.c zhpgv.c zhpgvd.c zhpgvx.c zhprfs.c zhpsv.c | |||
zhpsvx.c | |||
zhptrd.c zhptrf.c zhptri.c zhptrs.c zhsein.c zhseqr.c zlabrd.c | |||
zlacgv.c zlacon.c zlacn2.c zlacp2.c zlacpy.c zlacrm.c zlacrt.c zladiv.c | |||
zlaed0.c zlaed7.c zlaed8.c | |||
zlaein.c zlaesy.c zlaev2.c zlags2.c zlagtm.c | |||
zlahef.c zlahef_rook.c zlahef_rk.c zlahef_aa.c zlahqr.c | |||
zlahr2.c zlaic1.c zlals0.c zlalsa.c zlalsd.c zlangb.c zlange.c | |||
zlangt.c zlanhb.c | |||
zlanhe.c | |||
zlanhp.c zlanhs.c zlanht.c zlansb.c zlansp.c zlansy.c zlantb.c | |||
zlantp.c zlantr.c zlapll.c zlapmt.c zlaqgb.c zlaqge.c | |||
zlaqhb.c zlaqhe.c zlaqhp.c zlaqp2.c zlaqps.c zlaqsb.c | |||
zlaqr0.c zlaqr1.c zlaqr2.c zlaqr3.c zlaqr4.c zlaqr5.c | |||
zlaqsp.c zlaqsy.c zlar1v.c zlar2v.c ilazlr.c ilazlc.c | |||
zlarcm.c zlarf.c zlarfb.c zlarfb_gett.c | |||
zlarfg.c zlarfgp.c zlarft.c | |||
zlarfx.c zlarfy.c zlargv.c zlarnv.c zlarrv.c zlartg.c zlartv.c | |||
zlarz.c zlarzb.c zlarzt.c zlascl.c zlaset.c zlasr.c | |||
zlassq.c zlasyf.c zlasyf_rook.c zlasyf_rk.c zlasyf_aa.c | |||
zlatbs.c zlatdf.c zlatps.c zlatrd.c zlatrs.c zlatrz.c | |||
zpbcon.c zpbequ.c zpbrfs.c zpbstf.c zpbsv.c | |||
zpbsvx.c zpbtf2.c zpbtrf.c zpbtrs.c zpocon.c zpoequ.c zporfs.c | |||
zposv.c zposvx.c zpotrf2.c zpotri.c zpotrs.c zpstrf.c zpstf2.c | |||
zppcon.c zppequ.c zpprfs.c zppsv.c zppsvx.c zpptrf.c zpptri.c zpptrs.c | |||
zptcon.c zpteqr.c zptrfs.c zptsv.c zptsvx.c zpttrf.c zpttrs.c zptts2.c | |||
zrot.c zspcon.c zsprfs.c zspsv.c | |||
zspsvx.c zsptrf.c zsptri.c zsptrs.c zdrscl.c zstedc.c | |||
zstegr.c zstein.c zsteqr.c zsycon.c | |||
zsyrfs.c zsysv.c zsysvx.c zsytf2.c zsytrf.c zsytri.c | |||
zsytri2.c zsytri2x.c zsyswapr.c | |||
zsytrs.c zsytrs2.c | |||
zsyconv.c zsyconvf.c zsyconvf_rook.c | |||
zsytf2_rook.c zsytrf_rook.c zsytrs_rook.c zsytrs_aa.c zsytrs_aa_2stage.c | |||
zsytri_rook.c zsycon_rook.c zsysv_rook.c | |||
zsytf2_rk.c zsytrf_rk.c zsytrf_aa.c zsytrf_aa_2stage.c zsytrs_3.c | |||
zsytri_3.c zsytri_3x.c zsycon_3.c zsysv_rk.c zsysv_aa.c zsysv_aa_2stage.c | |||
ztbcon.c ztbrfs.c ztbtrs.c ztgevc.c ztgex2.c | |||
ztgexc.c ztgsen.c ztgsja.c ztgsna.c ztgsy2.c ztgsyl.c ztpcon.c | |||
ztprfs.c ztptri.c | |||
ztptrs.c ztrcon.c ztrevc.c ztrevc3.c ztrexc.c ztrrfs.c ztrsen.c ztrsna.c | |||
ztrsyl.c ztrtrs.c ztzrzf.c zung2l.c | |||
zung2r.c zungbr.c zunghr.c zungl2.c zunglq.c zungql.c zungqr.c zungr2.c | |||
zungrq.c zungtr.c zunm2l.c zunm2r.c zunmbr.c zunmhr.c zunml2.c zunm22.c | |||
zunmlq.c zunmql.c zunmqr.c zunmr2.c zunmr3.c zunmrq.c zunmrz.c | |||
zunmtr.c zupgtr.c | |||
zupmtr.c izmax1.c dzsum1.c zstemr.c | |||
zcgesv.c zcposv.c zlag2c.c clag2z.c zlat2c.c | |||
zhfrk.c ztfttp.c zlanhf.c zpftrf.c zpftri.c zpftrs.c ztfsm.c ztftri.c | |||
ztfttr.c ztpttf.c ztpttr.c ztrttf.c ztrttp.c | |||
zgeequb.c zgbequb.c zsyequb.c zpoequb.c zheequb.c | |||
zbbcsd.c zlapmr.c zunbdb.c zunbdb1.c zunbdb2.c zunbdb3.c zunbdb4.c | |||
zunbdb5.c zunbdb6.c zuncsd.c zuncsd2by1.c | |||
zgeqrt.c zgeqrt2.c zgeqrt3.c zgemqrt.c | |||
ztpqrt.c ztpqrt2.c ztpmqrt.c ztprfb.c | |||
ztplqt.c ztplqt2.c ztpmlqt.c | |||
zgelqt.c zgelqt3.c zgemlqt.c | |||
zgetsls.c zgetsqrhrt.c zgeqr.c zlatsqr.c zlamtsqr.c zgemqr.c | |||
zgelq.c zlaswlq.c zlamswlq.c zgemlq.c | |||
zhetrd_2stage.c zhetrd_he2hb.c zhetrd_hb2st.c zhb2st_kernels.c | |||
zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c | |||
zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c | |||
zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c | |||
zungtsqr.c zungtsqr_row.c zunhr_col.c) | |||
set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c | |||
zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c | |||
zla_syrfsx_extended.c zla_syamv.c zla_syrcond_c.c zla_syrcond_x.c | |||
zla_syrpvgrw.c zposvxx.c zporfsx.c zla_porfsx_extended.c | |||
zla_porcond_c.c zla_porcond_x.c zla_porpvgrw.c zgbsvxx.c zgbrfsx.c | |||
zla_gbrfsx_extended.c zla_gbamv.c zla_gbrcond_c.c zla_gbrcond_x.c | |||
zla_gbrpvgrw.c zhesvxx.c zherfsx.c zla_herfsx_extended.c | |||
zla_heamv.c zla_hercond_c.c zla_hercond_x.c zla_herpvgrw.c | |||
zla_lin_berr.c zlarscl2.c zlascl2.c zla_wwaddw.c) | |||
if(USE_XBLAS) | |||
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC}) | |||
endif() | |||
list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c | |||
DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c | |||
DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c) | |||
list(APPEND DLASRC DEPRECATED/dgegs.c DEPRECATED/dgegv.c | |||
DEPRECATED/dgeqpf.c DEPRECATED/dgelsx.c DEPRECATED/dggsvd.c | |||
DEPRECATED/dggsvp.c DEPRECATED/dlahrd.c DEPRECATED/dlatzm.c DEPRECATED/dtzrqf.c) | |||
list(APPEND CLASRC DEPRECATED/cgegs.c DEPRECATED/cgegv.c | |||
DEPRECATED/cgeqpf.c DEPRECATED/cgelsx.c DEPRECATED/cggsvd.c | |||
DEPRECATED/cggsvp.c DEPRECATED/clahrd.c DEPRECATED/clatzm.c DEPRECATED/ctzrqf.c) | |||
list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c | |||
DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c | |||
DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c) | |||
message(STATUS "Building deprecated routines") | |||
set(DSLASRC spotrs.c) | |||
set(ZCLASRC cpotrs.c) | |||
set(SCATGEN slatm1.c slaran.c slarnd.c) | |||
set(SMATGEN slatms.c slatme.c slatmr.c slatmt.c | |||
slagge.c slagsy.c slakf2.c slarge.c slaror.c slarot.c slatm2.c | |||
slatm3.c slatm5.c slatm6.c slatm7.c slahilb.c) | |||
set(CMATGEN clatms.c clatme.c clatmr.c clatmt.c | |||
clagge.c claghe.c clagsy.c clakf2.c clarge.c claror.c clarot.c | |||
clatm1.c clarnd.c clatm2.c clatm3.c clatm5.c clatm6.c clahilb.c slatm7.c) | |||
set(DZATGEN dlatm1.c dlaran.c dlarnd.c) | |||
set(DMATGEN dlatms.c dlatme.c dlatmr.c dlatmt.c | |||
dlagge.c dlagsy.c dlakf2.c dlarge.c dlaror.c dlarot.c dlatm2.c | |||
dlatm3.c dlatm5.c dlatm6.c dlatm7.c dlahilb.c) | |||
set(ZMATGEN zlatms.c zlatme.c zlatmr.c zlatmt.c | |||
zlagge.c zlaghe.c zlagsy.c zlakf2.c zlarge.c zlaror.c zlarot.c | |||
zlatm1.c zlarnd.c zlatm2.c zlatm3.c zlatm5.c zlatm6.c zlahilb.c dlatm7.c) | |||
# Assemble the LAPACK source list (LA_REL_SRC) and the matrix-generator list
# (LA_GEN_SRC) from the per-precision lists, one enabled precision at a time.

# Single precision real: starts both lists from scratch.
if (BUILD_SINGLE)
  set(LA_REL_SRC
    ${SLASRC}
    ${DSLASRC}
    ${ALLAUX}
    ${SCLAUX})
  set(LA_GEN_SRC
    ${SMATGEN}
    ${SCATGEN})
  message(STATUS "Building Single Precision")
endif ()

# Double precision real.
if (BUILD_DOUBLE)
  list(APPEND LA_REL_SRC
    ${DLASRC}
    ${DSLASRC}
    ${ALLAUX}
    ${DZLAUX})
  list(APPEND LA_GEN_SRC
    ${DMATGEN}
    ${DZATGEN})
  message(STATUS "Building Double Precision")
endif ()

# Single precision complex.
if (BUILD_COMPLEX)
  list(APPEND LA_REL_SRC
    ${CLASRC}
    ${ZCLASRC}
    ${ALLAUX}
    ${SCLAUX})
  list(APPEND LA_GEN_SRC
    ${CMATGEN}
    ${SCATGEN})
  message(STATUS "Building Single Precision Complex")
endif ()

# Double precision complex.
if (BUILD_COMPLEX16)
  list(APPEND LA_REL_SRC
    ${ZLASRC}
    ${ZCLASRC}
    ${ALLAUX}
    ${DZLAUX})
  list(APPEND LA_GEN_SRC
    ${ZMATGEN}
    ${DZATGEN})
  # for zlange/zlanhe
  if (NOT BUILD_DOUBLE)
    list(APPEND LA_REL_SRC dcombssq.c)
  endif ()
  message(STATUS "Building Double Precision Complex")
endif ()
endif() | |||
# add lapack-netlib folder to the sources | |||
set(LA_SOURCES "") | |||
foreach (LA_FILE ${LA_REL_SRC}) | |||
@@ -496,4 +990,9 @@ endforeach () | |||
foreach (LA_FILE ${LA_GEN_SRC}) | |||
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/TESTING/MATGEN/${LA_FILE}") | |||
endforeach () | |||
# Attach compile flags to every collected LAPACK source exactly once:
# LAPACK_FFLAGS by default, LAPACK_CFLAGS when C_LAPACK is set.
# (An earlier unconditional LAPACK_FFLAGS assignment was dropped because both
# branches below overwrite COMPILE_FLAGS anyway.)
if (NOT C_LAPACK)
  set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
else ()
  set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
endif ()
@@ -199,12 +199,12 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||
"#define HAVE_VFP\n" | |||
"#define HAVE_NEON\n" | |||
"#define ARMV8\n") | |||
if ("${TCORE}" STREQUAL "CORTEXA57") | |||
if ("${TCORE}" STREQUAL "CORTEXA57") | |||
set(SGEMM_UNROLL_M 16) | |||
set(SGEMM_UNROLL_N 4) | |||
else () | |||
set(SGEMM_UNROLL_M 8) | |||
set(SGEMM_UNROLL_N 8) | |||
set(SGEMM_UNROLL_N 8) | |||
endif () | |||
set(DGEMM_UNROLL_M 8) | |||
set(DGEMM_UNROLL_N 4) | |||
@@ -603,7 +603,7 @@ endif () | |||
"#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n") | |||
# Move to where gen_config_h would place it | |||
file(MAKE_DIRECTORY ${TARGET_CONF_DIR}) | |||
file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}") | |||
file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}") | |||
else(NOT CMAKE_CROSSCOMPILING) | |||
# compile getarch | |||
@@ -639,7 +639,7 @@ else(NOT CMAKE_CROSSCOMPILING) | |||
OUTPUT_VARIABLE GETARCH_LOG | |||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} | |||
) | |||
# Stop configuring when the getarch helper could not be built; the captured
# compiler output (GETARCH_LOG) is appended to the error message.
# The variable is named, not expanded: if() dereferences it itself, so an
# empty or unset result is treated as failure instead of producing a
# malformed if() expression.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif ()
@@ -284,8 +284,15 @@ if (NOT NOFORTRAN) | |||
# Fortran Compiler dependent settings | |||
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake") | |||
else () | |||
set(NO_LAPACK 1) | |||
set(NO_LAPACKE 1) | |||
if (NOT XXXX) | |||
set(C_LAPACK 1) | |||
if (INTERFACE64) | |||
set (CCOMMON_OPT "${CCOMMON_OPT} -DLAPACK_ILP64") | |||
endif () | |||
set(TIMER "NONE") | |||
else () | |||
set (NO_LAPACK 1) | |||
endif () | |||
endif () | |||
if (BINARY64) | |||
@@ -2610,8 +2610,9 @@ | |||
#endif | |||
#ifndef ASSEMBLER | |||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)\ | |||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) | |||
#if !defined(DYNAMIC_ARCH) \ | |||
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \ | |||
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K)) | |||
extern BLASLONG gemm_offset_a; | |||
extern BLASLONG gemm_offset_b; | |||
extern BLASLONG sbgemm_p; | |||
@@ -45,6 +45,10 @@ size_t length64=sizeof(value64); | |||
#define CPU_NEOVERSEN1 11 | |||
#define CPU_NEOVERSEV1 16 | |||
#define CPU_NEOVERSEN2 17 | |||
#define CPU_CORTEXX1 18 | |||
#define CPU_CORTEXX2 19 | |||
#define CPU_CORTEXA510 20 | |||
#define CPU_CORTEXA710 21 | |||
// Qualcomm | |||
#define CPU_FALKOR 6 | |||
// Cavium | |||
@@ -59,6 +63,8 @@ size_t length64=sizeof(value64); | |||
#define CPU_VORTEX 13 | |||
// Fujitsu | |||
#define CPU_A64FX 15 | |||
// Phytium | |||
#define CPU_FT2000 22 | |||
static char *cpuname[] = { | |||
"UNKNOWN", | |||
@@ -73,12 +79,17 @@ static char *cpuname[] = { | |||
"TSV110", | |||
"EMAG8180", | |||
"NEOVERSEN1", | |||
"NEOVERSEV1" | |||
"NEOVERSEN2" | |||
"THUNDERX3T110", | |||
"VORTEX", | |||
"CORTEXA55", | |||
"A64FX" | |||
"A64FX", | |||
"NEOVERSEV1", | |||
"NEOVERSEN2", | |||
"CORTEXX1", | |||
"CORTEXX2", | |||
"CORTEXA510", | |||
"CORTEXA710", | |||
"FT2000" | |||
}; | |||
static char *cpuname_lower[] = { | |||
@@ -94,12 +105,17 @@ static char *cpuname_lower[] = { | |||
"tsv110", | |||
"emag8180", | |||
"neoversen1", | |||
"neoversev1", | |||
"neoversen2", | |||
"thunderx3t110", | |||
"vortex", | |||
"cortexa55", | |||
"a64fx" | |||
"a64fx", | |||
"neoversev1", | |||
"neoversen2", | |||
"cortexx1", | |||
"cortexx2", | |||
"cortexa510", | |||
"cortexa710", | |||
"ft2000" | |||
}; | |||
int get_feature(char *search) | |||
@@ -182,6 +198,14 @@ int detect(void) | |||
return CPU_NEOVERSEN2; | |||
else if (strstr(cpu_part, "0xd05")) | |||
return CPU_CORTEXA55; | |||
else if (strstr(cpu_part, "0xd46")) | |||
return CPU_CORTEXA510; | |||
else if (strstr(cpu_part, "0xd47")) | |||
return CPU_CORTEXA710; | |||
else if (strstr(cpu_part, "0xd44")) | |||
return CPU_CORTEXX1; | |||
else if (strstr(cpu_part, "0xd4c")) | |||
return CPU_CORTEXX2; | |||
} | |||
// Qualcomm | |||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | |||
@@ -202,6 +226,13 @@ int detect(void) | |||
// Fujitsu | |||
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001")) | |||
return CPU_A64FX; | |||
// Apple | |||
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022")) | |||
return CPU_VORTEX; | |||
// Phytium | |||
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661") | |||
|| strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663"))) | |||
return CPU_FT2000; | |||
} | |||
p = (char *) NULL ; | |||
@@ -382,7 +413,24 @@ void get_cpuconfig(void) | |||
printf("#define DTB_DEFAULT_ENTRIES 48\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
break; | |||
case CPU_CORTEXA510: | |||
case CPU_CORTEXA710: | |||
case CPU_CORTEXX1: | |||
case CPU_CORTEXX2: | |||
printf("#define ARMV9\n"); | |||
printf("#define %s\n", cpuname[d]); | |||
printf("#define L1_CODE_SIZE 65536\n"); | |||
printf("#define L1_CODE_LINESIZE 64\n"); | |||
printf("#define L1_CODE_ASSOCIATIVE 4\n"); | |||
printf("#define L1_DATA_SIZE 65536\n"); | |||
printf("#define L1_DATA_LINESIZE 64\n"); | |||
printf("#define L1_DATA_ASSOCIATIVE 4\n"); | |||
printf("#define L2_SIZE 1048576\n"); | |||
printf("#define L2_LINESIZE 64\n"); | |||
printf("#define L2_ASSOCIATIVE 8\n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
break; | |||
case CPU_FALKOR: | |||
printf("#define FALKOR\n"); | |||
printf("#define L1_CODE_SIZE 65536\n"); | |||
@@ -469,9 +517,9 @@ void get_cpuconfig(void) | |||
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
printf("#define DTB_SIZE 4096 \n"); | |||
break; | |||
#ifdef __APPLE__ | |||
case CPU_VORTEX: | |||
printf("#define VORTEX \n"); | |||
#ifdef __APPLE__ | |||
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0); | |||
printf("#define L1_CODE_SIZE %lld \n",value64); | |||
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0); | |||
@@ -480,10 +528,10 @@ void get_cpuconfig(void) | |||
printf("#define L1_DATA_SIZE %lld \n",value64); | |||
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0); | |||
printf("#define L2_SIZE %lld \n",value64); | |||
#endif | |||
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
printf("#define DTB_SIZE 4096 \n"); | |||
break; | |||
#endif | |||
case CPU_A64FX: | |||
printf("#define A64FX\n"); | |||
printf("#define L1_CODE_SIZE 65535\n"); | |||
@@ -494,6 +542,16 @@ void get_cpuconfig(void) | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
break; | |||
case CPU_FT2000: | |||
printf("#define FT2000\n"); | |||
printf("#define L1_CODE_SIZE 32768\n"); | |||
printf("#define L1_DATA_SIZE 32768\n"); | |||
printf("#define L1_DATA_LINESIZE 64\n"); | |||
printf("#define L2_SIZE 33554432\n"); | |||
printf("#define L2_LINESIZE 64\n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
break; | |||
} | |||
get_cpucount(); | |||
} | |||
@@ -1,5 +1,5 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011-2014, The OpenBLAS Project | |||
Copyright (c) 2011-2022, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
@@ -13,9 +13,9 @@ met: | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written | |||
permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
@@ -70,16 +70,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
/* Core identifiers. Each value doubles as the index of the core's name in
 * cpuname[], so the two must be kept in step. */
#define CPU_GENERIC 0
#define CPU_UNKNOWN 0 /* older name for the generic core, kept for existing users */
#define CPU_C910V 1

static char *cpuname[] = {
  "RISCV64_GENERIC",
  "C910V"
};

/*
 * Identify the host core.
 *
 * On Linux, /proc/cpuinfo is scanned for a "model name" line and an "isa"
 * line. CPU_C910V is returned when the model text contains "T-HEAD C910"
 * and the isa text after its first '4' contains a 'v'. In every other case,
 * including an unreadable /proc/cpuinfo or missing lines, CPU_GENERIC is
 * returned.
 */
int detect(void){
#ifdef __linux
  FILE *infile;
  char buffer[512], isa_buffer[512], model_buffer[512];
  const char *check_c910_str = "T-HEAD C910";
  char *pmodel = NULL, *pisa = NULL;
  char *sep;

  infile = fopen("/proc/cpuinfo", "r");
  if (infile == NULL)
    return CPU_GENERIC;

  while (fgets(buffer, sizeof(buffer), infile)){
    if (!strncmp(buffer, "model name", 10)){
      /* buffer and model_buffer have the same size, so the copy cannot overflow */
      strcpy(model_buffer, buffer);
      sep = strchr(model_buffer, ':');
      pmodel = sep ? sep + 1 : NULL;
    }

    if (!strncmp(buffer, "isa", 3)){
      strcpy(isa_buffer, buffer);
      /* Continue after the first '4'; presumably the one in "rv64", so that
       * the 'v' of "rv" is not taken for an extension letter - TODO confirm. */
      sep = strchr(isa_buffer, '4');
      pisa = sep ? sep + 1 : NULL;
    }
  }

  fclose(infile);

  if (pmodel && pisa && strstr(pmodel, check_c910_str) && strchr(pisa, 'v'))
    return CPU_C910V;
#endif

  return CPU_GENERIC;
}
char *get_corename(void){ | |||
@@ -91,6 +118,7 @@ void get_architecture(void){ | |||
} | |||
void get_subarchitecture(void){ | |||
printf("%s",cpuname[detect()]); | |||
} | |||
void get_subdirname(void){ | |||
@@ -98,7 +126,7 @@ void get_subdirname(void){ | |||
} | |||
void get_cpuconfig(void){ | |||
printf("#define UNKNOWN\n"); | |||
printf("#define %s\n", cpuname[detect()]); | |||
printf("#define L1_DATA_SIZE 65536\n"); | |||
printf("#define L1_DATA_LINESIZE 32\n"); | |||
printf("#define L2_SIZE 512488\n"); | |||
@@ -1707,8 +1707,18 @@ int get_cpuname(void){ | |||
if (model == 0xf && stepping < 0xe) | |||
return CPUTYPE_NANO; | |||
return CPUTYPE_NEHALEM; | |||
case 0x7: | |||
switch (exmodel) { | |||
case 5: | |||
if (support_avx2()) | |||
return CPUTYPE_ZEN; | |||
else | |||
return CPUTYPE_DUNNINGTON; | |||
default: | |||
return CPUTYPE_NEHALEM; | |||
} | |||
default: | |||
if (family >= 0x7) | |||
if (family >= 0x8) | |||
return CPUTYPE_NEHALEM; | |||
else | |||
return CPUTYPE_VIAC3; | |||
@@ -1716,7 +1726,20 @@ int get_cpuname(void){ | |||
} | |||
if (vendor == VENDOR_ZHAOXIN){ | |||
return CPUTYPE_NEHALEM; | |||
switch (family) { | |||
case 0x7: | |||
switch (exmodel) { | |||
case 5: | |||
if (support_avx2()) | |||
return CPUTYPE_ZEN; | |||
else | |||
return CPUTYPE_DUNNINGTON; | |||
default: | |||
return CPUTYPE_NEHALEM; | |||
} | |||
default: | |||
return CPUTYPE_NEHALEM; | |||
} | |||
} | |||
if (vendor == VENDOR_RISE){ | |||
@@ -2416,8 +2439,18 @@ int get_coretype(void){ | |||
if (model == 0xf && stepping < 0xe) | |||
return CORE_NANO; | |||
return CORE_NEHALEM; | |||
case 0x7: | |||
switch (exmodel) { | |||
case 5: | |||
if (support_avx2()) | |||
return CORE_ZEN; | |||
else | |||
return CORE_DUNNINGTON; | |||
default: | |||
return CORE_NEHALEM; | |||
} | |||
default: | |||
if (family >= 0x7) | |||
if (family >= 0x8) | |||
return CORE_NEHALEM; | |||
else | |||
return CORE_VIAC3; | |||
@@ -2425,7 +2458,20 @@ int get_coretype(void){ | |||
} | |||
if (vendor == VENDOR_ZHAOXIN) { | |||
return CORE_NEHALEM; | |||
switch (family) { | |||
case 0x7: | |||
switch (exmodel) { | |||
case 5: | |||
if (support_avx2()) | |||
return CORE_ZEN; | |||
else | |||
return CORE_DUNNINGTON; | |||
default: | |||
return CORE_NEHALEM; | |||
} | |||
default: | |||
return CORE_NEHALEM; | |||
} | |||
} | |||
return CORE_UNKNOWN; | |||
@@ -96,7 +96,7 @@ extern gotoblas_t gotoblas_BARCELONA; | |||
#endif | |||
#ifdef DYN_ATOM | |||
extern gotoblas_t gotoblas_ATOM; | |||
elif defined(DYN_NEHALEM) | |||
#elif defined(DYN_NEHALEM) | |||
#define gotoblas_ATOM gotoblas_NEHALEM | |||
#else | |||
#define gotoblas_ATOM gotoblas_PRESCOTT | |||
@@ -855,7 +855,11 @@ static gotoblas_t *get_coretype(void){ | |||
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); | |||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
} | |||
} else if (exfamily == 10) { | |||
} else if (exfamily == 10) { | |||
if(support_avx512_bf16()) | |||
return &gotoblas_COOPERLAKE; | |||
if(support_avx512()) | |||
return &gotoblas_SKYLAKEX; | |||
if(support_avx()) | |||
return &gotoblas_ZEN; | |||
else{ | |||
@@ -863,7 +867,7 @@ static gotoblas_t *get_coretype(void){ | |||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. | |||
} | |||
}else { | |||
return &gotoblas_BARCELONA; | |||
return NULL; | |||
} | |||
} | |||
@@ -875,14 +879,37 @@ static gotoblas_t *get_coretype(void){ | |||
if (model == 0xf && stepping < 0xe) | |||
return &gotoblas_NANO; | |||
return &gotoblas_NEHALEM; | |||
case 0x7: | |||
switch (exmodel) { | |||
case 5: | |||
if (support_avx2()) | |||
return &gotoblas_ZEN; | |||
else | |||
return &gotoblas_DUNNINGTON; | |||
default: | |||
return &gotoblas_NEHALEM; | |||
} | |||
default: | |||
if (family >= 0x7) | |||
if (family >= 0x8) | |||
return &gotoblas_NEHALEM; | |||
} | |||
} | |||
if (vendor == VENDOR_ZHAOXIN) { | |||
return &gotoblas_NEHALEM; | |||
switch (family) { | |||
case 0x7: | |||
switch (exmodel) { | |||
case 5: | |||
if (support_avx2()) | |||
return &gotoblas_ZEN; | |||
else | |||
return &gotoblas_DUNNINGTON; | |||
default: | |||
return &gotoblas_NEHALEM; | |||
} | |||
default: | |||
return &gotoblas_NEHALEM; | |||
} | |||
} | |||
return NULL; | |||
@@ -60,6 +60,9 @@ static char* openblas_config_str="" | |||
#ifdef USE_OPENMP | |||
"USE_OPENMP " | |||
#endif | |||
#ifdef USE_TLS | |||
"USE_TLS " | |||
#endif | |||
#ifndef DYNAMIC_ARCH | |||
CHAR_CORENAME | |||
#endif | |||
@@ -2,6 +2,12 @@ TOPDIR = .. | |||
include ../Makefile.system | |||
ifdef USE_PERL | |||
GENSYM = gensymbol.pl | |||
else | |||
GENSYM = gensymbol | |||
endif | |||
ifndef EXPRECISION | |||
EXPRECISION = 0 | |||
endif | |||
@@ -119,11 +125,11 @@ dll : ../$(LIBDLLNAME) | |||
-shared -o ../$(LIBDLLNAME) -Wl,--out-implib,../$(IMPLIBNAME) \ | |||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) | |||
$(LIBPREFIX).def : gensymbol | |||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
$(LIBPREFIX).def : $(GENSYM) | |||
./$(GENSYM) win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
libgoto_hpl.def : gensymbol | |||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
libgoto_hpl.def : $(GENSYM) | |||
./$(GENSYM) win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
ifeq ($(OSNAME), Darwin) | |||
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib | |||
@@ -265,24 +271,24 @@ static : ../$(LIBNAME) | |||
$(AR) -cq ../$(LIBNAME) goto.$(SUFFIX) | |||
rm -f goto.$(SUFFIX) | |||
osx.def : gensymbol ../Makefile.system ../getarch.c | |||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
osx.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
./$(GENSYM) osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
aix.def : gensymbol ../Makefile.system ../getarch.c | |||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
aix.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
./$(GENSYM) aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
objcopy.def : gensymbol ../Makefile.system ../getarch.c | |||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
./$(GENSYM) objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
objconv.def : gensymbol ../Makefile.system ../getarch.c | |||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
objconv.def : $(GENSYM) ../Makefile.system ../getarch.c | |||
./$(GENSYM) objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F) | |||
test : linktest.c | |||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | |||
rm -f linktest | |||
linktest.c : gensymbol ../Makefile.system ../getarch.c | |||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c | |||
linktest.c : $(GENSYM) ../Makefile.system ../getarch.c | |||
./$(GENSYM) linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c | |||
clean :: | |||
@rm -f *.def *.dylib __.SYMDEF* *.renamed | |||
@@ -1,6 +1,16 @@ | |||
#!/usr/bin/env perl | |||
#!/bin/sh | |||
# split STRING SEPARATORS
# Print the fields of STRING, cut at every character listed in SEPARATORS,
# each field followed by a single space.
#
# The body runs in a subshell, so the temporary IFS value and the disabled
# pathname expansion (set -f keeps fields such as "*" literal) never reach
# the caller. That holds even when IFS was unset or globbing was already
# off before the call.
split() (
    set -f
    IFS=$2
    # Unquoted on purpose: field splitting on the new IFS does the work.
    set -- $1
    printf '%s ' "$@"
)
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | |||
hostos="$(uname -s | sed 's/\-.*//')" | |||
# | |||
# 1. Not specified | |||
@@ -12,407 +22,390 @@ $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | |||
# 2.2.2 Path is not correct, invalid compiler name, then gfortran is default with NOFORTRAN definition | |||
# | |||
$makefile = shift(@ARGV); | |||
$config = shift(@ARGV); | |||
makefile="$1" | |||
config="$2" | |||
nofortran=0 | |||
$nofortran = 0; | |||
shift 2 | |||
compiler="$*" | |||
compiler_bin="$1" | |||
$compiler = join(" ", @ARGV); | |||
$compiler_bin = shift(@ARGV); | |||
# f77 is too ambiguous | |||
$compiler = "" if $compiler eq "f77"; | |||
@path = split(/:/, $ENV{"PATH"}); | |||
if ($compiler eq "") { | |||
@lists = ("gfortran", "g95", "frt", "fort", "openf90", "openf95", | |||
"sunf77", "sunf90", "sunf95", | |||
"xlf95", "xlf90", "xlf", | |||
"ppuf77", "ppuf95", "ppuf90", "ppuxlf", | |||
"pathf90", "pathf95", | |||
"pgf95", "pgf90", "pgf77", "pgfortran", "nvfortran", | |||
"flang", "egfortran", | |||
"ifort", "nagfor"); | |||
OUTER: | |||
foreach $lists (@lists) { | |||
foreach $path (@path) { | |||
if (-x $path . "/" . $lists) { | |||
$compiler = $lists; | |||
$compiler_bin = $lists; | |||
last OUTER; | |||
[ "$compiler" = "f77" ] && compiler='' | |||
path=`split "$PATH" ':'` | |||
if [ -z "$compiler" ]; then | |||
lists="gfortran g95 frt fort openf90 openf95 | |||
sunf77 sunf90 sunf95 | |||
xlf95 xlf90 xlf | |||
ppuf77 ppuf95 ppuf90 ppuxlf | |||
pathf90 pathf95 | |||
pgf95 pgf90 pgf77 pgfortran nvfortran | |||
flang egfortran | |||
ifort nagfor ifx" | |||
for list in $lists; do | |||
for p in $path; do | |||
if [ -x "$p/$list" ]; then | |||
compiler=$list | |||
compiler_bin=$list | |||
break 2 | |||
fi | |||
done | |||
done | |||
fi | |||
if [ -z "$compiler" ]; then | |||
nofortran=1 | |||
compiler=gfortran | |||
vendor=GFORTRAN | |||
bu="_" | |||
else | |||
{ | |||
data="$(command -v "$compiler_bin" >/dev/null 2>&1)" | |||
vendor="" | |||
} && { | |||
data=`$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s` | |||
if [ -z "$data" ]; then | |||
data=`$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.c && rm -f ftest.c` | |||
fi | |||
case "$data" in *zhoge_*) bu=_ ;; esac | |||
case "$data" in | |||
*Fujitsu*) | |||
vendor=FUJITSU | |||
openmp='-Kopenmp' | |||
;; | |||
*GNU*|*GCC*) | |||
v="${data#*GCC: *\) }" | |||
v="${v%%\"*}" | |||
major="${v%%.*}" | |||
if [ "$major" -ge 4 ]; then | |||
vendor=GFORTRAN | |||
openmp='-fopenmp' | |||
else | |||
case "$compiler" in | |||
*flang*) | |||
vendor=FLANG | |||
openmp='-fopenmp' | |||
;; | |||
*ifx*) | |||
vendor=INTEL | |||
openmp='-fopenmp' | |||
;; | |||
*pgf*|*nvf*) | |||
vendor=PGI | |||
openmp='-mp' | |||
;; | |||
*) | |||
vendor=G77 | |||
openmp='' | |||
;; | |||
esac | |||
fi | |||
;; | |||
*g95*) | |||
vendor=G95 | |||
openmp='' | |||
;; | |||
*Intel*) | |||
vendor=INTEL | |||
openmp='-fopenmp' | |||
;; | |||
*'Sun Fortran'*) | |||
vendor=SUN | |||
openmp='-xopenmp=parallel' | |||
;; | |||
*PathScale*) | |||
vendor=PATHSCALE | |||
openmp='-openmp' | |||
;; | |||
*Open64*) | |||
vendor=OPEN64 | |||
openmp='-mp' | |||
;; | |||
*PGF*|*NVF*) | |||
vendor=PGI | |||
openmp='-mp' | |||
;; | |||
*'IBM XL'*) | |||
vendor=IBM | |||
openmp='-openmp' | |||
;; | |||
*NAG*) | |||
vendor=NAG | |||
openmp='-openmp' | |||
;; | |||
esac | |||
# for embedded underscore name, e.g. zho_ge, it may append 2 underscores. | |||
data=`$compiler -O2 -S ftest3.f >/dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s` | |||
[ -z "$data" ] && { | |||
data=`$compiler -O2 -S ftest3.f >/dev/null 2>&1 && cat ftest3.c && rm -f ftest3.c` | |||
} | |||
case "$data" in *' zho_ge__'*) need2bu=1 ;; esac | |||
case "$vendor" in *G95*) [ "$NO_LAPACKE" != 1 ] && need2bu='' ;; esac | |||
} | |||
if [ -z "$vendor" ]; then | |||
case "$compiler" in | |||
*g77*) | |||
vendor=G77 | |||
bu=_ | |||
openmp='' | |||
;; | |||
*g95*) | |||
vendor=G95 | |||
bu=_ | |||
openmp='' | |||
;; | |||
*gfortran*) | |||
vendor=GFORTRAN | |||
bu=_ | |||
openmp='-fopenmp' | |||
;; | |||
*ifort*|*ifx*) | |||
vendor=INTEL | |||
bu=_ | |||
openmp='-fopenmp' | |||
;; | |||
*pathf*) | |||
vendor=PATHSCALE | |||
bu=_ | |||
openmp='-mp' | |||
;; | |||
*pgf*|*nvf*) | |||
vendor=PGI | |||
bu=_ | |||
openmp='-mp' | |||
;; | |||
*ftn*) | |||
vendor=PGI | |||
bu=_ | |||
openmp=-openmp | |||
;; | |||
*frt*) | |||
vendor=FUJITSU | |||
bu=_ | |||
openmp='-openmp' | |||
;; | |||
*sunf77*|*sunf90*|*sunf95*) | |||
vendor=SUN | |||
bu=_ | |||
openmp='-xopenmp=parallel' | |||
;; | |||
*ppuf*|*xlf*) | |||
vendor=IBM | |||
openmp='-openmp' | |||
;; | |||
*open64*) | |||
vendor=OPEN64 | |||
openmp='-mp' | |||
;; | |||
*flang*) | |||
vendor=FLANG | |||
bu=_ | |||
openmp='-fopenmp' | |||
;; | |||
*nagfor*) | |||
vendor=NAG | |||
bu=_ | |||
openmp='-openmp' | |||
;; | |||
esac | |||
if [ -z "$vendor" ]; then | |||
nofortran=1 | |||
compiler="gfortran" | |||
vendor=GFORTRAN | |||
bu=_ | |||
openmp='' | |||
fi | |||
fi | |||
fi | |||
{ | |||
data=`command -v $compiler_bin >/dev/null 2>&1` | |||
} && { | |||
binary=$BINARY | |||
[ "$USE_OPENMP" != 1 ] && openmp='' | |||
case "$binary" in | |||
32) | |||
{ | |||
link=`$compiler $openmp -m32 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
link=`$compiler $openmp -q32 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
# for AIX | |||
link=`$compiler $openmp -maix32 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
# for gfortran MIPS | |||
mips_data=`$compiler_bin -E -dM - < /dev/null` | |||
case "$mips_data" in | |||
*_MIPS_ISA_MIPS64*) | |||
link=`$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
;; | |||
*) | |||
link=`$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
;; | |||
esac | |||
} || { | |||
binary='' | |||
} | |||
} | |||
} | |||
} | |||
if ($compiler eq "") { | |||
$nofortran = 1; | |||
$compiler = "gfortran"; | |||
$vendor = GFORTRAN; | |||
$bu = "_"; | |||
} else { | |||
$data = `which $compiler_bin > /dev/null 2> /dev/null`; | |||
$vendor = ""; | |||
if (!$?) { | |||
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`; | |||
if ($data eq "") { | |||
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.c && rm -f ftest.c`; | |||
} | |||
if ($data =~ /zhoge_/) { | |||
$bu = "_"; | |||
} | |||
if ($data =~ /Fujitsu/) { | |||
$vendor = FUJITSU; | |||
$openmp = "-Kopenmp"; | |||
} elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { | |||
$data =~ s/\(+.*?\)+//g; | |||
$data =~ /(\d+)\.(\d+).(\d+)/; | |||
$major = $1; | |||
$minor = $2; | |||
if ($major >= 4) { | |||
$vendor = GFORTRAN; | |||
$openmp = "-fopenmp"; | |||
} else { | |||
if ($compiler =~ /flang/) { | |||
$vendor = FLANG; | |||
$openmp = "-fopenmp"; | |||
} elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) { | |||
$vendor = PGI; | |||
$openmp = "-mp"; | |||
} else { | |||
$vendor = G77; | |||
$openmp = ""; | |||
} | |||
} | |||
} | |||
if ($data =~ /g95/) { | |||
$vendor = G95; | |||
$openmp = ""; | |||
} | |||
if ($data =~ /Intel/) { | |||
$vendor = INTEL; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($data =~ /Sun Fortran/) { | |||
$vendor = SUN; | |||
$openmp = "-xopenmp=parallel"; | |||
} | |||
if ($data =~ /PathScale/) { | |||
$vendor = PATHSCALE; | |||
$openmp = "-openmp"; | |||
} | |||
if ($data =~ /Open64/) { | |||
$vendor = OPEN64; | |||
$openmp = "-mp"; | |||
} | |||
if ($data =~ /PGF/ || $data =~ /NVF/) { | |||
$vendor = PGI; | |||
$openmp = "-mp"; | |||
} | |||
if ($data =~ /IBM XL/) { | |||
$vendor = IBM; | |||
$openmp = "-openmp"; | |||
} | |||
if ($data =~ /NAG/) { | |||
$vendor = NAG; | |||
$openmp = "-openmp"; | |||
} | |||
# for embedded underscore name, e.g. zho_ge, it may append 2 underscores. | |||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; | |||
if ($data eq "") { | |||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.c && rm -f ftest3.c`; | |||
} | |||
if ($data =~ / zho_ge__/) { | |||
$need2bu = 1; | |||
} | |||
if ($vendor =~ /G95/) { | |||
if ($ENV{NO_LAPACKE} != 1) { | |||
$need2bu = ""; | |||
} | |||
} | |||
} | |||
if ($vendor eq "") { | |||
if ($compiler =~ /g77/) { | |||
$vendor = G77; | |||
$bu = "_"; | |||
$openmp = ""; | |||
} | |||
if ($compiler =~ /g95/) { | |||
$vendor = G95; | |||
$bu = "_"; | |||
$openmp = ""; | |||
} | |||
if ($compiler =~ /gfortran/) { | |||
$vendor = GFORTRAN; | |||
$bu = "_"; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler =~ /ifort/) { | |||
$vendor = INTEL; | |||
$bu = "_"; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler =~ /pathf/) { | |||
$vendor = PATHSCALE; | |||
$bu = "_"; | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler =~ /pgf/ || $compiler =~ /nvf/) { | |||
$vendor = PGI; | |||
$bu = "_"; | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler =~ /ftn/) { | |||
$vendor = PGI; | |||
$bu = "_"; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /frt/) { | |||
$vendor = FUJITSU; | |||
$bu = "_"; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /sunf77|sunf90|sunf95/) { | |||
$vendor = SUN; | |||
$bu = "_"; | |||
$openmp = "-xopenmp=parallel"; | |||
} | |||
if ($compiler =~ /ppuf/) { | |||
$vendor = IBM; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /xlf/) { | |||
$vendor = IBM; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /open64/) { | |||
$vendor = OPEN64; | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler =~ /flang/) { | |||
$vendor = FLANG; | |||
$bu = "_"; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler =~ /nagfor/) { | |||
$vendor = NAG; | |||
$bu = "_"; | |||
$openmp = "-openmp"; | |||
} | |||
if ($vendor eq "") { | |||
$nofortran = 1; | |||
$compiler = "gfortran"; | |||
$vendor = GFORTRAN; | |||
$bu = "_"; | |||
$openmp = ""; | |||
} | |||
;; | |||
64) | |||
{ | |||
link=`$compiler $openmp -m64 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
link=`$compiler $openmp -q64 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
# for AIX | |||
link=`$compiler $openmp -maix64 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
# for gfortran MIPS | |||
link=`$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
# for nagfor | |||
link=`$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe` | |||
} || { | |||
binary='' | |||
} | |||
;; | |||
esac | |||
} | |||
if [ -z "$binary" ]; then | |||
link=`$compiler $openmp -v ftest2.f 2>&1 && rm -f a.out a.exe` | |||
fi | |||
} | |||
$data = `which $compiler_bin > /dev/null 2> /dev/null`; | |||
if [ "$vendor" = "NAG" ]; then | |||
link=`$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe` | |||
fi | |||
linker_L="" | |||
linker_l="" | |||
linker_a="" | |||
if (!$?) { | |||
if [ -n "$link" ]; then | |||
$binary = $ENV{"BINARY"}; | |||
link=`echo "$link" | sed 's/\-Y[[:space:]]P\,/\-Y/g'` | |||
$openmp = "" if $ENV{USE_OPENMP} != 1; | |||
link=`echo "$link" | sed 's/\-R[[:space:]]*/\-rpath\%/g'` | |||
if ($binary == 32) { | |||
$link = `$compiler $openmp -m32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
if ($?) { | |||
$link = `$compiler $openmp -q32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
# for AIX | |||
if ($?) { | |||
$link = `$compiler $openmp -maix32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
#For gfortran MIPS | |||
if ($?) { | |||
$mips_data = `$compiler_bin -E -dM - < /dev/null`; | |||
if ($mips_data =~ /_MIPS_ISA_MIPS64/) { | |||
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} else { | |||
$link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
} | |||
$binary = "" if ($?); | |||
} | |||
link=`echo "$link" | sed 's/\-rpath[[:space:]]+/\-rpath\%/g'` | |||
if ($binary == 64) { | |||
$link = `$compiler $openmp -m64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
if ($?) { | |||
$link = `$compiler $openmp -q64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
# for AIX | |||
if ($?) { | |||
$link = `$compiler $openmp -maix64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
#For gfortran MIPS | |||
if ($?) { | |||
$link = `$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
#For nagfor | |||
if ($?) { | |||
$link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
$binary = "" if ($?); | |||
} | |||
if ($binary eq "") { | |||
$link = `$compiler $openmp -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
} | |||
# Join "-rpath-link <dir>" into a single "-rpath-link%<dir>" token. sed works
# with basic regular expressions, where a bare "+" is a literal character, so
# "one or more blanks" is spelled [[:space:]][[:space:]]*.
link=`echo "$link" | sed 's/\-rpath-link[[:space:]][[:space:]]*/\-rpath-link\%/g'`
if ( $vendor eq "NAG") { | |||
$link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
$linker_L = ""; | |||
$linker_l = ""; | |||
$linker_a = ""; | |||
if ($link ne "") { | |||
$link =~ s/\-Y\sP\,/\-Y/g; | |||
$link =~ s/\-R\s*/\-rpath\%/g; | |||
$link =~ s/\-rpath\s+/\-rpath\%/g; | |||
$link =~ s/\-rpath-link\s+/\-rpath-link\%/g; | |||
@flags = split(/[\s\,\n]/, $link); | |||
flags=`echo "$link" | tr "',\n" " "` | |||
# remove leading and trailing quotes from each flag. | |||
@flags = map {s/^['"]|['"]$//g; $_} @flags; | |||
foreach $flags (@flags) { | |||
if ( | |||
($flags =~ /^\-L/) | |||
&& ($flags !~ /^-LIST:/) | |||
&& ($flags !~ /^-LANG:/) | |||
) { | |||
$linker_L .= $flags . " "; | |||
} | |||
if ($flags =~ /^\-Y/) { | |||
next if ($hostos eq 'SunOS'); | |||
$linker_L .= "-Wl,". $flags . " "; | |||
} | |||
if ($flags =~ /^\--exclude-libs/) { | |||
$linker_L .= "-Wl,". $flags . " "; | |||
$flags=""; | |||
} | |||
if ($flags =~ /^\-rpath\%/) { | |||
$flags =~ s/\%/\,/g; | |||
$linker_L .= "-Wl,". $flags . " " ; | |||
} | |||
if ($flags =~ /^\-rpath-link\%/) { | |||
$flags =~ s/\%/\,/g; | |||
$linker_L .= "-Wl,". $flags . " " ; | |||
} | |||
if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) { | |||
$flags = "-lomp"; | |||
} | |||
if ( | |||
($flags =~ /^\-l/) | |||
&& ($flags !~ /ibrary/) | |||
&& ($flags !~ /gfortranbegin/) | |||
&& ($flags !~ /flangmain/) | |||
&& ($flags !~ /frtbegin/) | |||
&& ($flags !~ /pathfstart/) | |||
&& ($flags !~ /crt[0-9]/) | |||
&& ($flags !~ /gcc/) | |||
&& ($flags !~ /user32/) | |||
&& ($flags !~ /kernel32/) | |||
&& ($flags !~ /advapi32/) | |||
&& ($flags !~ /shell32/) | |||
&& ($flags !~ /omp/ || ($vendor !~ /PGI/ && $vendor !~ /FUJITSU/ && $flags =~ /omp/)) | |||
&& ($flags !~ /[0-9]+/ || ($vendor == FUJITSU && $flags =~ /^-lfj90/)) | |||
&& ($flags !~ /^\-l$/) | |||
) { | |||
$linker_l .= $flags . " "; | |||
} | |||
if ( $flags =~ /quickfit.o/ && $vendor == NAG) { | |||
$linker_l .= $flags . " "; | |||
} | |||
if ( $flags =~ /safefit.o/ && $vendor == NAG) { | |||
$linker_l .= $flags . " "; | |||
} | |||
if ( $flags =~ /thsafe.o/ && $vendor == NAG) { | |||
$linker_l .= $flags . " "; | |||
} | |||
$linker_a .= $flags . " " if $flags =~ /\.a$/; | |||
} | |||
} | |||
if ($vendor eq "FLANG"){ | |||
$linker_a .= "-lflang" | |||
} | |||
open(MAKEFILE, ">> $makefile") || die "Can't append $makefile"; | |||
open(CONFFILE, ">> $config" ) || die "Can't append $config"; | |||
print MAKEFILE "F_COMPILER=$vendor\n"; | |||
print MAKEFILE "FC=$compiler\n"; | |||
print MAKEFILE "BU=$bu\n" if $bu ne ""; | |||
print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; | |||
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; | |||
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; | |||
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; | |||
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; | |||
if (($linker_l ne "") || ($linker_a ne "")) { | |||
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; | |||
} | |||
#@flags = map {s/^['"]|['"]$//g; $_} @flags; | |||
# Sort every token of $flags into one of three accumulators:
#   linker_L - search paths and options forwarded to the linker via -Wl,
#   linker_l - libraries (-l...) and selected NAG object files
#   linker_a - static archives (*.a)
# A token may be appended to more than one accumulator; the checks below are
# independent case statements evaluated in order.
for flag in $flags; do | |||
# -L<dir> search paths, except the -LIST: / -LANG: option families.
case "$flag" in -L*) | |||
case "$flag" in | |||
-LIST:*|-LANG:*) ;; | |||
*) linker_L="$linker_L $flag" ;; | |||
esac | |||
esac | |||
# -Y... is passed through to the linker; on SunOS the token is dropped entirely.
case "$flag" in -Y*) | |||
[ "$hostos" = "SunOS" ] && continue | |||
linker_L="$linker_L -Wl,$flag" | |||
;; | |||
esac | |||
# --exclude-libs is forwarded, then the token is cleared so no later check sees it.
case "$flag" in --exclude-libs*) | |||
linker_L="$linker_L -Wl,$flag" | |||
flag="" | |||
;; | |||
esac | |||
# "-rpath%<dir>" tokens: turn the "%" placeholder into "," and forward via -Wl,.
case "$flag" in -rpath%*) | |||
flag=`echo "$flag" | sed 's/\%/\,/g'` | |||
linker_L="$linker_L -Wl,$flag" | |||
esac | |||
# Same treatment for "-rpath-link%<dir>" tokens.
case "$flag" in -rpath-link%*) | |||
flag=`echo "$flag" | sed 's/\%/\,/g'` | |||
linker_L="$linker_L -Wl,$flag" | |||
;; | |||
esac | |||
# When $CC names clang, replace -lgomp with -lomp.
case "$flag" in *-lgomp*) | |||
case "$CC" in *clang*) | |||
flag="-lomp" | |||
;; | |||
esac | |||
esac | |||
# Libraries. Arm order matters: the skip list is tested first, then names
# containing "omp" (kept unless the vendor is PGI or FUJITSU), then names
# containing a digit (kept only for FUJITSU's -lfj90*), then everything else.
case "$flag" in -l*) | |||
case "$flag" in | |||
*ibrary*|*gfortranbegin*|*flangmain*|*frtbegin*|*pathfstart*|\ | |||
*crt[0-9]*|*gcc*|*user32*|*kernel32*|*advapi32*|*shell32*|\ | |||
-l) ;; | |||
*omp*) | |||
case "$vendor" in | |||
*PGI*|*FUJITSU*) ;; | |||
*) linker_l="$linker_l $flag" ;; | |||
esac | |||
;; | |||
*[0-9]*) | |||
if [ "$vendor" = "FUJITSU" ]; then | |||
case "$flag" in | |||
-lfj90*) linker_l="$linker_l $flag" ;; | |||
*) ;; | |||
esac | |||
fi | |||
;; | |||
*) linker_l="$linker_l $flag" ;; | |||
esac | |||
esac | |||
# Object files kept only when the vendor is NAG.
case "$flag" in *quickfit.o*) | |||
[ "$vendor" = "NAG" ] && linker_l="$linker_l $flag" ;; | |||
esac | |||
case "$flag" in *safefit.o*) | |||
[ "$vendor" = "NAG" ] && linker_l="$linker_l $flag" ;; | |||
esac | |||
case "$flag" in *thsafe.o*) | |||
[ "$vendor" = "NAG" ] && linker_l="$linker_l $flag" ;; | |||
esac | |||
# Static archives.
case "$flag" in *.a) linker_a="$linker_a $flag" ;; esac | |||
done | |||
fi | |||
if [ "$vendor" = "FLANG" ]; then | |||
linker_a="$linker_a -lflang" | |||
fi | |||
# Append the detection results: make-syntax assignments go to "$makefile",
# C preprocessor definitions go to "$config".
printf "F_COMPILER=%s\n" "$vendor" >> "$makefile"
printf "FC=%s\n" "$compiler" >> "$makefile"
[ -n "$bu" ] && printf 'BU=%s\n' "$bu" >> "$makefile"
[ "$nofortran" -eq 1 ] && printf 'NOFORTRAN=1\n' >> "$makefile"
[ -n "$bu" ] && printf '#define BUNDERSCORE\t%s\n' "$bu" >> "$config"
[ -n "$bu" ] && printf '#define NEEDBUNDERSCORE\t1\n' >> "$config"
[ -n "$need2bu" ] && printf "#define NEED2UNDERSCORES\t1\n" >> "$config"
# The make-level switch is a plain assignment and belongs in the makefile,
# not a second "#define" in the config header.
[ -n "$need2bu" ] && printf "NEED2UNDERSCORES=1\n" >> "$makefile"
# FEXTRALIB is only emitted when at least one library or archive was found.
if [ -n "$linker_l" ] || [ -n "$linker_a" ]; then
    printf "FEXTRALIB=%s %s %s\n" "$linker_L" "$linker_l" "$linker_a" >> "$makefile"
fi
close(MAKEFILE); | |||
close(CONFFILE); |
@@ -0,0 +1,421 @@ | |||
#!/usr/bin/env perl | |||
# Fortran compiler check. Arguments: <makefile> <config> [compiler [options...]].
# Results are appended to the two files named by the first two arguments.
# Host OS name: output of `uname -s` with everything from the first "-" removed.
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | |||
# | |||
# 1. Not specified | |||
# 1.1 Automatically detect, then check compiler | |||
# 1.2 If no fortran compiler is detected, gfortran is default with NOFORTRAN definition | |||
# 2. Specified | |||
# 2.1 If path is correct, check compiler | |||
# 2.2 If path is not correct, but still valid compiler name, force setting | |||
# 2.2.2 Path is not correct, invalid compiler name, then gfortran is default with NOFORTRAN definition | |||
# | |||
$makefile = shift(@ARGV); | |||
$config = shift(@ARGV); | |||
$nofortran = 0; | |||
# $compiler is the full command line (all remaining arguments joined by spaces);
# $compiler_bin is only its first word.
$compiler = join(" ", @ARGV); | |||
$compiler_bin = shift(@ARGV); | |||
# f77 is too ambiguous | |||
$compiler = "" if $compiler eq "f77"; | |||
@path = split(/:/, $ENV{"PATH"}); | |||
# No compiler given: take the first candidate name, in list order, for which an
# executable exists in any PATH directory.
if ($compiler eq "") { | |||
@lists = ("gfortran", "g95", "frt", "fort", "openf90", "openf95", | |||
"sunf77", "sunf90", "sunf95", | |||
"xlf95", "xlf90", "xlf", | |||
"ppuf77", "ppuf95", "ppuf90", "ppuxlf", | |||
"pathf90", "pathf95", | |||
"pgf95", "pgf90", "pgf77", "pgfortran", "nvfortran", | |||
"flang", "egfortran", | |||
"ifort", "nagfor", "ifx"); | |||
OUTER: | |||
foreach $lists (@lists) { | |||
foreach $path (@path) { | |||
if (-x $path . "/" . $lists) { | |||
$compiler = $lists; | |||
$compiler_bin = $lists; | |||
last OUTER; | |||
} | |||
} | |||
} | |||
} | |||
# Determine $vendor, the OpenMP option ($openmp) and the symbol-underscore
# conventions ($bu, $need2bu).
# With no compiler at all, fall back to gfortran settings and set $nofortran.
if ($compiler eq "") { | |||
$nofortran = 1; | |||
$compiler = "gfortran"; | |||
$vendor = GFORTRAN; | |||
$bu = "_"; | |||
} else { | |||
# `which` is run only for its exit status ($?); its output is discarded.
$data = `which $compiler_bin > /dev/null 2> /dev/null`; | |||
$vendor = ""; | |||
if (!$?) { | |||
# Compile ftest.f with -S and read the generated ftest.s; if that yields
# nothing, repeat the compile and read ftest.c instead.
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.s && rm -f ftest.s`; | |||
if ($data eq "") { | |||
$data = `$compiler -O2 -S ftest.f > /dev/null 2>&1 && cat ftest.c && rm -f ftest.c`; | |||
} | |||
# A "zhoge_" symbol in the output means names get a trailing underscore.
if ($data =~ /zhoge_/) { | |||
$bu = "_"; | |||
} | |||
# Vendor signatures in the compiler output. The checks after the first
# if/elsif are independent, so a later match overrides an earlier one.
if ($data =~ /Fujitsu/) { | |||
$vendor = FUJITSU; | |||
$openmp = "-Kopenmp"; | |||
} elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { | |||
# Remove parenthesised text, then take the first x.y.z number as the version.
$data =~ s/\(+.*?\)+//g; | |||
$data =~ /(\d+)\.(\d+).(\d+)/; | |||
$major = $1; | |||
$minor = $2; | |||
if ($major >= 4) { | |||
$vendor = GFORTRAN; | |||
$openmp = "-fopenmp"; | |||
} else { | |||
# Major version below 4: decide by the compiler command name, G77 as default.
if ($compiler =~ /flang/) { | |||
$vendor = FLANG; | |||
$openmp = "-fopenmp"; | |||
} elsif ($compiler =~ /ifx/) { | |||
$vendor = INTEL; | |||
$openmp = "-fopenmp"; | |||
} elsif ($compiler =~ /pgf/ || $compiler =~ /nvf/) { | |||
$vendor = PGI; | |||
$openmp = "-mp"; | |||
} else { | |||
$vendor = G77; | |||
$openmp = ""; | |||
} | |||
} | |||
} | |||
if ($data =~ /g95/) { | |||
$vendor = G95; | |||
$openmp = ""; | |||
} | |||
if ($data =~ /Intel/) { | |||
$vendor = INTEL; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($data =~ /Sun Fortran/) { | |||
$vendor = SUN; | |||
$openmp = "-xopenmp=parallel"; | |||
} | |||
if ($data =~ /PathScale/) { | |||
$vendor = PATHSCALE; | |||
$openmp = "-openmp"; | |||
} | |||
if ($data =~ /Open64/) { | |||
$vendor = OPEN64; | |||
$openmp = "-mp"; | |||
} | |||
if ($data =~ /PGF/ || $data =~ /NVF/) { | |||
$vendor = PGI; | |||
$openmp = "-mp"; | |||
} | |||
if ($data =~ /IBM XL/) { | |||
$vendor = IBM; | |||
$openmp = "-openmp"; | |||
} | |||
if ($data =~ /NAG/) { | |||
$vendor = NAG; | |||
$openmp = "-openmp"; | |||
} | |||
# for embedded underscore name, e.g. zho_ge, it may append 2 underscores. | |||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; | |||
if ($data eq "") { | |||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.c && rm -f ftest3.c`; | |||
} | |||
if ($data =~ / zho_ge__/) { | |||
$need2bu = 1; | |||
} | |||
# For G95 the double-underscore flag is cleared unless NO_LAPACKE is 1.
if ($vendor =~ /G95/) { | |||
if ($ENV{NO_LAPACKE} != 1) { | |||
$need2bu = ""; | |||
} | |||
} | |||
} | |||
# Probing did not identify a vendor (or was skipped): guess from the compiler
# command name. These checks are also independent; the last match wins.
if ($vendor eq "") { | |||
if ($compiler =~ /g77/) { | |||
$vendor = G77; | |||
$bu = "_"; | |||
$openmp = ""; | |||
} | |||
if ($compiler =~ /g95/) { | |||
$vendor = G95; | |||
$bu = "_"; | |||
$openmp = ""; | |||
} | |||
if ($compiler =~ /gfortran/) { | |||
$vendor = GFORTRAN; | |||
$bu = "_"; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler =~ /ifort/ || $compiler =~ /ifx/) { | |||
$vendor = INTEL; | |||
$bu = "_"; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler =~ /pathf/) { | |||
$vendor = PATHSCALE; | |||
$bu = "_"; | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler =~ /pgf/ || $compiler =~ /nvf/) { | |||
$vendor = PGI; | |||
$bu = "_"; | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler =~ /ftn/) { | |||
$vendor = PGI; | |||
$bu = "_"; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /frt/) { | |||
$vendor = FUJITSU; | |||
$bu = "_"; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /sunf77|sunf90|sunf95/) { | |||
$vendor = SUN; | |||
$bu = "_"; | |||
$openmp = "-xopenmp=parallel"; | |||
} | |||
if ($compiler =~ /ppuf/) { | |||
$vendor = IBM; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /xlf/) { | |||
$vendor = IBM; | |||
$openmp = "-openmp"; | |||
} | |||
if ($compiler =~ /open64/) { | |||
$vendor = OPEN64; | |||
$openmp = "-mp"; | |||
} | |||
if ($compiler =~ /flang/) { | |||
$vendor = FLANG; | |||
$bu = "_"; | |||
$openmp = "-fopenmp"; | |||
} | |||
if ($compiler =~ /nagfor/) { | |||
$vendor = NAG; | |||
$bu = "_"; | |||
$openmp = "-openmp"; | |||
} | |||
# Still unknown: behave as if no Fortran compiler were available.
if ($vendor eq "") { | |||
$nofortran = 1; | |||
$compiler = "gfortran"; | |||
$vendor = GFORTRAN; | |||
$bu = "_"; | |||
$openmp = ""; | |||
} | |||
} | |||
} | |||
# Capture the compiler's verbose link output for ftest2.f in $link.
# `which` is run only for its exit status; the probe is skipped when it fails.
$data = `which $compiler_bin > /dev/null 2> /dev/null`; | |||
if (!$?) { | |||
# BINARY (environment) selects a 32- or 64-bit probe; the OpenMP option is
# dropped unless USE_OPENMP is 1.
$binary = $ENV{"BINARY"}; | |||
$openmp = "" if $ENV{USE_OPENMP} != 1; | |||
# 32-bit: try each width option in turn, moving on only when the previous
# command failed ($? non-zero).
if ($binary == 32) { | |||
$link = `$compiler $openmp -m32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
if ($?) { | |||
$link = `$compiler $openmp -q32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
# for AIX | |||
if ($?) { | |||
$link = `$compiler $openmp -maix32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
#For gfortran MIPS | |||
if ($?) { | |||
# The predefined macros decide between -mabi=n32 and -mabi=32.
$mips_data = `$compiler_bin -E -dM - < /dev/null`; | |||
if ($mips_data =~ /_MIPS_ISA_MIPS64/) { | |||
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} else { | |||
$link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
} | |||
# Every attempt failed: clear $binary so the plain "-v" probe below runs.
$binary = "" if ($?); | |||
} | |||
# 64-bit: same fallback chain, ending with nagfor's -dryrun.
if ($binary == 64) { | |||
$link = `$compiler $openmp -m64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
if ($?) { | |||
$link = `$compiler $openmp -q64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
# for AIX | |||
if ($?) { | |||
$link = `$compiler $openmp -maix64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
#For gfortran MIPS | |||
if ($?) { | |||
$link = `$compiler $openmp -mabi=64 -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
#For nagfor | |||
if ($?) { | |||
$link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
$binary = "" if ($?); | |||
} | |||
# No width requested, or none of the width options worked.
if ($binary eq "") { | |||
$link = `$compiler $openmp -v ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
} | |||
# For NAG the result is always replaced by the -dryrun output.
if ( $vendor eq "NAG") { | |||
$link = `$compiler $openmp -dryrun ftest2.f 2>&1 && rm -f a.out a.exe`; | |||
} | |||
# Split the captured link output ($link) into three space-separated lists:
#   $linker_L - search paths and options forwarded to the linker via -Wl,
#   $linker_l - libraries (-l...) and selected NAG object files
#   $linker_a - static archives (*.a)
$linker_L = "";
$linker_l = "";
$linker_a = "";
if ($link ne "") {
    # Glue option arguments to their option with "%" so that the whitespace
    # split below keeps "-rpath <dir>" together as one token.
    $link =~ s/\-Y\sP\,/\-Y/g;
    $link =~ s/\-R\s*/\-rpath\%/g;
    $link =~ s/\-rpath\s+/\-rpath\%/g;
    $link =~ s/\-rpath-link\s+/\-rpath-link\%/g;
    @flags = split(/[\s\,\n]/, $link);
    # remove leading and trailing quotes from each flag.
    @flags = map {s/^['"]|['"]$//g; $_} @flags;
    foreach $flags (@flags) {
        # -L<dir> search paths, except the -LIST: / -LANG: option families.
        if (
            ($flags =~ /^\-L/)
            && ($flags !~ /^-LIST:/)
            && ($flags !~ /^-LANG:/)
        ) {
            $linker_L .= $flags . " ";
        }
        # -Y... is forwarded to the linker; on SunOS the token is skipped entirely.
        if ($flags =~ /^\-Y/) {
            next if ($hostos eq 'SunOS');
            $linker_L .= "-Wl,". $flags . " ";
        }
        # Forward --exclude-libs, then clear the token so no later test sees it.
        if ($flags =~ /^\--exclude-libs/) {
            $linker_L .= "-Wl,". $flags . " ";
            $flags="";
        }
        # Turn the "%" placeholder back into "," for -Wl,-rpath,<dir>.
        if ($flags =~ /^\-rpath\%/) {
            $flags =~ s/\%/\,/g;
            $linker_L .= "-Wl,". $flags . " " ;
        }
        if ($flags =~ /^\-rpath-link\%/) {
            $flags =~ s/\%/\,/g;
            $linker_L .= "-Wl,". $flags . " " ;
        }
        # When CC names clang, replace -lgomp with -lomp.
        if ($flags =~ /-lgomp/ && $ENV{"CC"} =~ /clang/) {
            $flags = "-lomp";
        }
        # Libraries: everything starting with -l except startup/system
        # libraries, OpenMP runtimes for PGI/FUJITSU, and names containing
        # digits (only FUJITSU's -lfj90* is let through).
        # Vendor names are strings, so they are compared with "eq"; a numeric
        # "==" treats both sides as 0 and is true for every vendor.
        if (
            ($flags =~ /^\-l/)
            && ($flags !~ /ibrary/)
            && ($flags !~ /gfortranbegin/)
            && ($flags !~ /flangmain/)
            && ($flags !~ /frtbegin/)
            && ($flags !~ /pathfstart/)
            && ($flags !~ /crt[0-9]/)
            && ($flags !~ /gcc/)
            && ($flags !~ /user32/)
            && ($flags !~ /kernel32/)
            && ($flags !~ /advapi32/)
            && ($flags !~ /shell32/)
            && ($flags !~ /omp/ || ($vendor !~ /PGI/ && $vendor !~ /FUJITSU/ && $flags =~ /omp/))
            && ($flags !~ /[0-9]+/ || ($vendor eq "FUJITSU" && $flags =~ /^-lfj90/))
            && ($flags !~ /^\-l$/)
        ) {
            $linker_l .= $flags . " ";
        }
        # Object files kept only when the vendor is NAG.
        if ($flags =~ /quickfit.o/ && $vendor eq "NAG") {
            $linker_l .= $flags . " ";
        }
        if ($flags =~ /safefit.o/ && $vendor eq "NAG") {
            $linker_l .= $flags . " ";
        }
        if ($flags =~ /thsafe.o/ && $vendor eq "NAG") {
            $linker_l .= $flags . " ";
        }
        # Static archives.
        $linker_a .= $flags . " " if $flags =~ /\.a$/;
    }
}
# FLANG always gets -lflang appended to the archive list.
if ($vendor eq "FLANG"){ | |||
$linker_a .= "-lflang" | |||
} | |||
# Append the results: make-syntax assignments go to $makefile, C preprocessor
# definitions go to $config. Both files are opened in append mode.
open(MAKEFILE, ">> $makefile") || die "Can't append $makefile"; | |||
open(CONFFILE, ">> $config" ) || die "Can't append $config"; | |||
print MAKEFILE "F_COMPILER=$vendor\n"; | |||
print MAKEFILE "FC=$compiler\n"; | |||
print MAKEFILE "BU=$bu\n" if $bu ne ""; | |||
print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; | |||
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; | |||
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; | |||
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; | |||
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; | |||
# FEXTRALIB is only written when at least one library or archive was collected.
if (($linker_l ne "") || ($linker_a ne "")) { | |||
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; | |||
} | |||
close(MAKEFILE); | |||
close(CONFFILE); |
@@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include <sys/sysinfo.h> | |||
#endif | |||
#if defined(__x86_64__) || defined(_M_X64) | |||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6)) | |||
#else | |||
#ifndef NO_AVX512 | |||
#define NO_AVX512 | |||
#endif | |||
#endif | |||
#endif | |||
/* #define FORCE_P2 */ | |||
/* #define FORCE_KATMAI */ | |||
/* #define FORCE_COPPERMINE */ | |||
@@ -1240,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "cortexa53" | |||
#define CORENAME "CORTEXA53" | |||
#else | |||
#endif | |||
#ifdef FORCE_CORTEXA57 | |||
@@ -1256,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "cortexa57" | |||
#define CORENAME "CORTEXA57" | |||
#else | |||
#endif | |||
#ifdef FORCE_CORTEXA72 | |||
@@ -1272,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "cortexa72" | |||
#define CORENAME "CORTEXA72" | |||
#else | |||
#endif | |||
#ifdef FORCE_CORTEXA73 | |||
@@ -1288,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "cortexa73" | |||
#define CORENAME "CORTEXA73" | |||
#else | |||
#endif | |||
/* Settings used when FORCE_CORTEXX1 is defined: marks the target as forced
   (FORCE) and defines the architecture, sub-architecture, directory, library
   and core name strings. ARCHCONFIG is a space-separated list of -D options
   carrying the cache/TLB parameters and feature flags (VFP, NEON, ARMV8). */
#ifdef FORCE_CORTEXX1 | |||
#define FORCE | |||
#define ARCHITECTURE "ARM64" | |||
#define SUBARCHITECTURE "CORTEXX1" | |||
#define SUBDIRNAME "arm64" | |||
#define ARCHCONFIG "-DCORTEXX1 " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "cortexx1" | |||
#define CORENAME "CORTEXX1" | |||
#endif | |||
/* Settings used when FORCE_CORTEXX2 is defined: marks the target as forced
   (FORCE) and defines the architecture, sub-architecture, directory, library
   and core name strings. ARCHCONFIG is a space-separated list of -D options;
   besides the cache/TLB parameters it sets -DHAVE_SVE and -DARMV9 in addition
   to the VFP/NEON/ARMV8 flags. */
#ifdef FORCE_CORTEXX2 | |||
#define FORCE | |||
#define ARCHITECTURE "ARM64" | |||
#define SUBARCHITECTURE "CORTEXX2" | |||
#define SUBDIRNAME "arm64" | |||
#define ARCHCONFIG "-DCORTEXX2 " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
#define LIBNAME "cortexx2" | |||
#define CORENAME "CORTEXX2" | |||
#endif | |||
/* Settings used when FORCE_CORTEXA510 is defined: marks the target as forced
   (FORCE) and defines the architecture, sub-architecture, directory, library
   and core name strings. ARCHCONFIG is a space-separated list of -D options;
   besides the cache/TLB parameters it sets -DHAVE_SVE and -DARMV9 in addition
   to the VFP/NEON/ARMV8 flags. */
#ifdef FORCE_CORTEXA510 | |||
#define FORCE | |||
#define ARCHITECTURE "ARM64" | |||
#define SUBARCHITECTURE "CORTEXA510" | |||
#define SUBDIRNAME "arm64" | |||
#define ARCHCONFIG "-DCORTEXA510 " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
#define LIBNAME "cortexa510" | |||
#define CORENAME "CORTEXA510" | |||
#endif | |||
/* Settings used when FORCE_CORTEXA710 is defined: marks the target as forced
   (FORCE) and defines the architecture, sub-architecture, directory, library
   and core name strings. ARCHCONFIG is a space-separated list of -D options;
   besides the cache/TLB parameters it sets -DHAVE_SVE and -DARMV9 in addition
   to the VFP/NEON/ARMV8 flags. */
#ifdef FORCE_CORTEXA710 | |||
#define FORCE | |||
#define ARCHITECTURE "ARM64" | |||
#define SUBARCHITECTURE "CORTEXA710" | |||
#define SUBDIRNAME "arm64" | |||
#define ARCHCONFIG "-DCORTEXA710 " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | |||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9" | |||
#define LIBNAME "cortexa710" | |||
#define CORENAME "CORTEXA710" | |||
#endif | |||
#ifdef FORCE_NEOVERSEN1 | |||
@@ -1305,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-march=armv8.2-a -mtune=neoverse-n1" | |||
#define LIBNAME "neoversen1" | |||
#define CORENAME "NEOVERSEN1" | |||
#else | |||
#endif | |||
#ifdef FORCE_NEOVERSEV1 | |||
@@ -1322,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-march=armv8.4-a -mtune=neoverse-v1" | |||
#define LIBNAME "neoversev1" | |||
#define CORENAME "NEOVERSEV1" | |||
#else | |||
#endif | |||
@@ -1340,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-march=armv8.5-a -mtune=neoverse-n2" | |||
#define LIBNAME "neoversen2" | |||
#define CORENAME "NEOVERSEN2" | |||
#else | |||
#endif | |||
#ifdef FORCE_CORTEXA55 | |||
@@ -1356,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "cortexa55" | |||
#define CORENAME "CORTEXA55" | |||
#else | |||
#endif | |||
#ifdef FORCE_FALKOR | |||
@@ -1372,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "falkor" | |||
#define CORENAME "FALKOR" | |||
#else | |||
#endif | |||
#ifdef FORCE_THUNDERX | |||
@@ -1387,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "thunderx" | |||
#define CORENAME "THUNDERX" | |||
#else | |||
#endif | |||
#ifdef FORCE_THUNDERX2T99 | |||
@@ -1405,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "thunderx2t99" | |||
#define CORENAME "THUNDERX2T99" | |||
#else | |||
#endif | |||
#ifdef FORCE_TSV110 | |||
@@ -1421,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "tsv110" | |||
#define CORENAME "TSV110" | |||
#else | |||
#endif | |||
#ifdef FORCE_EMAG8180 | |||
@@ -1456,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
#define LIBNAME "thunderx3t110" | |||
#define CORENAME "THUNDERX3T110" | |||
#else | |||
#endif | |||
#ifdef FORCE_VORTEX | |||
@@ -1488,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8" | |||
#define LIBNAME "a64fx" | |||
#define CORENAME "A64FX" | |||
#else | |||
#endif | |||
/* Settings used when FORCE_FT2000 is defined: marks the target as forced
   (FORCE), defines ARMV8, and sets the architecture, sub-architecture,
   directory, library and core name strings. ARCHCONFIG is a space-separated
   list of -D options, so every option must be followed by a blank; without
   the blank after the L2 size, -DL2_SIZE and -DL2_LINESIZE fuse into one
   malformed option. */
#ifdef FORCE_FT2000
#define ARMV8
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "FT2000"
#define SUBDIRNAME      "arm64"
/* NOTE(review): 33554426 is not a power of two (32 MiB would be 33554432);
   confirm the intended L2 size. The value is left unchanged here. */
#define ARCHCONFIG   "-DFT2000 " \
       "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
       "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
       "-DL2_SIZE=33554426 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME   "ft2000"
#define CORENAME  "FT2000"
#endif
#ifdef FORCE_ZARCH_GENERIC | |||
@@ -1524,6 +1574,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef FORCE_C910V | |||
#define FORCE | |||
#define ARCHITECTURE "RISCV64" | |||
#ifdef NO_RV64GV | |||
#define SUBARCHITECTURE "RISCV64_GENERIC" | |||
#define SUBDIRNAME "riscv64" | |||
#define ARCHCONFIG "-DRISCV64_GENERIC " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ | |||
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ | |||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
#define LIBNAME "riscv64_generic" | |||
#define CORENAME "RISCV64_GENERIC" | |||
#else | |||
#define SUBARCHITECTURE "C910V" | |||
#define SUBDIRNAME "riscv64" | |||
#define ARCHCONFIG "-DC910V " \ | |||
@@ -1532,6 +1592,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
#define LIBNAME "c910v" | |||
#define CORENAME "C910V" | |||
#endif | |||
#else | |||
#endif | |||
@@ -1632,17 +1693,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
static int get_num_cores(void) { | |||
int count; | |||
#ifdef OS_WINDOWS | |||
SYSTEM_INFO sysinfo; | |||
#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || defined(__APPLE__) | |||
int m[2], count; | |||
int m[2]; | |||
size_t len; | |||
#endif | |||
#if defined(linux) || defined(__sun__) | |||
//returns the number of processors which are currently online | |||
return sysconf(_SC_NPROCESSORS_CONF); | |||
count = sysconf(_SC_NPROCESSORS_CONF); | |||
if (count <= 0) count = 2; | |||
return count; | |||
#elif defined(OS_WINDOWS) | |||
GetSystemInfo(&sysinfo); | |||
@@ -1653,13 +1717,15 @@ static int get_num_cores(void) { | |||
m[1] = HW_NCPU; | |||
len = sizeof(int); | |||
sysctl(m, 2, &count, &len, NULL, 0); | |||
if (count <= 0) count = 2; | |||
return count; | |||
#elif defined(AIX) | |||
//returns the number of processors which are currently online | |||
return sysconf(_SC_NPROCESSORS_ONLN); | |||
count = sysconf(_SC_NPROCESSORS_ONLN); | |||
if (count <= 0) count = 2; | |||
#else | |||
return 2; | |||
#endif | |||
@@ -1681,7 +1747,7 @@ int main(int argc, char *argv[]){ | |||
#ifdef FORCE | |||
printf("CORE=%s\n", CORENAME); | |||
#else | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) | |||
printf("CORE=%s\n", get_corename()); | |||
#endif | |||
#endif | |||
@@ -1829,7 +1895,7 @@ printf("ELF_VERSION=2\n"); | |||
#ifdef FORCE | |||
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | |||
#else | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) | |||
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) || defined(ZARCH) || defined(sparc) || defined(__loongarch__) || defined(__riscv) | |||
printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | |||
#endif | |||
#endif | |||
@@ -531,8 +531,11 @@ $(BLASOBJS) $(BLASOBJS_P) : functable.h | |||
$(BLASOBJS) $(BLASOBJS_P) : override CFLAGS += -DPROFILE_FUNC_NAME=interface_$(*F) | |||
functable.h : Makefile | |||
ifndef USE_PERL | |||
./create $(FUNCALLFILES) > functable.h | |||
else | |||
./create.pl $(FUNCALLFILES) > functable.h | |||
endif | |||
endif | |||
clean :: | |||
@@ -1,22 +1,22 @@ | |||
#!/usr/bin/env perl | |||
#!/bin/sh | |||
$count = 0; | |||
count=0 | |||
foreach (@ARGV) { | |||
print "#define\tinterface_", $_, "\t\t", $count, "\n"; | |||
$count ++; | |||
} | |||
for arg in "$@"; do | |||
printf "#define\tinterface_%s\t\t%d\n" "$arg" "$count" | |||
count=`expr $count + 1` | |||
done | |||
print "#ifdef USE_FUNCTABLE\n"; | |||
printf "#ifdef USE_FUNCTABLE\n" | |||
print "#define MAX_PROF_TABLE ", $count, "\n"; | |||
printf "#define MAX_PROF_TABLE %d\n" "$count" | |||
print "static char *func_table[] = {\n"; | |||
printf "static char *func_table[] = {\n" | |||
foreach (@ARGV) { | |||
print "\"", $_, "\",\n"; | |||
} | |||
for arg in "$@"; do | |||
printf "\"%s\",\n" "$arg" | |||
done | |||
print "};\n"; | |||
print "#endif\n"; | |||
printf "};\n" | |||
printf "#endif\n" | |||
@@ -0,0 +1,22 @@ | |||
#!/usr/bin/env perl
# Writes a C header fragment to standard output. Each command-line argument
# becomes "#define interface_<arg> <index>" (indices start at 0), followed by
# a USE_FUNCTABLE-guarded section holding the argument count (MAX_PROF_TABLE)
# and a string table with the same names in the same order.

my $index = 0;
for my $name (@ARGV) {
    print "#define\tinterface_${name}\t\t${index}\n";
    $index++;
}

print "#ifdef USE_FUNCTABLE\n";
print "#define MAX_PROF_TABLE ${index}\n";
print "static char *func_table[] = {\n";
print qq{"$_",\n} for @ARGV;
print "};\n";
print "#endif\n";
@@ -854,49 +854,49 @@ endif () | |||
# Makefile.LA | |||
if(NOT NO_LAPACK)
  # For every enabled precision, choose the NEG_TCOPY and LASWP_NCOPY kernel
  # sources (unless a <prefix>NEG_TCOPY / <prefix>LASWP_NCOPY variable is
  # already defined) and register them. The default file name already carries
  # the GEMM unroll factor and the ".c" extension, so the variable is passed to
  # GenerateNamedObjects unchanged.
  foreach (float_type ${FLOAT_TYPES})
    string(SUBSTRING ${float_type} 0 1 float_char)
    if (${float_type} STREQUAL "BFLOAT16")
      set (float_char "SB")
    endif ()
    if (NOT DEFINED ${float_char}NEG_TCOPY)
      if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
        set(${float_char}NEG_TCOPY ../generic/zneg_tcopy_${${float_char}GEMM_UNROLL_M}.c)
      else ()
        set(${float_char}NEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c)
      endif ()
    endif ()

    if (NOT DEFINED ${float_char}LASWP_NCOPY)
      if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
        set(${float_char}LASWP_NCOPY ../generic/zlaswp_ncopy_${${float_char}GEMM_UNROLL_N}.c)
      else ()
        set(${float_char}LASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c)
      endif ()
    endif ()
    # NOTE(review): this resets float_char to the first letter of float_type, so
    # for BFLOAT16 the lookups below use the "B" prefix rather than "SB" -
    # confirm this is intended.
    string(SUBSTRING ${float_type} 0 1 float_char)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}" "" "neg_tcopy" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}" "" "laswp_ncopy" false "" "" false ${float_type})
  endforeach()

  # Complex builds also need the matching real-precision copy kernels.
  # float_char still holds the value from the last loop iteration here, so the
  # unroll factors are named explicitly (SGEMM_* / DGEMM_*).
  if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
    if (NOT DEFINED SNEG_TCOPY)
      set(SNEG_TCOPY ../generic/neg_tcopy_${SGEMM_UNROLL_M}.c)
    endif ()

    if (NOT DEFINED SLASWP_NCOPY)
      set(SLASWP_NCOPY ../generic/laswp_ncopy_${SGEMM_UNROLL_N}.c)
    endif ()
    GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE")
    GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE")
  endif()
  if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
    if (NOT DEFINED DNEG_TCOPY)
      set(DNEG_TCOPY ../generic/neg_tcopy_${DGEMM_UNROLL_M}.c)
    endif ()

    if (NOT DEFINED DLASWP_NCOPY)
      set(DLASWP_NCOPY ../generic/laswp_ncopy_${DGEMM_UNROLL_N}.c)
    endif ()
    # The variables already end in "_<unroll>.c"; appending the suffix again
    # would produce a file name that does not exist.
    GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}" "" "neg_tcopy" false "" "" false "DOUBLE")
    GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "DOUBLE")
  endif()
endif()
@@ -0,0 +1,216 @@ | |||
# amin / max / min and their index-returning (i*) counterparts are taken from
# the C sources under ../arm. S and D entries share one file; C and Z entries
# share the z* file.
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
# TRSM kernels: all four type prefixes (S, D, C, Z) point at the same
# trsm_kernel_{LN,LT,RN,RT}_sve.c sources.
STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
# TRSM copy routines without a type prefix use the trsm_*copy_sve.c sources.
TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
# Z-prefixed TRSM copy routines use the separate ztrsm_*copy_sve.c sources.
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
# The kernels below name .S sources with no directory prefix. Unless noted,
# S and D entries share one file and C and Z entries share the z* file.
SAMAXKERNEL = amax.S | |||
DAMAXKERNEL = amax.S | |||
CAMAXKERNEL = zamax.S | |||
ZAMAXKERNEL = zamax.S | |||
SAXPYKERNEL = axpy.S | |||
DAXPYKERNEL = axpy.S | |||
CAXPYKERNEL = zaxpy.S | |||
ZAXPYKERNEL = zaxpy.S | |||
SROTKERNEL = rot.S | |||
DROTKERNEL = rot.S | |||
CROTKERNEL = zrot.S | |||
ZROTKERNEL = zrot.S | |||
SSCALKERNEL = scal.S | |||
DSCALKERNEL = scal.S | |||
CSCALKERNEL = zscal.S | |||
ZSCALKERNEL = zscal.S | |||
SGEMVNKERNEL = gemv_n.S | |||
DGEMVNKERNEL = gemv_n.S | |||
CGEMVNKERNEL = zgemv_n.S | |||
ZGEMVNKERNEL = zgemv_n.S | |||
SGEMVTKERNEL = gemv_t.S | |||
DGEMVTKERNEL = gemv_t.S | |||
CGEMVTKERNEL = zgemv_t.S | |||
ZGEMVTKERNEL = zgemv_t.S | |||
# asum is the exception: the C and Z entries use different files
# (casum.S and zasum.S).
SASUMKERNEL = asum.S | |||
DASUMKERNEL = asum.S | |||
CASUMKERNEL = casum.S | |||
ZASUMKERNEL = zasum.S | |||
# copy and swap use a single source for all four type prefixes.
SCOPYKERNEL = copy.S | |||
DCOPYKERNEL = copy.S | |||
CCOPYKERNEL = copy.S | |||
ZCOPYKERNEL = copy.S | |||
SSWAPKERNEL = swap.S | |||
DSWAPKERNEL = swap.S | |||
CSWAPKERNEL = swap.S | |||
ZSWAPKERNEL = swap.S | |||
ISAMAXKERNEL = iamax.S | |||
IDAMAXKERNEL = iamax.S | |||
ICAMAXKERNEL = izamax.S | |||
IZAMAXKERNEL = izamax.S | |||
SNRM2KERNEL = nrm2.S | |||
DNRM2KERNEL = nrm2.S | |||
CNRM2KERNEL = znrm2.S | |||
ZNRM2KERNEL = znrm2.S | |||
DDOTKERNEL = dot.S | |||
# SDOT: the C source ../generic/dot.c is used unless the C compiler is PGI,
# in which case dot.S is used instead.
ifneq ($(C_COMPILER), PGI) | |||
SDOTKERNEL = ../generic/dot.c | |||
else | |||
SDOTKERNEL = dot.S | |||
endif | |||
# CDOT/ZDOT: the opposite choice — zdot.S unless the C compiler is PGI,
# in which case the C source ../arm/zdot.c is used.
ifneq ($(C_COMPILER), PGI) | |||
CDOTKERNEL = zdot.S | |||
ZDOTKERNEL = zdot.S | |||
else | |||
CDOTKERNEL = ../arm/zdot.c | |||
ZDOTKERNEL = ../arm/zdot.c | |||
endif | |||
DSDOTKERNEL = dot.S | |||
DGEMM_BETA = dgemm_beta.S | |||
SGEMM_BETA = sgemm_beta.S | |||
# S-prefixed GEMM/TRMM: kernel file names are parameterized by
# $(SGEMM_UNROLL_N); the inner copy routines are fixed *_sve_v1.c sources.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
# NOTE(review): the S outer-copy sources are selected with $(DGEMM_UNROLL_N),
# not $(SGEMM_UNROLL_N) — confirm the two values always agree for this target.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
# *OBJ variables are object file names built from $(TSUFFIX) and $(SUFFIX).
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# The S and D TRMM/SYMM copy routines share the same unprefixed sources.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
SSYMMUCOPY_M = symm_ucopy_sve.c | |||
SSYMMLCOPY_M = symm_lcopy_sve.c | |||
# D-prefixed GEMM/TRMM: same layout as the S block, parameterized by
# $(DGEMM_UNROLL_N).
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
DSYMMUCOPY_M = symm_ucopy_sve.c | |||
DSYMMLCOPY_M = symm_lcopy_sve.c | |||
# C-prefixed GEMM/TRMM.
# NOTE(review): the kernel names and the outer copies are parameterized by
# $(ZGEMM_UNROLL_N), and the outer copies reuse the ../generic/zgemm_* C
# sources — confirm CGEMM_UNROLL_N matches ZGEMM_UNROLL_N for this target.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# The C TRMM/HEMM/SYMM copy routines reuse the z* sources.
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
# Z-prefixed GEMM/TRMM, parameterized by $(ZGEMM_UNROLL_N).
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c |
@@ -0,0 +1,216 @@ | |||
# amin / max / min and their index-returning (i*) counterparts are taken from
# the C sources under ../arm. S and D entries share one file; C and Z entries
# share the z* file.
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
# TRSM kernels: all four type prefixes (S, D, C, Z) point at the same
# trsm_kernel_{LN,LT,RN,RT}_sve.c sources.
STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
# TRSM copy routines without a type prefix use the trsm_*copy_sve.c sources.
TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
# Z-prefixed TRSM copy routines use the separate ztrsm_*copy_sve.c sources.
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
# The kernels below name .S sources with no directory prefix. Unless noted,
# S and D entries share one file and C and Z entries share the z* file.
SAMAXKERNEL = amax.S | |||
DAMAXKERNEL = amax.S | |||
CAMAXKERNEL = zamax.S | |||
ZAMAXKERNEL = zamax.S | |||
SAXPYKERNEL = axpy.S | |||
DAXPYKERNEL = axpy.S | |||
CAXPYKERNEL = zaxpy.S | |||
ZAXPYKERNEL = zaxpy.S | |||
SROTKERNEL = rot.S | |||
DROTKERNEL = rot.S | |||
CROTKERNEL = zrot.S | |||
ZROTKERNEL = zrot.S | |||
SSCALKERNEL = scal.S | |||
DSCALKERNEL = scal.S | |||
CSCALKERNEL = zscal.S | |||
ZSCALKERNEL = zscal.S | |||
SGEMVNKERNEL = gemv_n.S | |||
DGEMVNKERNEL = gemv_n.S | |||
CGEMVNKERNEL = zgemv_n.S | |||
ZGEMVNKERNEL = zgemv_n.S | |||
SGEMVTKERNEL = gemv_t.S | |||
DGEMVTKERNEL = gemv_t.S | |||
CGEMVTKERNEL = zgemv_t.S | |||
ZGEMVTKERNEL = zgemv_t.S | |||
# asum is the exception: the C and Z entries use different files
# (casum.S and zasum.S).
SASUMKERNEL = asum.S | |||
DASUMKERNEL = asum.S | |||
CASUMKERNEL = casum.S | |||
ZASUMKERNEL = zasum.S | |||
# copy and swap use a single source for all four type prefixes.
SCOPYKERNEL = copy.S | |||
DCOPYKERNEL = copy.S | |||
CCOPYKERNEL = copy.S | |||
ZCOPYKERNEL = copy.S | |||
SSWAPKERNEL = swap.S | |||
DSWAPKERNEL = swap.S | |||
CSWAPKERNEL = swap.S | |||
ZSWAPKERNEL = swap.S | |||
ISAMAXKERNEL = iamax.S | |||
IDAMAXKERNEL = iamax.S | |||
ICAMAXKERNEL = izamax.S | |||
IZAMAXKERNEL = izamax.S | |||
SNRM2KERNEL = nrm2.S | |||
DNRM2KERNEL = nrm2.S | |||
CNRM2KERNEL = znrm2.S | |||
ZNRM2KERNEL = znrm2.S | |||
DDOTKERNEL = dot.S | |||
# SDOT: the C source ../generic/dot.c is used unless the C compiler is PGI,
# in which case dot.S is used instead.
ifneq ($(C_COMPILER), PGI) | |||
SDOTKERNEL = ../generic/dot.c | |||
else | |||
SDOTKERNEL = dot.S | |||
endif | |||
# CDOT/ZDOT: the opposite choice — zdot.S unless the C compiler is PGI,
# in which case the C source ../arm/zdot.c is used.
ifneq ($(C_COMPILER), PGI) | |||
CDOTKERNEL = zdot.S | |||
ZDOTKERNEL = zdot.S | |||
else | |||
CDOTKERNEL = ../arm/zdot.c | |||
ZDOTKERNEL = ../arm/zdot.c | |||
endif | |||
DSDOTKERNEL = dot.S | |||
DGEMM_BETA = dgemm_beta.S | |||
SGEMM_BETA = sgemm_beta.S | |||
# S-prefixed GEMM/TRMM: kernel file names are parameterized by
# $(SGEMM_UNROLL_N); the inner copy routines are fixed *_sve_v1.c sources.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
# NOTE(review): the S outer-copy sources are selected with $(DGEMM_UNROLL_N),
# not $(SGEMM_UNROLL_N) — confirm the two values always agree for this target.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
# *OBJ variables are object file names built from $(TSUFFIX) and $(SUFFIX).
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# The S and D TRMM/SYMM copy routines share the same unprefixed sources.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
SSYMMUCOPY_M = symm_ucopy_sve.c | |||
SSYMMLCOPY_M = symm_lcopy_sve.c | |||
# D-prefixed GEMM/TRMM: same layout as the S block, parameterized by
# $(DGEMM_UNROLL_N).
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
DSYMMUCOPY_M = symm_ucopy_sve.c | |||
DSYMMLCOPY_M = symm_lcopy_sve.c | |||
# C-prefixed GEMM/TRMM.
# NOTE(review): the kernel names and the outer copies are parameterized by
# $(ZGEMM_UNROLL_N), and the outer copies reuse the ../generic/zgemm_* C
# sources — confirm CGEMM_UNROLL_N matches ZGEMM_UNROLL_N for this target.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# The C TRMM/HEMM/SYMM copy routines reuse the z* sources.
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
# Z-prefixed GEMM/TRMM, parameterized by $(ZGEMM_UNROLL_N).
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c |
@@ -0,0 +1 @@ | |||
# This file defines no kernels of its own; it pulls in the Cortex-A57 kernel
# selection from $(KERNELDIR) unchanged.
include $(KERNELDIR)/KERNEL.CORTEXA57 |
@@ -0,0 +1,216 @@ | |||
# amin / max / min and their index-returning (i*) counterparts are taken from
# the C sources under ../arm. S and D entries share one file; C and Z entries
# share the z* file.
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
# TRSM kernels: all four type prefixes (S, D, C, Z) point at the same
# trsm_kernel_{LN,LT,RN,RT}_sve.c sources.
STRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
# TRSM copy routines without a type prefix use the trsm_*copy_sve.c sources.
TRSMCOPYLN_M = trsm_lncopy_sve.c | |||
TRSMCOPYLT_M = trsm_ltcopy_sve.c | |||
TRSMCOPYUN_M = trsm_uncopy_sve.c | |||
TRSMCOPYUT_M = trsm_utcopy_sve.c | |||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c | |||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c | |||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c | |||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c | |||
# Z-prefixed TRSM copy routines use the separate ztrsm_*copy_sve.c sources.
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c | |||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c | |||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c | |||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c | |||
# The kernels below name .S sources with no directory prefix. Unless noted,
# S and D entries share one file and C and Z entries share the z* file.
SAMAXKERNEL = amax.S | |||
DAMAXKERNEL = amax.S | |||
CAMAXKERNEL = zamax.S | |||
ZAMAXKERNEL = zamax.S | |||
SAXPYKERNEL = axpy.S | |||
DAXPYKERNEL = axpy.S | |||
CAXPYKERNEL = zaxpy.S | |||
ZAXPYKERNEL = zaxpy.S | |||
SROTKERNEL = rot.S | |||
DROTKERNEL = rot.S | |||
CROTKERNEL = zrot.S | |||
ZROTKERNEL = zrot.S | |||
SSCALKERNEL = scal.S | |||
DSCALKERNEL = scal.S | |||
CSCALKERNEL = zscal.S | |||
ZSCALKERNEL = zscal.S | |||
SGEMVNKERNEL = gemv_n.S | |||
DGEMVNKERNEL = gemv_n.S | |||
CGEMVNKERNEL = zgemv_n.S | |||
ZGEMVNKERNEL = zgemv_n.S | |||
SGEMVTKERNEL = gemv_t.S | |||
DGEMVTKERNEL = gemv_t.S | |||
CGEMVTKERNEL = zgemv_t.S | |||
ZGEMVTKERNEL = zgemv_t.S | |||
# asum is the exception: the C and Z entries use different files
# (casum.S and zasum.S).
SASUMKERNEL = asum.S | |||
DASUMKERNEL = asum.S | |||
CASUMKERNEL = casum.S | |||
ZASUMKERNEL = zasum.S | |||
# copy and swap use a single source for all four type prefixes.
SCOPYKERNEL = copy.S | |||
DCOPYKERNEL = copy.S | |||
CCOPYKERNEL = copy.S | |||
ZCOPYKERNEL = copy.S | |||
SSWAPKERNEL = swap.S | |||
DSWAPKERNEL = swap.S | |||
CSWAPKERNEL = swap.S | |||
ZSWAPKERNEL = swap.S | |||
ISAMAXKERNEL = iamax.S | |||
IDAMAXKERNEL = iamax.S | |||
ICAMAXKERNEL = izamax.S | |||
IZAMAXKERNEL = izamax.S | |||
SNRM2KERNEL = nrm2.S | |||
DNRM2KERNEL = nrm2.S | |||
CNRM2KERNEL = znrm2.S | |||
ZNRM2KERNEL = znrm2.S | |||
DDOTKERNEL = dot.S | |||
# SDOT: the C source ../generic/dot.c is used unless the C compiler is PGI,
# in which case dot.S is used instead.
ifneq ($(C_COMPILER), PGI) | |||
SDOTKERNEL = ../generic/dot.c | |||
else | |||
SDOTKERNEL = dot.S | |||
endif | |||
# CDOT/ZDOT: the opposite choice — zdot.S unless the C compiler is PGI,
# in which case the C source ../arm/zdot.c is used.
ifneq ($(C_COMPILER), PGI) | |||
CDOTKERNEL = zdot.S | |||
ZDOTKERNEL = zdot.S | |||
else | |||
CDOTKERNEL = ../arm/zdot.c | |||
ZDOTKERNEL = ../arm/zdot.c | |||
endif | |||
DSDOTKERNEL = dot.S | |||
DGEMM_BETA = dgemm_beta.S | |||
SGEMM_BETA = sgemm_beta.S | |||
# S-prefixed GEMM/TRMM: kernel file names are parameterized by
# $(SGEMM_UNROLL_N); the inner copy routines are fixed *_sve_v1.c sources.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S | |||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S | |||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c | |||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c | |||
# NOTE(review): the S outer-copy sources are selected with $(DGEMM_UNROLL_N),
# not $(SGEMM_UNROLL_N) — confirm the two values always agree for this target.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
# *OBJ variables are object file names built from $(TSUFFIX) and $(SUFFIX).
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# The S and D TRMM/SYMM copy routines share the same unprefixed sources.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
SSYMMUCOPY_M = symm_ucopy_sve.c | |||
SSYMMLCOPY_M = symm_lcopy_sve.c | |||
# D-prefixed GEMM/TRMM: same layout as the S block, parameterized by
# $(DGEMM_UNROLL_N).
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S | |||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S | |||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c | |||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c | |||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c | |||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c | |||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c | |||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c | |||
DSYMMUCOPY_M = symm_ucopy_sve.c | |||
DSYMMLCOPY_M = symm_lcopy_sve.c | |||
# C-prefixed GEMM/TRMM.
# NOTE(review): the kernel names and the outer copies are parameterized by
# $(ZGEMM_UNROLL_N), and the outer copies reuse the ../generic/zgemm_* C
# sources — confirm CGEMM_UNROLL_N matches ZGEMM_UNROLL_N for this target.
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c | |||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
# The C TRMM/HEMM/SYMM copy routines reuse the z* sources.
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
CSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
CSYMMLCOPY_M = zsymm_lcopy_sve.c | |||
# Z-prefixed GEMM/TRMM, parameterized by $(ZGEMM_UNROLL_N).
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S | |||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c | |||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c | |||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c | |||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c | |||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c | |||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c | |||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c | |||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c | |||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c |
@@ -0,0 +1,3 @@ | |||
# This file defines no kernels of its own; it pulls in the Cortex-A57 kernel
# selection from $(KERNELDIR) unchanged.
include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
@@ -1,152 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2020, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
/* Marks that an optimized copy_kernel is defined in this file.
 * NOTE(review): presumably tested by the including source to skip its own
 * fallback kernel — confirm against the includer. */
#define HAVE_KERNEL 1 | |||
/*
 * Copies a contiguous run from x to y with inline assembly, using paired
 * vector loads (lxvp, 32 bytes each) and single vector stores (stxv, 16 bytes
 * each), 256 bytes per step.
 *
 *   n  count to copy; it is decremented by 32 for every 256 bytes moved, so
 *      one unit of n corresponds to 8 bytes.
 *   x  source pointer (advanced inside the asm).
 *   y  destination pointer (advanced inside the asm).
 *
 * NOTE(review): the code always moves at least one 256-byte block and has no
 * remainder handling, so n is presumably a positive multiple of 32 — confirm
 * against the caller.
 */
static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y) | |||
{ | |||
__asm__ | |||
( | |||
/* Prologue: load the first 256 bytes of x into vs32..vs47, advance x,
   and subtract 32 from n; if nothing remains, skip the loop. */
"lxvp 32, 0(%2) \n\t" | |||
"lxvp 34, 32(%2) \n\t" | |||
"lxvp 36, 64(%2) \n\t" | |||
"lxvp 38, 96(%2) \n\t" | |||
"lxvp 40, 128(%2) \n\t" | |||
"lxvp 42, 160(%2) \n\t" | |||
"lxvp 44, 192(%2) \n\t" | |||
"lxvp 46, 224(%2) \n\t" | |||
"addi %2, %2, 256 \n\t" | |||
"addic. %1, %1, -32 \n\t" | |||
"ble two%= \n\t" | |||
".align 5 \n" | |||
/* Main loop: store the block already held in registers to y while
   reloading the same registers with the next 256 bytes of x. */
"one%=: \n\t" | |||
/* Store vs32..vs39 (first 128 bytes). On little-endian the two registers
   of each pair are stored in swapped order (odd register first). */
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 32, 0(%3) \n\t" | |||
"stxv 33, 16(%3) \n\t" | |||
"stxv 34, 32(%3) \n\t" | |||
"stxv 35, 48(%3) \n\t" | |||
"stxv 36, 64(%3) \n\t" | |||
"stxv 37, 80(%3) \n\t" | |||
"stxv 38, 96(%3) \n\t" | |||
"stxv 39, 112(%3) \n\t" | |||
#else | |||
"stxv 33, 0(%3) \n\t" | |||
"stxv 32, 16(%3) \n\t" | |||
"stxv 35, 32(%3) \n\t" | |||
"stxv 34, 48(%3) \n\t" | |||
"stxv 37, 64(%3) \n\t" | |||
"stxv 36, 80(%3) \n\t" | |||
"stxv 39, 96(%3) \n\t" | |||
"stxv 38, 112(%3) \n\t" | |||
#endif | |||
/* Refill vs32..vs39 from the next block of x (x was already advanced). */
"lxvp 32, 0(%2) \n\t" | |||
"lxvp 34, 32(%2) \n\t" | |||
"lxvp 36, 64(%2) \n\t" | |||
"lxvp 38, 96(%2) \n\t" | |||
/* Store vs40..vs47 (second 128 bytes), same endian-dependent ordering. */
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 40, 128(%3) \n\t" | |||
"stxv 41, 144(%3) \n\t" | |||
"stxv 42, 160(%3) \n\t" | |||
"stxv 43, 176(%3) \n\t" | |||
"stxv 44, 192(%3) \n\t" | |||
"stxv 45, 208(%3) \n\t" | |||
"stxv 46, 224(%3) \n\t" | |||
"stxv 47, 240(%3) \n\t" | |||
#else | |||
"stxv 41, 128(%3) \n\t" | |||
"stxv 40, 144(%3) \n\t" | |||
"stxv 43, 160(%3) \n\t" | |||
"stxv 42, 176(%3) \n\t" | |||
"stxv 45, 192(%3) \n\t" | |||
"stxv 44, 208(%3) \n\t" | |||
"stxv 47, 224(%3) \n\t" | |||
"stxv 46, 240(%3) \n\t" | |||
#endif | |||
/* Refill vs40..vs47, then advance both pointers by 256 bytes and
   subtract 32 from n; loop while n is still positive. */
"lxvp 40, 128(%2) \n\t" | |||
"lxvp 42, 160(%2) \n\t" | |||
"lxvp 44, 192(%2) \n\t" | |||
"lxvp 46, 224(%2) \n\t" | |||
"addi %3, %3, 256 \n\t" | |||
"addi %2, %2, 256 \n\t" | |||
"addic. %1, %1, -32 \n\t" | |||
"bgt one%= \n" | |||
/* Epilogue: store the last 256 bytes still held in vs32..vs47. */
"two%=: \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 32, 0(%3) \n\t" | |||
"stxv 33, 16(%3) \n\t" | |||
"stxv 34, 32(%3) \n\t" | |||
"stxv 35, 48(%3) \n\t" | |||
"stxv 36, 64(%3) \n\t" | |||
"stxv 37, 80(%3) \n\t" | |||
"stxv 38, 96(%3) \n\t" | |||
"stxv 39, 112(%3) \n\t" | |||
"stxv 40, 128(%3) \n\t" | |||
"stxv 41, 144(%3) \n\t" | |||
"stxv 42, 160(%3) \n\t" | |||
"stxv 43, 176(%3) \n\t" | |||
"stxv 44, 192(%3) \n\t" | |||
"stxv 45, 208(%3) \n\t" | |||
"stxv 46, 224(%3) \n\t" | |||
"stxv 47, 240(%3) \n\t" | |||
#else | |||
"stxv 33, 0(%3) \n\t" | |||
"stxv 32, 16(%3) \n\t" | |||
"stxv 35, 32(%3) \n\t" | |||
"stxv 34, 48(%3) \n\t" | |||
"stxv 37, 64(%3) \n\t" | |||
"stxv 36, 80(%3) \n\t" | |||
"stxv 39, 96(%3) \n\t" | |||
"stxv 38, 112(%3) \n\t" | |||
"stxv 41, 128(%3) \n\t" | |||
"stxv 40, 144(%3) \n\t" | |||
"stxv 43, 160(%3) \n\t" | |||
"stxv 42, 176(%3) \n\t" | |||
"stxv 45, 192(%3) \n\t" | |||
"stxv 44, 208(%3) \n\t" | |||
"stxv 47, 224(%3) \n\t" | |||
"stxv 46, 240(%3) \n\t" | |||
#endif | |||
/* Assembly-level comment line only; it references every operand,
   including the memory operands %0 (*y) and %4 (*x). */
"#n=%1 x=%4=%2 y=%0=%3" | |||
: | |||
/* Outputs: %0 is *y as a memory output; n, x and y are read-write
   because the asm modifies all three. */
"=m" (*y), | |||
"+r" (n), // 1 | |||
"+b" (x), // 2 | |||
"+b" (y) // 3 | |||
: | |||
/* Input: %4 is *x as a memory input. */
"m" (*x) | |||
: | |||
/* Clobbers: cr0 is written by addic.; vs32..vs47 hold the data in flight. */
"cr0", | |||
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39", | |||
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47" | |||
); | |||
} |
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(__VEC__) || defined(__ALTIVEC__) | |||
#include "ccopy_microk_power10.c" | |||
#include "copy_microk_power10.c" | |||
#endif | |||
#ifndef HAVE_KERNEL | |||
@@ -86,7 +86,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
if ( (inc_x == 1) && (inc_y == 1 )) | |||
{ | |||
BLASLONG n1 = n & -32; | |||
BLASLONG n1 = n & -64; | |||
if ( n1 > 0 ) | |||
{ | |||
copy_kernel(n1, x, y); | |||
@@ -61,37 +61,97 @@ static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y) | |||
".align 5 \n" | |||
"one%=: \n\t" | |||
"stxvp 32, 0(%3) \n\t" | |||
"stxvp 34, 32(%3) \n\t" | |||
"stxvp 36, 64(%3) \n\t" | |||
"stxvp 38, 96(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 32, 0(%3) \n\t" | |||
"stxv 33, 16(%3) \n\t" | |||
"stxv 34, 32(%3) \n\t" | |||
"stxv 35, 48(%3) \n\t" | |||
"stxv 36, 64(%3) \n\t" | |||
"stxv 37, 80(%3) \n\t" | |||
"stxv 38, 96(%3) \n\t" | |||
"stxv 39, 112(%3) \n\t" | |||
#else | |||
"stxv 33, 0(%3) \n\t" | |||
"stxv 32, 16(%3) \n\t" | |||
"stxv 35, 32(%3) \n\t" | |||
"stxv 34, 48(%3) \n\t" | |||
"stxv 37, 64(%3) \n\t" | |||
"stxv 36, 80(%3) \n\t" | |||
"stxv 39, 96(%3) \n\t" | |||
"stxv 38, 112(%3) \n\t" | |||
#endif | |||
"lxvp 32, 0(%2) \n\t" | |||
"lxvp 34, 32(%2) \n\t" | |||
"lxvp 36, 64(%2) \n\t" | |||
"lxvp 38, 96(%2) \n\t" | |||
"stxvp 40, 128(%3) \n\t" | |||
"stxvp 42, 160(%3) \n\t" | |||
"stxvp 44, 192(%3) \n\t" | |||
"stxvp 46, 224(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 40, 128(%3) \n\t" | |||
"stxv 41, 144(%3) \n\t" | |||
"stxv 42, 160(%3) \n\t" | |||
"stxv 43, 176(%3) \n\t" | |||
"stxv 44, 192(%3) \n\t" | |||
"stxv 45, 208(%3) \n\t" | |||
"stxv 46, 224(%3) \n\t" | |||
"stxv 47, 240(%3) \n\t" | |||
#else | |||
"stxv 41, 128(%3) \n\t" | |||
"stxv 40, 144(%3) \n\t" | |||
"stxv 43, 160(%3) \n\t" | |||
"stxv 42, 176(%3) \n\t" | |||
"stxv 45, 192(%3) \n\t" | |||
"stxv 44, 208(%3) \n\t" | |||
"stxv 47, 224(%3) \n\t" | |||
"stxv 46, 240(%3) \n\t" | |||
#endif | |||
"lxvp 40, 128(%2) \n\t" | |||
"lxvp 42, 160(%2) \n\t" | |||
"lxvp 44, 192(%2) \n\t" | |||
"lxvp 46, 224(%2) \n\t" | |||
"stxvp 48, 256(%3) \n\t" | |||
"stxvp 50, 288(%3) \n\t" | |||
"stxvp 52, 320(%3) \n\t" | |||
"stxvp 54, 352(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 48, 256(%3) \n\t" | |||
"stxv 49, 272(%3) \n\t" | |||
"stxv 50, 288(%3) \n\t" | |||
"stxv 51, 304(%3) \n\t" | |||
"stxv 52, 320(%3) \n\t" | |||
"stxv 53, 336(%3) \n\t" | |||
"stxv 54, 352(%3) \n\t" | |||
"stxv 55, 368(%3) \n\t" | |||
#else | |||
"stxv 49, 256(%3) \n\t" | |||
"stxv 48, 272(%3) \n\t" | |||
"stxv 51, 288(%3) \n\t" | |||
"stxv 50, 304(%3) \n\t" | |||
"stxv 53, 320(%3) \n\t" | |||
"stxv 52, 336(%3) \n\t" | |||
"stxv 55, 352(%3) \n\t" | |||
"stxv 54, 368(%3) \n\t" | |||
#endif | |||
"lxvp 48, 256(%2) \n\t" | |||
"lxvp 50, 288(%2) \n\t" | |||
"lxvp 52, 320(%2) \n\t" | |||
"lxvp 54, 352(%2) \n\t" | |||
"stxvp 56, 384(%3) \n\t" | |||
"stxvp 58, 416(%3) \n\t" | |||
"stxvp 60, 448(%3) \n\t" | |||
"stxvp 62, 480(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 56, 384(%3) \n\t" | |||
"stxv 57, 400(%3) \n\t" | |||
"stxv 58, 416(%3) \n\t" | |||
"stxv 59, 432(%3) \n\t" | |||
"stxv 60, 448(%3) \n\t" | |||
"stxv 61, 464(%3) \n\t" | |||
"stxv 62, 480(%3) \n\t" | |||
"stxv 63, 496(%3) \n\t" | |||
#else | |||
"stxv 57, 384(%3) \n\t" | |||
"stxv 56, 400(%3) \n\t" | |||
"stxv 59, 416(%3) \n\t" | |||
"stxv 58, 432(%3) \n\t" | |||
"stxv 61, 448(%3) \n\t" | |||
"stxv 60, 464(%3) \n\t" | |||
"stxv 63, 480(%3) \n\t" | |||
"stxv 62, 496(%3) \n\t" | |||
#endif | |||
"lxvp 56, 384(%2) \n\t" | |||
"lxvp 58, 416(%2) \n\t" | |||
"lxvp 60, 448(%2) \n\t" | |||
@@ -111,22 +171,73 @@ static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y) | |||
"two%=: \n\t" | |||
"stxvp 32, 0(%3) \n\t" | |||
"stxvp 34, 32(%3) \n\t" | |||
"stxvp 36, 64(%3) \n\t" | |||
"stxvp 38, 96(%3) \n\t" | |||
"stxvp 40, 128(%3) \n\t" | |||
"stxvp 42, 160(%3) \n\t" | |||
"stxvp 44, 192(%3) \n\t" | |||
"stxvp 46, 224(%3) \n\t" | |||
"stxvp 48, 256(%3) \n\t" | |||
"stxvp 50, 288(%3) \n\t" | |||
"stxvp 52, 320(%3) \n\t" | |||
"stxvp 54, 352(%3) \n\t" | |||
"stxvp 56, 384(%3) \n\t" | |||
"stxvp 58, 416(%3) \n\t" | |||
"stxvp 60, 448(%3) \n\t" | |||
"stxvp 62, 480(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 32, 0(%3) \n\t" | |||
"stxv 33, 16(%3) \n\t" | |||
"stxv 34, 32(%3) \n\t" | |||
"stxv 35, 48(%3) \n\t" | |||
"stxv 36, 64(%3) \n\t" | |||
"stxv 37, 80(%3) \n\t" | |||
"stxv 38, 96(%3) \n\t" | |||
"stxv 39, 112(%3) \n\t" | |||
"stxv 40, 128(%3) \n\t" | |||
"stxv 41, 144(%3) \n\t" | |||
"stxv 42, 160(%3) \n\t" | |||
"stxv 43, 176(%3) \n\t" | |||
"stxv 44, 192(%3) \n\t" | |||
"stxv 45, 208(%3) \n\t" | |||
"stxv 46, 224(%3) \n\t" | |||
"stxv 47, 240(%3) \n\t" | |||
"stxv 48, 256(%3) \n\t" | |||
"stxv 49, 272(%3) \n\t" | |||
"stxv 50, 288(%3) \n\t" | |||
"stxv 51, 304(%3) \n\t" | |||
"stxv 52, 320(%3) \n\t" | |||
"stxv 53, 336(%3) \n\t" | |||
"stxv 54, 352(%3) \n\t" | |||
"stxv 55, 368(%3) \n\t" | |||
"stxv 56, 384(%3) \n\t" | |||
"stxv 57, 400(%3) \n\t" | |||
"stxv 58, 416(%3) \n\t" | |||
"stxv 59, 432(%3) \n\t" | |||
"stxv 60, 448(%3) \n\t" | |||
"stxv 61, 464(%3) \n\t" | |||
"stxv 62, 480(%3) \n\t" | |||
"stxv 63, 496(%3) \n\t" | |||
#else | |||
"stxv 33, 0(%3) \n\t" | |||
"stxv 32, 16(%3) \n\t" | |||
"stxv 35, 32(%3) \n\t" | |||
"stxv 34, 48(%3) \n\t" | |||
"stxv 37, 64(%3) \n\t" | |||
"stxv 36, 80(%3) \n\t" | |||
"stxv 39, 96(%3) \n\t" | |||
"stxv 38, 112(%3) \n\t" | |||
"stxv 41, 128(%3) \n\t" | |||
"stxv 40, 144(%3) \n\t" | |||
"stxv 43, 160(%3) \n\t" | |||
"stxv 42, 176(%3) \n\t" | |||
"stxv 45, 192(%3) \n\t" | |||
"stxv 44, 208(%3) \n\t" | |||
"stxv 47, 224(%3) \n\t" | |||
"stxv 46, 240(%3) \n\t" | |||
"stxv 49, 256(%3) \n\t" | |||
"stxv 48, 272(%3) \n\t" | |||
"stxv 51, 288(%3) \n\t" | |||
"stxv 50, 304(%3) \n\t" | |||
"stxv 53, 320(%3) \n\t" | |||
"stxv 52, 336(%3) \n\t" | |||
"stxv 55, 352(%3) \n\t" | |||
"stxv 54, 368(%3) \n\t" | |||
"stxv 57, 384(%3) \n\t" | |||
"stxv 56, 400(%3) \n\t" | |||
"stxv 59, 416(%3) \n\t" | |||
"stxv 58, 432(%3) \n\t" | |||
"stxv 61, 448(%3) \n\t" | |||
"stxv 60, 464(%3) \n\t" | |||
"stxv 63, 480(%3) \n\t" | |||
"stxv 62, 496(%3) \n\t" | |||
#endif | |||
"#n=%1 x=%4=%2 y=%0=%3" | |||
: | |||
@@ -95,18 +95,38 @@ static void zscal_kernel_8 (long n, float *x, float alpha_r, float alpha_i) | |||
"xvaddsp 50, 50, 36 \n\t" | |||
"xvaddsp 51, 51, 37 \n\t" | |||
"stxvp 48, 0(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 48, 0(%2) \n\t" | |||
"stxv 49, 16(%2) \n\t" | |||
#else | |||
"stxv 49, 0(%2) \n\t" | |||
"stxv 48, 16(%2) \n\t" | |||
#endif | |||
"xvaddsp 52, 52, 38 \n\t" | |||
"xvaddsp 53, 53, 39 \n\t" | |||
"stxvp 50, 32(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 50, 32(%2) \n\t" | |||
"stxv 51, 48(%2) \n\t" | |||
#else | |||
"stxv 51, 32(%2) \n\t" | |||
"stxv 50, 48(%2) \n\t" | |||
#endif | |||
"xvaddsp 54, 54, 56 \n\t" | |||
"xvaddsp 55, 55, 57 \n\t" | |||
"stxvp 52, 64(%2) \n\t" | |||
"stxvp 54, 96(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 52, 64(%2) \n\t" | |||
"stxv 53, 80(%2) \n\t" | |||
"stxv 54, 96(%2) \n\t" | |||
"stxv 55, 112(%2) \n\t" | |||
#else | |||
"stxv 53, 64(%2) \n\t" | |||
"stxv 52, 80(%2) \n\t" | |||
"stxv 55, 96(%2) \n\t" | |||
"stxv 54, 112(%2) \n\t" | |||
#endif | |||
"addi %2, %2, 128 \n\t" | |||
@@ -148,18 +168,39 @@ static void zscal_kernel_8 (long n, float *x, float alpha_r, float alpha_i) | |||
"xvaddsp 50, 50, 36 \n\t" | |||
"xvaddsp 51, 51, 37 \n\t" | |||
"stxvp 48, 0(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 48, 0(%2) \n\t" | |||
"stxv 49, 16(%2) \n\t" | |||
#else | |||
"stxv 49, 0(%2) \n\t" | |||
"stxv 48, 16(%2) \n\t" | |||
#endif | |||
"xvaddsp 52, 52, 38 \n\t" | |||
"xvaddsp 53, 53, 39 \n\t" | |||
"stxvp 50, 32(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 50, 32(%2) \n\t" | |||
"stxv 51, 48(%2) \n\t" | |||
#else | |||
"stxv 51, 32(%2) \n\t" | |||
"stxv 50, 48(%2) \n\t" | |||
#endif | |||
"xvaddsp 54, 54, 56 \n\t" | |||
"xvaddsp 55, 55, 57 \n\t" | |||
"stxvp 52, 64(%2) \n\t" | |||
"stxvp 54, 96(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 52, 64(%2) \n\t" | |||
"stxv 53, 80(%2) \n\t" | |||
"stxv 54, 96(%2) \n\t" | |||
"stxv 55, 112(%2) \n\t" | |||
#else | |||
"stxv 53, 64(%2) \n\t" | |||
"stxv 52, 80(%2) \n\t" | |||
"stxv 55, 96(%2) \n\t" | |||
"stxv 54, 112(%2) \n\t" | |||
#endif | |||
"#n=%1 x=%0=%2 alpha=(%3,%4)\n" | |||
: | |||
@@ -60,14 +60,25 @@ static void daxpy_kernel_8 (long n, double *x, double *y, double alpha) | |||
"xvmaddadp 37, 33, %x4 \n\t" | |||
"lxvp 32, 0(%2) \n\t" | |||
"stxvp 36, 0(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 36, 0(%3) \n\t" | |||
"stxv 37, 16(%3) \n\t" | |||
#else | |||
"stxv 37, 0(%3) \n\t" | |||
"stxv 36, 16(%3) \n\t" | |||
#endif | |||
"xvmaddadp 38, 34, %x4 \n\t" | |||
"xvmaddadp 39, 35, %x4 \n\t" | |||
"lxvp 34, 32(%2) \n\t" | |||
"stxvp 38, 32(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 38, 32(%3) \n\t" | |||
"stxv 39, 48(%3) \n\t" | |||
#else | |||
"stxv 39, 32(%3) \n\t" | |||
"stxv 38, 48(%3) \n\t" | |||
#endif | |||
"lxvp 36, 128(%3) \n\t" | |||
"lxvp 38, 160(%3) \n\t" | |||
@@ -76,13 +87,25 @@ static void daxpy_kernel_8 (long n, double *x, double *y, double alpha) | |||
"xvmaddadp 45, 41, %x4 \n\t" | |||
"lxvp 40, 64(%2) \n\t" | |||
"stxvp 44, 64(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 44, 64(%3) \n\t" | |||
"stxv 45, 80(%3) \n\t" | |||
#else | |||
"stxv 45, 64(%3) \n\t" | |||
"stxv 44, 80(%3) \n\t" | |||
#endif | |||
"xvmaddadp 46, 42, %x4 \n\t" | |||
"xvmaddadp 47, 43, %x4 \n\t" | |||
"lxvp 42, 96(%2) \n\t" | |||
"stxvp 46, 96(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 46, 96(%3) \n\t" | |||
"stxv 47, 112(%3) \n\t" | |||
#else | |||
"stxv 47, 96(%3) \n\t" | |||
"stxv 46, 112(%3) \n\t" | |||
#endif | |||
"addi %2, %2, 128 \n\t" | |||
"addi %3, %3, 128 \n\t" | |||
@@ -105,10 +128,25 @@ static void daxpy_kernel_8 (long n, double *x, double *y, double alpha) | |||
"xvmaddadp 46, 42, %x4 \n\t" | |||
"xvmaddadp 47, 43, %x4 \n\t" | |||
"stxvp 36, 0(%3) \n\t" | |||
"stxvp 38, 32(%3) \n\t" | |||
"stxvp 44, 64(%3) \n\t" | |||
"stxvp 46, 96(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 36, 0(%3) \n\t" | |||
"stxv 37, 16(%3) \n\t" | |||
"stxv 38, 32(%3) \n\t" | |||
"stxv 39, 48(%3) \n\t" | |||
"stxv 44, 64(%3) \n\t" | |||
"stxv 45, 80(%3) \n\t" | |||
"stxv 46, 96(%3) \n\t" | |||
"stxv 47, 112(%3) \n\t" | |||
#else | |||
"stxv 37, 0(%3) \n\t" | |||
"stxv 36, 16(%3) \n\t" | |||
"stxv 39, 32(%3) \n\t" | |||
"stxv 38, 48(%3) \n\t" | |||
"stxv 45, 64(%3) \n\t" | |||
"stxv 44, 80(%3) \n\t" | |||
"stxv 47, 96(%3) \n\t" | |||
"stxv 46, 112(%3) \n\t" | |||
#endif | |||
"#n=%1 x=%5=%2 y=%0=%3 alpha=%6 t0=%x4\n" | |||
: | |||
@@ -68,7 +68,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS | |||
if ( n >= 16 ) | |||
{ | |||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; | |||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; | |||
for (i = 0; i < align; i++) { | |||
y[i] += da * x[i] ; | |||
} | |||
@@ -87,7 +87,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
{ | |||
if ( n >= 64 ) | |||
{ | |||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; | |||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; | |||
for (i = 0; i < align; i++) { | |||
y[i] = x[i] ; | |||
} | |||
@@ -59,10 +59,25 @@ static void dscal_kernel_8 (long n, double *x, double alpha) | |||
"lxvp 36, 192(%2) \n\t" | |||
"lxvp 38, 224(%2) \n\t" | |||
"stxvp 40, 0(%2) \n\t" | |||
"stxvp 42, 32(%2) \n\t" | |||
"stxvp 44, 64(%2) \n\t" | |||
"stxvp 46, 96(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 40, 0(%2) \n\t" | |||
"stxv 41, 16(%2) \n\t" | |||
"stxv 42, 32(%2) \n\t" | |||
"stxv 43, 48(%2) \n\t" | |||
"stxv 44, 64(%2) \n\t" | |||
"stxv 45, 80(%2) \n\t" | |||
"stxv 46, 96(%2) \n\t" | |||
"stxv 47, 112(%2) \n\t" | |||
#else | |||
"stxv 41, 0(%2) \n\t" | |||
"stxv 40, 16(%2) \n\t" | |||
"stxv 43, 32(%2) \n\t" | |||
"stxv 42, 48(%2) \n\t" | |||
"stxv 45, 64(%2) \n\t" | |||
"stxv 44, 80(%2) \n\t" | |||
"stxv 47, 96(%2) \n\t" | |||
"stxv 46, 112(%2) \n\t" | |||
#endif | |||
"addi %2, %2, 128 \n\t" | |||
@@ -81,10 +96,25 @@ static void dscal_kernel_8 (long n, double *x, double alpha) | |||
"xvmuldp 46, 38, 48 \n\t" | |||
"xvmuldp 47, 39, 48 \n\t" | |||
"stxvp 40, 0(%2) \n\t" | |||
"stxvp 42, 32(%2) \n\t" | |||
"stxvp 44, 64(%2) \n\t" | |||
"stxvp 46, 96(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 40, 0(%2) \n\t" | |||
"stxv 41, 16(%2) \n\t" | |||
"stxv 42, 32(%2) \n\t" | |||
"stxv 43, 48(%2) \n\t" | |||
"stxv 44, 64(%2) \n\t" | |||
"stxv 45, 80(%2) \n\t" | |||
"stxv 46, 96(%2) \n\t" | |||
"stxv 47, 112(%2) \n\t" | |||
#else | |||
"stxv 41, 0(%2) \n\t" | |||
"stxv 40, 16(%2) \n\t" | |||
"stxv 43, 32(%2) \n\t" | |||
"stxv 42, 48(%2) \n\t" | |||
"stxv 45, 64(%2) \n\t" | |||
"stxv 44, 80(%2) \n\t" | |||
"stxv 47, 96(%2) \n\t" | |||
"stxv 46, 112(%2) \n\t" | |||
#endif | |||
"#n=%1 alpha=%3 x=%0=%2" | |||
: | |||
@@ -112,10 +142,14 @@ static void dscal_kernel_8_zero (long n, double *x) | |||
".align 5 \n" | |||
"one%=: \n\t" | |||
"stxvp 32, 0(%2) \n\t" | |||
"stxvp 32, 32(%2) \n\t" | |||
"stxvp 32, 64(%2) \n\t" | |||
"stxvp 32, 96(%2) \n\t" | |||
"stxv 32, 0(%2) \n\t" | |||
"stxv 32, 16(%2) \n\t" | |||
"stxv 32, 32(%2) \n\t" | |||
"stxv 32, 48(%2) \n\t" | |||
"stxv 32, 64(%2) \n\t" | |||
"stxv 32, 80(%2) \n\t" | |||
"stxv 32, 96(%2) \n\t" | |||
"stxv 32, 112(%2) \n\t" | |||
"addi %2, %2, 128 \n\t" | |||
@@ -120,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, | |||
#if defined(POWER10) | |||
if ( n >= 32 ) | |||
{ | |||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; | |||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; | |||
for (i = 0; i < align; i++) { | |||
temp = y[i]; | |||
y[i] = x[i]; | |||
@@ -131,6 +131,10 @@ | |||
#define alpha f27 | |||
#if defined(PPC440) | |||
#define PREFETCHSIZE_A (3 * 4) | |||
#endif | |||
#if defined(PPCG4) | |||
#define PREFETCHSIZE_A (3 * 4) | |||
#endif | |||
@@ -96,6 +96,11 @@ | |||
#define X1 r22 | |||
#if defined(PPC440) | |||
#define PREFETCHSIZE_A 42 | |||
#define PREFETCHSIZE_C 7 | |||
#endif | |||
#if defined(PPCG4) | |||
#define PREFETCHSIZE_A 42 | |||
#define PREFETCHSIZE_C 7 | |||
@@ -67,13 +67,25 @@ static void saxpy_kernel_64(long n, float *x, float *y, float alpha) | |||
"xvmaddasp 37, 33, %x4 \n\t" | |||
"lxvp 32, 0(%2) \n\t" | |||
"stxvp 36, 0(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 36, 0(%3) \n\t" | |||
"stxv 37, 16(%3) \n\t" | |||
#else | |||
"stxv 37, 0(%3) \n\t" | |||
"stxv 36, 16(%3) \n\t" | |||
#endif | |||
"xvmaddasp 38, 34, %x4 \n\t" | |||
"xvmaddasp 39, 35, %x4 \n\t" | |||
"lxvp 34, 32(%2) \n\t" | |||
"stxvp 38, 32(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 38, 32(%3) \n\t" | |||
"stxv 39, 48(%3) \n\t" | |||
#else | |||
"stxv 39, 32(%3) \n\t" | |||
"stxv 38, 48(%3) \n\t" | |||
#endif | |||
"lxvp 36, 256(%3) \n\t" | |||
"lxvp 38, 288(%3) \n\t" | |||
@@ -82,13 +94,25 @@ static void saxpy_kernel_64(long n, float *x, float *y, float alpha) | |||
"xvmaddasp 45, 41, %x4 \n\t" | |||
"lxvp 40, 64(%2) \n\t" | |||
"stxvp 44, 64(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 44, 64(%3) \n\t" | |||
"stxv 45, 80(%3) \n\t" | |||
#else | |||
"stxv 45, 64(%3) \n\t" | |||
"stxv 44, 80(%3) \n\t" | |||
#endif | |||
"xvmaddasp 46, 42, %x4 \n\t" | |||
"xvmaddasp 47, 43, %x4 \n\t" | |||
"lxvp 42, 96(%2) \n\t" | |||
"stxvp 46, 96(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 46, 96(%3) \n\t" | |||
"stxv 47, 112(%3) \n\t" | |||
#else | |||
"stxv 47, 96(%3) \n\t" | |||
"stxv 46, 112(%3) \n\t" | |||
#endif | |||
"lxvp 44, 320(%3) \n\t" | |||
"lxvp 46, 352(%3) \n\t" | |||
@@ -97,13 +121,25 @@ static void saxpy_kernel_64(long n, float *x, float *y, float alpha) | |||
"xvmaddasp 57, 49, %x4 \n\t" | |||
"lxvp 48, 128(%2) \n\t" | |||
"stxvp 56, 128(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 56, 128(%3) \n\t" | |||
"stxv 57, 144(%3) \n\t" | |||
#else | |||
"stxv 57, 128(%3) \n\t" | |||
"stxv 56, 144(%3) \n\t" | |||
#endif | |||
"xvmaddasp 58, 50, %x4 \n\t" | |||
"xvmaddasp 59, 51, %x4 \n\t" | |||
"lxvp 50, 160(%2) \n\t" | |||
"stxvp 58, 160(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 58, 160(%3) \n\t" | |||
"stxv 59, 176(%3) \n\t" | |||
#else | |||
"stxv 59, 160(%3) \n\t" | |||
"stxv 58, 176(%3) \n\t" | |||
#endif | |||
"lxvp 56, 384(%3) \n\t" | |||
"lxvp 58, 416(%3) \n\t" | |||
@@ -112,13 +148,25 @@ static void saxpy_kernel_64(long n, float *x, float *y, float alpha) | |||
"xvmaddasp 61, 53, %x4 \n\t" | |||
"lxvp 52, 192(%2) \n\t" | |||
"stxvp 60, 192(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 60, 192(%3) \n\t" | |||
"stxv 61, 208(%3) \n\t" | |||
#else | |||
"stxv 61, 192(%3) \n\t" | |||
"stxv 60, 208(%3) \n\t" | |||
#endif | |||
"xvmaddasp 62, 54, %x4 \n\t" | |||
"xvmaddasp 63, 55, %x4 \n\t" | |||
"lxvp 54, 224(%2) \n\t" | |||
"stxvp 62, 224(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 62, 224(%3) \n\t" | |||
"stxv 63, 240(%3) \n\t" | |||
#else | |||
"stxv 63, 224(%3) \n\t" | |||
"stxv 62, 240(%3) \n\t" | |||
#endif | |||
"lxvp 60, 448(%3) \n\t" | |||
"lxvp 62, 480(%3) \n\t" | |||
@@ -150,14 +198,43 @@ static void saxpy_kernel_64(long n, float *x, float *y, float alpha) | |||
"xvmaddasp 61, 53, %x4 \n\t" | |||
"xvmaddasp 62, 54, %x4 \n\t" | |||
"xvmaddasp 63, 55, %x4 \n\t" | |||
"stxvp 36, 0(%3) \n\t" | |||
"stxvp 38, 32(%3) \n\t" | |||
"stxvp 44, 64(%3) \n\t" | |||
"stxvp 46, 96(%3) \n\t" | |||
"stxvp 56, 128(%3) \n\t" | |||
"stxvp 58, 160(%3) \n\t" | |||
"stxvp 60, 192(%3) \n\t" | |||
"stxvp 62, 224(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 36, 0(%3) \n\t" | |||
"stxv 37, 16(%3) \n\t" | |||
"stxv 38, 32(%3) \n\t" | |||
"stxv 39, 48(%3) \n\t" | |||
"stxv 44, 64(%3) \n\t" | |||
"stxv 45, 80(%3) \n\t" | |||
"stxv 46, 96(%3) \n\t" | |||
"stxv 47, 112(%3) \n\t" | |||
"stxv 56, 128(%3) \n\t" | |||
"stxv 57, 144(%3) \n\t" | |||
"stxv 58, 160(%3) \n\t" | |||
"stxv 59, 176(%3) \n\t" | |||
"stxv 60, 192(%3) \n\t" | |||
"stxv 61, 208(%3) \n\t" | |||
"stxv 62, 224(%3) \n\t" | |||
"stxv 63, 240(%3) \n\t" | |||
#else | |||
"stxv 37, 0(%3) \n\t" | |||
"stxv 36, 16(%3) \n\t" | |||
"stxv 39, 32(%3) \n\t" | |||
"stxv 38, 48(%3) \n\t" | |||
"stxv 45, 64(%3) \n\t" | |||
"stxv 44, 80(%3) \n\t" | |||
"stxv 47, 96(%3) \n\t" | |||
"stxv 46, 112(%3) \n\t" | |||
"stxv 57, 128(%3) \n\t" | |||
"stxv 56, 144(%3) \n\t" | |||
"stxv 59, 160(%3) \n\t" | |||
"stxv 58, 176(%3) \n\t" | |||
"stxv 61, 192(%3) \n\t" | |||
"stxv 60, 208(%3) \n\t" | |||
"stxv 63, 224(%3) \n\t" | |||
"stxv 62, 240(%3) \n\t" | |||
#endif | |||
"#n=%1 x=%5=%2 y=%0=%3 t0=%x4\n" | |||
: | |||
@@ -66,7 +66,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS | |||
if ( n >= 64 ) | |||
{ | |||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; | |||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; | |||
for (i = 0; i < align; i++) { | |||
y[i] += da * x[i] ; | |||
} | |||
@@ -88,7 +88,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
if ( n >= 128 ) | |||
{ | |||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; | |||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; | |||
for (i = 0; i < align; i++) { | |||
y[i] = x[i] ; | |||
} | |||
@@ -60,10 +60,25 @@ static void sscal_kernel_16 (long n, float *x, float alpha) | |||
"lxvp 36, 192(%2) \n\t" | |||
"lxvp 38, 224(%2) \n\t" | |||
"stxvp 40, 0(%2) \n\t" | |||
"stxvp 42, 32(%2) \n\t" | |||
"stxvp 44, 64(%2) \n\t" | |||
"stxvp 46, 96(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 40, 0(%2) \n\t" | |||
"stxv 41, 16(%2) \n\t" | |||
"stxv 42, 32(%2) \n\t" | |||
"stxv 43, 48(%2) \n\t" | |||
"stxv 44, 64(%2) \n\t" | |||
"stxv 45, 80(%2) \n\t" | |||
"stxv 46, 96(%2) \n\t" | |||
"stxv 47, 112(%2) \n\t" | |||
#else | |||
"stxv 41, 0(%2) \n\t" | |||
"stxv 40, 16(%2) \n\t" | |||
"stxv 43, 32(%2) \n\t" | |||
"stxv 42, 48(%2) \n\t" | |||
"stxv 45, 64(%2) \n\t" | |||
"stxv 44, 80(%2) \n\t" | |||
"stxv 47, 96(%2) \n\t" | |||
"stxv 46, 112(%2) \n\t" | |||
#endif | |||
"addi %2, %2, 128 \n\t" | |||
@@ -82,10 +97,25 @@ static void sscal_kernel_16 (long n, float *x, float alpha) | |||
"xvmulsp 46, 38, 48 \n\t" | |||
"xvmulsp 47, 39, 48 \n\t" | |||
"stxvp 40, 0(%2) \n\t" | |||
"stxvp 42, 32(%2) \n\t" | |||
"stxvp 44, 64(%2) \n\t" | |||
"stxvp 46, 96(%2) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 40, 0(%2) \n\t" | |||
"stxv 41, 16(%2) \n\t" | |||
"stxv 42, 32(%2) \n\t" | |||
"stxv 43, 48(%2) \n\t" | |||
"stxv 44, 64(%2) \n\t" | |||
"stxv 45, 80(%2) \n\t" | |||
"stxv 46, 96(%2) \n\t" | |||
"stxv 47, 112(%2) \n\t" | |||
#else | |||
"stxv 41, 0(%2) \n\t" | |||
"stxv 40, 16(%2) \n\t" | |||
"stxv 43, 32(%2) \n\t" | |||
"stxv 42, 48(%2) \n\t" | |||
"stxv 45, 64(%2) \n\t" | |||
"stxv 44, 80(%2) \n\t" | |||
"stxv 47, 96(%2) \n\t" | |||
"stxv 46, 112(%2) \n\t" | |||
#endif | |||
"#n=%1 alpha=%3 x=%0=%2" | |||
: | |||
@@ -113,10 +143,14 @@ static void sscal_kernel_16_zero (long n, float *x) | |||
".align 5 \n" | |||
"one%=: \n\t" | |||
"stxvp 32, 0(%2) \n\t" | |||
"stxvp 32, 32(%2) \n\t" | |||
"stxvp 32, 64(%2) \n\t" | |||
"stxvp 32, 96(%2) \n\t" | |||
"stxv 32, 0(%2) \n\t" | |||
"stxv 32, 16(%2) \n\t" | |||
"stxv 32, 32(%2) \n\t" | |||
"stxv 32, 48(%2) \n\t" | |||
"stxv 32, 64(%2) \n\t" | |||
"stxv 32, 80(%2) \n\t" | |||
"stxv 32, 96(%2) \n\t" | |||
"stxv 32, 112(%2) \n\t" | |||
"addi %2, %2, 128 \n\t" | |||
@@ -120,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, | |||
#if defined(POWER10) | |||
if ( n >= 64 ) | |||
{ | |||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; | |||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; | |||
for (i = 0; i < align; i++) { | |||
temp = y[i]; | |||
y[i] = x[i]; | |||
@@ -57,25 +57,79 @@ static void sswap_kernel_32 (long n, float *x, float *y) | |||
"lxvp 60, 192(%3) \n\t" | |||
"lxvp 62, 224(%3) \n\t" | |||
"stxvp 32, 0(%3) \n\t" | |||
"stxvp 34, 32(%3) \n\t" | |||
"stxvp 36, 64(%3) \n\t" | |||
"stxvp 38, 96(%3) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 32, 0(%3) \n\t" | |||
"stxv 33, 16(%3) \n\t" | |||
"stxv 34, 32(%3) \n\t" | |||
"stxv 35, 48(%3) \n\t" | |||
"stxv 36, 64(%3) \n\t" | |||
"stxv 37, 80(%3) \n\t" | |||
"stxv 38, 96(%3) \n\t" | |||
"stxv 39, 112(%3) \n\t" | |||
"stxvp 40, 128(%3) \n\t" | |||
"stxvp 42, 160(%3) \n\t" | |||
"stxvp 44, 192(%3) \n\t" | |||
"stxvp 46, 224(%3) \n\t" | |||
"stxv 40, 128(%3) \n\t" | |||
"stxv 41, 144(%3) \n\t" | |||
"stxv 42, 160(%3) \n\t" | |||
"stxv 43, 176(%3) \n\t" | |||
"stxv 44, 192(%3) \n\t" | |||
"stxv 45, 208(%3) \n\t" | |||
"stxv 46, 224(%3) \n\t" | |||
"stxv 47, 240(%3) \n\t" | |||
"stxvp 48, 0(%4) \n\t" | |||
"stxvp 50, 32(%4) \n\t" | |||
"stxvp 52, 64(%4) \n\t" | |||
"stxvp 54, 96(%4) \n\t" | |||
"stxv 48, 0(%4) \n\t" | |||
"stxv 49, 16(%4) \n\t" | |||
"stxv 50, 32(%4) \n\t" | |||
"stxv 51, 48(%4) \n\t" | |||
"stxv 52, 64(%4) \n\t" | |||
"stxv 53, 80(%4) \n\t" | |||
"stxv 54, 96(%4) \n\t" | |||
"stxv 55, 112(%4) \n\t" | |||
"stxvp 56, 128(%4) \n\t" | |||
"stxvp 58, 160(%4) \n\t" | |||
"stxvp 60, 192(%4) \n\t" | |||
"stxvp 62, 224(%4) \n\t" | |||
"stxv 56, 128(%4) \n\t" | |||
"stxv 57, 144(%4) \n\t" | |||
"stxv 58, 160(%4) \n\t" | |||
"stxv 59, 176(%4) \n\t" | |||
"stxv 60, 192(%4) \n\t" | |||
"stxv 61, 208(%4) \n\t" | |||
"stxv 62, 224(%4) \n\t" | |||
"stxv 63, 240(%4) \n\t" | |||
#else | |||
"stxv 33, 0(%3) \n\t" | |||
"stxv 32, 16(%3) \n\t" | |||
"stxv 35, 32(%3) \n\t" | |||
"stxv 34, 48(%3) \n\t" | |||
"stxv 37, 64(%3) \n\t" | |||
"stxv 36, 80(%3) \n\t" | |||
"stxv 39, 96(%3) \n\t" | |||
"stxv 38, 112(%3) \n\t" | |||
"stxv 41, 128(%3) \n\t" | |||
"stxv 40, 144(%3) \n\t" | |||
"stxv 43, 160(%3) \n\t" | |||
"stxv 42, 176(%3) \n\t" | |||
"stxv 45, 192(%3) \n\t" | |||
"stxv 44, 208(%3) \n\t" | |||
"stxv 47, 224(%3) \n\t" | |||
"stxv 46, 240(%3) \n\t" | |||
"stxv 49, 0(%4) \n\t" | |||
"stxv 48, 16(%4) \n\t" | |||
"stxv 51, 32(%4) \n\t" | |||
"stxv 50, 48(%4) \n\t" | |||
"stxv 53, 64(%4) \n\t" | |||
"stxv 52, 80(%4) \n\t" | |||
"stxv 55, 96(%4) \n\t" | |||
"stxv 54, 112(%4) \n\t" | |||
"stxv 57, 128(%4) \n\t" | |||
"stxv 56, 144(%4) \n\t" | |||
"stxv 59, 160(%4) \n\t" | |||
"stxv 58, 176(%4) \n\t" | |||
"stxv 61, 192(%4) \n\t" | |||
"stxv 60, 208(%4) \n\t" | |||
"stxv 63, 224(%4) \n\t" | |||
"stxv 62, 240(%4) \n\t" | |||
#endif | |||
"addi %4, %4, 256 \n\t" | |||
"addi %3, %3, 256 \n\t" | |||
@@ -125,10 +125,25 @@ static void zaxpy_kernel_4 (long n, double *x, double *y, | |||
"xvmaddadp 38, %x10, 33 \n\t" | |||
"xvmaddadp 39, %x11, 33 \n\t" | |||
"stxvp 48, 0(%12) \n\t" | |||
"stxvp 50, 32(%12) \n\t" | |||
"stxvp 34, 64(%12) \n\t" | |||
"stxvp 38, 96(%12) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 48, 0(%12) \n\t" | |||
"stxv 49, 16(%12) \n\t" | |||
"stxv 50, 32(%12) \n\t" | |||
"stxv 51, 48(%12) \n\t" | |||
"stxv 34, 64(%12) \n\t" | |||
"stxv 35, 80(%12) \n\t" | |||
"stxv 38, 96(%12) \n\t" | |||
"stxv 39, 112(%12) \n\t" | |||
#else | |||
"stxv 49, 0(%12) \n\t" | |||
"stxv 48, 16(%12) \n\t" | |||
"stxv 51, 32(%12) \n\t" | |||
"stxv 50, 48(%12) \n\t" | |||
"stxv 35, 64(%12) \n\t" | |||
"stxv 34, 80(%12) \n\t" | |||
"stxv 39, 96(%12) \n\t" | |||
"stxv 38, 112(%12) \n\t" | |||
#endif | |||
"addi %12, %12, 128 \n\t" | |||
@@ -172,10 +187,25 @@ static void zaxpy_kernel_4 (long n, double *x, double *y, | |||
"xvmaddadp 38, %x10, 33 \n\t" | |||
"xvmaddadp 39, %x11, 33 \n\t" | |||
"stxvp 48, 0(%12) \n\t" | |||
"stxvp 50, 32(%12) \n\t" | |||
"stxvp 34, 64(%12) \n\t" | |||
"stxvp 38, 96(%12) \n\t" | |||
#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) | |||
"stxv 48, 0(%12) \n\t" | |||
"stxv 49, 16(%12) \n\t" | |||
"stxv 50, 32(%12) \n\t" | |||
"stxv 51, 48(%12) \n\t" | |||
"stxv 34, 64(%12) \n\t" | |||
"stxv 35, 80(%12) \n\t" | |||
"stxv 38, 96(%12) \n\t" | |||
"stxv 39, 112(%12) \n\t" | |||
#else | |||
"stxv 49, 0(%12) \n\t" | |||
"stxv 48, 16(%12) \n\t" | |||
"stxv 51, 32(%12) \n\t" | |||
"stxv 50, 48(%12) \n\t" | |||
"stxv 35, 64(%12) \n\t" | |||
"stxv 34, 80(%12) \n\t" | |||
"stxv 39, 96(%12) \n\t" | |||
"stxv 38, 112(%12) \n\t" | |||
#endif | |||
"#n=%1 x=%13=%2 y=%0=%3 alpha=(%15,%16) mvecp=%14=%17 ytmp=%12\n" | |||
"#t0=%x4 t1=%x5 t2=%x6 t3=%x7 t4=%x8 t5=%x9 t6=%x10 t7=%x11" | |||
@@ -1239,7 +1239,6 @@ static void init_parameter(void) { | |||
#ifdef BUILD_BFLOAT16 | |||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | |||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | |||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | |||
#endif | |||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | |||
@@ -20,7 +20,6 @@ SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_skylakex.c | |||
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_skylakex.c | |||
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_skylakex.c | |||
ifndef DYNAMIC_ARCH | |||
DGEMMKERNEL = dgemm_kernel_16x2_skylakex.c | |||
DTRMMKERNEL = dgemm_kernel_16x2_skylakex.c | |||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
@@ -28,11 +27,7 @@ DGEMMITCOPY = dgemm_tcopy_16_skylakex.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
else | |||
DGEMMKERNEL = dgemm_kernel_4x8_skylakex_2.c | |||
DGEMMONCOPY = dgemm_ncopy_8_skylakex.c | |||
DGEMMOTCOPY = dgemm_tcopy_8_skylakex.c | |||
endif | |||
DGEMM_SMALL_M_PERMIT = dgemm_small_kernel_permit_skylakex.c | |||
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_skylakex.c | |||
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_skylakex.c | |||
@@ -48,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | |||
#define MASK_STORE_512(M, N) \ | |||
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | |||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ | |||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \ | |||
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | |||
#endif | |||
@@ -266,7 +266,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
int mm = M - i; | |||
if (!mm) return 0; | |||
if (mm > 4 || K < 16) { | |||
register __mmask8 mask asm("k1") = (1UL << mm) - 1; | |||
register __mmask8 mask = (1UL << mm) - 1; | |||
for (j = 0; j < n6; j += 6) { | |||
DECLARE_RESULT_512(0, 0); | |||
DECLARE_RESULT_512(0, 1); | |||
@@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
_mm512_storeu_pd(&C[(j+N)*ldc + i + (M*8)], result##M##N) | |||
#define MASK_STORE_512(M, N) \ | |||
result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | |||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "k"(mask)); \ | |||
asm("vfmadd231pd (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*8)]), "v"(beta_512), "Yk"(mask)); \ | |||
_mm512_mask_storeu_pd(&C[(j+N)*ldc + i + (M*8)], mask, result##M##N) | |||
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_pd(result##M##N, alpha_512); \ | |||
__m512d tmp##M##N = _mm512_i64gather_pd(vindex_n, &C[(j + N*8)*ldc + i + M], 8); \ | |||
@@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
} | |||
int mm = M - i; | |||
if (mm >= 6) { | |||
register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||
register __mmask16 mask = (1UL << mm) - 1; | |||
for (j = 0; j < n8; j += 8) { | |||
DECLARE_RESULT_512(0, 0); | |||
DECLARE_RESULT_512(0, 1); | |||
@@ -48,7 +48,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | |||
#define MASK_STORE_512(M, N) \ | |||
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | |||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ | |||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \ | |||
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | |||
#endif | |||
@@ -267,7 +267,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
int mm = M - i; | |||
if (!mm) return 0; | |||
if (mm > 8 || K < 32) { | |||
register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||
register __mmask16 mask = (1UL << mm) - 1; | |||
for (j = 0; j < n6; j += 6) { | |||
DECLARE_RESULT_512(0, 0); | |||
DECLARE_RESULT_512(0, 1); | |||
@@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
_mm512_storeu_ps(&C[(j+N)*ldc + i + (M*16)], result##M##N) | |||
#define MASK_STORE_512(M, N) \ | |||
result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | |||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "k"(mask)); \ | |||
asm("vfmadd231ps (%1), %2, %0 %{%3%}": "+v"(result##M##N):"r"(&C[(j+N)*ldc + i + (M*16)]), "v"(beta_512), "Yk"(mask)); \ | |||
_mm512_mask_storeu_ps(&C[(j+N)*ldc + i + (M*16)], mask, result##M##N) | |||
#define SCATTER_STORE_512(M, N) result##M##N = _mm512_mul_ps(result##M##N, alpha_512); \ | |||
__m512 tmp##M##N = _mm512_i32gather_ps(vindex_n, &C[(j + N*16)*ldc + i + M], 4); \ | |||
@@ -303,7 +303,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp | |||
} | |||
int mm = M - i; | |||
if (mm >= 12) { | |||
register __mmask16 mask asm("k1") = (1UL << mm) - 1; | |||
register __mmask16 mask = (1UL << mm) - 1; | |||
for (j = 0; j < n8; j += 8) { | |||
DECLARE_RESULT_512(0, 0); | |||
DECLARE_RESULT_512(0, 1); | |||
@@ -452,11 +452,6 @@ | |||
MOVDDUP(4 * SIZE, A1, a1) | |||
movsd 0 * SIZE(YY), yy1 | |||
movhpd 1 * SIZE(YY), yy1 | |||
movsd 2 * SIZE(YY), yy2 | |||
movhpd 3 * SIZE(YY), yy2 | |||
movapd 8 * SIZE(XX), xtemp1 | |||
movapd 10 * SIZE(XX), xtemp2 | |||
movapd 12 * SIZE(XX), xtemp3 | |||
@@ -475,6 +470,12 @@ | |||
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | |||
ALIGN_3 | |||
.L12_prep: | |||
movsd 0 * SIZE(YY), yy1 | |||
movhpd 1 * SIZE(YY), yy1 | |||
movsd 2 * SIZE(YY), yy2 | |||
movhpd 3 * SIZE(YY), yy2 | |||
.L12: | |||
movapd xtemp1, xt1 | |||
mulpd a1, xt1 | |||
@@ -608,8 +609,6 @@ | |||
movlpd yy2, 6 * SIZE(YY) | |||
movhpd yy2, 7 * SIZE(YY) | |||
movsd 10 * SIZE(YY), yy2 | |||
movhpd 11 * SIZE(YY), yy2 | |||
movapd xtemp2, xt1 | |||
movapd 18 * SIZE(XX), xtemp2 | |||
@@ -621,8 +620,6 @@ | |||
movlpd yy1, 4 * SIZE(YY) | |||
movhpd yy1, 5 * SIZE(YY) | |||
movsd 8 * SIZE(YY), yy1 | |||
movhpd 9 * SIZE(YY), yy1 | |||
subq $-16 * SIZE, XX | |||
addq $ 8 * SIZE, YY | |||
@@ -630,7 +627,8 @@ | |||
addq $ 8 * SIZE, A2 | |||
decq I | |||
jg .L12 | |||
jg .L12_prep | |||
jmp .L15 | |||
ALIGN_3 | |||
.L14: | |||
@@ -641,7 +639,6 @@ | |||
jle .L16 | |||
MOVDDUP(6 * SIZE - (4 * SIZE), A2, a2) | |||
jmp .L15_pastcheck | |||
.L15: | |||
movq M, I | |||
@@ -650,6 +647,11 @@ | |||
testq $2, I | |||
jle .L16 | |||
movsd 0 * SIZE(YY), yy1 | |||
movhpd 1 * SIZE(YY), yy1 | |||
movsd 2 * SIZE(YY), yy2 | |||
movhpd 3 * SIZE(YY), yy2 | |||
.L15_pastcheck: | |||
movapd xtemp1, xt1 | |||
mulpd a1, xt1 | |||
@@ -705,8 +707,6 @@ | |||
movlpd yy2, 2 * SIZE(YY) | |||
movhpd yy2, 3 * SIZE(YY) | |||
movsd 6 * SIZE(YY), yy2 | |||
movhpd 7 * SIZE(YY), yy2 | |||
movapd xtemp2, xt1 | |||
movapd 10 * SIZE(XX), xtemp2 | |||
@@ -717,8 +717,6 @@ | |||
movlpd yy1, 0 * SIZE(YY) | |||
movhpd yy1, 1 * SIZE(YY) | |||
movsd 4 * SIZE(YY), yy1 | |||
movhpd 5 * SIZE(YY), yy1 | |||
addq $4 * SIZE, YY | |||
addq $4 * SIZE, A1 | |||
@@ -731,6 +729,9 @@ | |||
MOVDDUP(1 * SIZE, A1, a2) | |||
movsd 0 * SIZE(YY), yy1 | |||
movhpd 1 * SIZE(YY), yy1 | |||
movapd xtemp1, xt1 | |||
mulpd a1, xt1 | |||
mulpd atemp1, a1 | |||
@@ -2,9 +2,9 @@ add_subdirectory(SRC) | |||
if(BUILD_TESTING) | |||
add_subdirectory(TESTING) | |||
endif() | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc @ONLY) | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/blas.pc | |||
${CMAKE_CURRENT_BINARY_DIR}/${BLASLIB}.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
COMPONENT Development | |||
) |
@@ -97,10 +97,10 @@ if(BUILD_COMPLEX16) | |||
endif() | |||
list(REMOVE_DUPLICATES SOURCES) | |||
add_library(blas ${SOURCES}) | |||
add_library(${BLASLIB} ${SOURCES}) | |||
set_target_properties( | |||
blas PROPERTIES | |||
${BLASLIB} PROPERTIES | |||
VERSION ${LAPACK_VERSION} | |||
SOVERSION ${LAPACK_MAJOR_VERSION} | |||
) | |||
lapack_install_library(blas) | |||
lapack_install_library(${BLASLIB}) |
@@ -2,7 +2,7 @@ macro(add_blas_test name src) | |||
get_filename_component(baseNAME ${src} NAME_WE) | |||
set(TEST_INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${baseNAME}.in") | |||
add_executable(${name} ${src}) | |||
target_link_libraries(${name} blas) | |||
target_link_libraries(${name} ${BLASLIB}) | |||
if(EXISTS "${TEST_INPUT}") | |||
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}" | |||
-DTEST=$<TARGET_FILE:${name}> | |||
@@ -5,4 +5,4 @@ Name: BLAS | |||
Description: FORTRAN reference implementation of BLAS Basic Linear Algebra Subprograms | |||
Version: @LAPACK_VERSION@ | |||
URL: http://www.netlib.org/blas/ | |||
Libs: -L${libdir} -lblas | |||
Libs: -L${libdir} -l@BLASLIB@ |
@@ -1,7 +1,7 @@ | |||
message(STATUS "CBLAS enable") | |||
enable_language(C) | |||
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets) | |||
set(LAPACK_INSTALL_EXPORT_NAME ${CBLASLIB}-targets) | |||
# Create a header file cblas.h for the routines called in my C programs | |||
include(FortranCInterface) | |||
@@ -42,15 +42,15 @@ if(BUILD_TESTING) | |||
endif() | |||
if(NOT BLAS_FOUND) | |||
set(ALL_TARGETS ${ALL_TARGETS} blas) | |||
set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB}) | |||
endif() | |||
# Export cblas targets from the | |||
# install tree, if any. | |||
set(_cblas_config_install_guard_target "") | |||
if(ALL_TARGETS) | |||
install(EXPORT cblas-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
install(EXPORT ${CBLASLIB}-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
# Choose one of the cblas targets to use as a guard for | |||
@@ -61,7 +61,7 @@ endif() | |||
# Export cblas targets from the build tree, if any. | |||
set(_cblas_config_build_guard_target "") | |||
if(ALL_TARGETS) | |||
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake) | |||
export(TARGETS ${ALL_TARGETS} FILE ${CBLASLIB}-targets.cmake) | |||
# Choose one of the cblas targets to use as a guard | |||
# for cblas-config.cmake to load targets from the build tree. | |||
@@ -69,26 +69,26 @@ if(ALL_TARGETS) | |||
endif() | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-version.cmake.in | |||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY) | |||
${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-build.cmake.in | |||
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY) | |||
${LAPACK_BINARY_DIR}/${CBLASLIB}-config.cmake @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc @ONLY) | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc | |||
${CMAKE_CURRENT_BINARY_DIR}/${CBLASLIB}.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY) | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake @ONLY) | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake | |||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${CBLASLIB}-config.cmake | |||
${LAPACK_BINARY_DIR}/${CBLASLIB}-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||
) | |||
#install(EXPORT cblas-targets | |||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION} | |||
#install(EXPORT ${CBLASLIB}-targets | |||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CBLASLIB}-${LAPACK_VERSION} | |||
# COMPONENT Development | |||
# ) |
@@ -5,6 +5,6 @@ Name: CBLAS | |||
Description: C Standard Interface to BLAS Basic Linear Algebra Subprograms | |||
Version: @LAPACK_VERSION@ | |||
URL: http://www.netlib.org/blas/#_cblas | |||
Libs: -L${libdir} -lcblas | |||
Libs: -L${libdir} -l@CBLASLIB@ | |||
Cflags: -I${includedir} | |||
Requires.private: blas | |||
Requires.private: @BLASLIB@ |
@@ -4,11 +4,11 @@ find_package(LAPACK NO_MODULE) | |||
# Load lapack targets from the build tree, including lapacke targets. | |||
if(NOT TARGET lapacke) | |||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||
endif() | |||
# Report cblas header search locations from build tree. | |||
set(CBLAS_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | |||
# Report cblas libraries. | |||
set(CBLAS_LIBRARIES cblas) | |||
set(CBLAS_LIBRARIES @CBLASLIB@) |
@@ -5,19 +5,19 @@ get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH) | |||
# Load the LAPACK package with which we were built. | |||
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") | |||
set(LAPACK_DIR "${_CBLAS_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@") | |||
find_package(LAPACK NO_MODULE) | |||
# Load cblas targets from the install tree. | |||
if(NOT TARGET cblas) | |||
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake) | |||
if(NOT TARGET @CBLASLIB@) | |||
include(${_CBLAS_SELF_DIR}/@CBLASLIB@-targets.cmake) | |||
endif() | |||
# Report cblas header search locations. | |||
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include) | |||
# Report cblas libraries. | |||
set(CBLAS_LIBRARIES cblas) | |||
set(CBLAS_LIBRARIES @CBLASLIB@) | |||
unset(_CBLAS_PREFIX) | |||
unset(_CBLAS_SELF_DIR) |
@@ -1,8 +1,8 @@ | |||
add_executable(xexample1_CBLAS cblas_example1.c) | |||
add_executable(xexample2_CBLAS cblas_example2.c) | |||
target_link_libraries(xexample1_CBLAS cblas) | |||
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES}) | |||
target_link_libraries(xexample1_CBLAS ${CBLASLIB}) | |||
target_link_libraries(xexample2_CBLAS ${CBLASLIB} ${BLAS_LIBRARIES}) | |||
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS) | |||
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS) |
@@ -11,7 +11,7 @@ int main ( ) | |||
double *a, *x, *y; | |||
double alpha, beta; | |||
int m, n, lda, incx, incy, i; | |||
CBLAS_INDEX m, n, lda, incx, incy, i; | |||
Layout = CblasColMajor; | |||
transa = CblasNoTrans; | |||
@@ -9,7 +9,7 @@ | |||
int main (int argc, char **argv ) | |||
{ | |||
int rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||
CBLAS_INDEX rout=-1,info=0,m,n,k,lda,ldb,ldc; | |||
double A[2] = {0.0,0.0}, | |||
B[2] = {0.0,0.0}, | |||
C[2] = {0.0,0.0}, | |||
@@ -1,6 +1,7 @@ | |||
#ifndef CBLAS_H | |||
#define CBLAS_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
@@ -11,9 +12,9 @@ extern "C" { /* Assume C declarations for C++ */ | |||
* Enumerated and derived types | |||
*/ | |||
#ifdef WeirdNEC | |||
#define CBLAS_INDEX long | |||
#define CBLAS_INDEX int64_t | |||
#else | |||
#define CBLAS_INDEX int | |||
#define CBLAS_INDEX int32_t | |||
#endif | |||
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT; | |||
@@ -9,6 +9,8 @@ | |||
#ifndef CBLAS_F77_H | |||
#define CBLAS_F77_H | |||
#include <stdint.h> | |||
#ifdef CRAY | |||
#include <fortran.h> | |||
#define F77_CHAR _fcd | |||
@@ -17,8 +19,12 @@ | |||
#define F77_STRLEN(a) (_fcdlen) | |||
#endif | |||
#ifndef F77_INT | |||
#ifdef WeirdNEC | |||
#define F77_INT long | |||
#define F77_INT int64_t | |||
#else | |||
#define F77_INT int32_t | |||
#endif | |||
#endif | |||
#ifdef F77_CHAR | |||
@@ -113,16 +113,16 @@ if(BUILD_COMPLEX16) | |||
endif() | |||
list(REMOVE_DUPLICATES SOURCES) | |||
add_library(cblas ${SOURCES}) | |||
add_library(${CBLASLIB} ${SOURCES}) | |||
set_target_properties( | |||
cblas PROPERTIES | |||
${CBLASLIB} PROPERTIES | |||
LINKER_LANGUAGE C | |||
VERSION ${LAPACK_VERSION} | |||
SOVERSION ${LAPACK_MAJOR_VERSION} | |||
) | |||
target_include_directories(cblas PUBLIC | |||
target_include_directories(${CBLASLIB} PUBLIC | |||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> | |||
$<INSTALL_INTERFACE:include> | |||
) | |||
target_link_libraries(cblas PRIVATE ${BLAS_LIBRARIES}) | |||
lapack_install_library(cblas) | |||
target_link_libraries(${CBLASLIB} PRIVATE ${BLAS_LIBRARIES}) | |||
lapack_install_library(${CBLASLIB}) |
@@ -52,9 +52,9 @@ if(BUILD_SINGLE) | |||
add_executable(xscblat2 c_sblat2.f ${STESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
add_executable(xscblat3 c_sblat3.f ${STESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
target_link_libraries(xscblat1 cblas) | |||
target_link_libraries(xscblat2 cblas) | |||
target_link_libraries(xscblat3 cblas) | |||
target_link_libraries(xscblat1 ${CBLASLIB}) | |||
target_link_libraries(xscblat2 ${CBLASLIB}) | |||
target_link_libraries(xscblat3 ${CBLASLIB}) | |||
add_cblas_test(stest1.out "" xscblat1) | |||
add_cblas_test(stest2.out sin2 xscblat2) | |||
@@ -66,9 +66,9 @@ if(BUILD_DOUBLE) | |||
add_executable(xdcblat2 c_dblat2.f ${DTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
add_executable(xdcblat3 c_dblat3.f ${DTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
target_link_libraries(xdcblat1 cblas) | |||
target_link_libraries(xdcblat2 cblas) | |||
target_link_libraries(xdcblat3 cblas) | |||
target_link_libraries(xdcblat1 ${CBLASLIB}) | |||
target_link_libraries(xdcblat2 ${CBLASLIB}) | |||
target_link_libraries(xdcblat3 ${CBLASLIB}) | |||
add_cblas_test(dtest1.out "" xdcblat1) | |||
add_cblas_test(dtest2.out din2 xdcblat2) | |||
@@ -80,9 +80,9 @@ if(BUILD_COMPLEX) | |||
add_executable(xccblat2 c_cblat2.f ${CTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
add_executable(xccblat3 c_cblat3.f ${CTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
target_link_libraries(xccblat1 cblas ${BLAS_LIBRARIES}) | |||
target_link_libraries(xccblat2 cblas) | |||
target_link_libraries(xccblat3 cblas) | |||
target_link_libraries(xccblat1 ${CBLASLIB} ${BLAS_LIBRARIES}) | |||
target_link_libraries(xccblat2 ${CBLASLIB}) | |||
target_link_libraries(xccblat3 ${CBLASLIB}) | |||
add_cblas_test(ctest1.out "" xccblat1) | |||
add_cblas_test(ctest2.out cin2 xccblat2) | |||
@@ -94,9 +94,9 @@ if(BUILD_COMPLEX16) | |||
add_executable(xzcblat2 c_zblat2.f ${ZTESTL2O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
add_executable(xzcblat3 c_zblat3.f ${ZTESTL3O} ${LAPACK_BINARY_DIR}/include/cblas_test.h) | |||
target_link_libraries(xzcblat1 cblas) | |||
target_link_libraries(xzcblat2 cblas) | |||
target_link_libraries(xzcblat3 cblas) | |||
target_link_libraries(xzcblat1 ${CBLASLIB}) | |||
target_link_libraries(xzcblat2 ${CBLASLIB}) | |||
target_link_libraries(xzcblat3 ${CBLASLIB}) | |||
add_cblas_test(ztest1.out "" xzcblat1) | |||
add_cblas_test(ztest2.out zin2 xzcblat2) | |||
@@ -14,6 +14,19 @@ macro( CheckLAPACKCompilerFlags ) | |||
set( FPE_EXIT FALSE ) | |||
# ILP64 builds: make default INTEGER 64 bits wide. The option that does this
# is spelled differently by each Fortran compiler family, so select it from
# the compiler ID instead of assuming the GNU spelling for everything that
# is not Intel.
if(FORTRAN_ILP)
  if(CMAKE_Fortran_COMPILER_ID MATCHES "^Intel(LLVM)?$")
    # ifort and ifx share the option; only the Windows driver syntax differs.
    if(WIN32)
      set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} /integer-size:64")
    else()
      set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -integer-size 64")
    endif()
  elseif(CMAKE_Fortran_COMPILER_ID MATCHES "^(PGI|NVHPC)$")
    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -i8")
  elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "XL")
    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qintsize=8")
  else()
    # GNU spelling; kept as the fallback for every other compiler, which is
    # what this block did before the per-compiler cases were added.
    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-integer-8")
  endif()
endif()
# GNU Fortran | |||
if( CMAKE_Fortran_COMPILER_ID STREQUAL "GNU" ) | |||
if( "${CMAKE_Fortran_FLAGS}" MATCHES "-ffpe-trap=[izoupd]") | |||
@@ -1,7 +1,7 @@ | |||
# Load lapack targets from the build tree if necessary. | |||
set(_LAPACK_TARGET "@_lapack_config_build_guard_target@") | |||
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||
endif() | |||
unset(_LAPACK_TARGET) | |||
@@ -4,7 +4,7 @@ get_filename_component(_LAPACK_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) | |||
# Load lapack targets from the install tree if necessary. | |||
set(_LAPACK_TARGET "@_lapack_config_install_guard_target@") | |||
if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}") | |||
include("${_LAPACK_SELF_DIR}/lapack-targets.cmake") | |||
include("${_LAPACK_SELF_DIR}/@LAPACKLIB@-targets.cmake") | |||
endif() | |||
unset(_LAPACK_TARGET) | |||
@@ -44,6 +44,24 @@ endif() | |||
# By default static library | |||
option(BUILD_SHARED_LIBS "Build shared libraries" OFF) | |||
# Index width of the API: 32-bit unless BUILD_INDEX64 is switched ON.
option(BUILD_INDEX64 "Build Index-64 API libraries" OFF)

# Every library name gets a "64" suffix in the 64-bit index build and no
# suffix otherwise.
if(BUILD_INDEX64)
  set(_lapack_index_suffix "64")
else()
  set(_lapack_index_suffix "")
endif()
set(BLASLIB "blas${_lapack_index_suffix}")
set(CBLASLIB "cblas${_lapack_index_suffix}")
set(LAPACKLIB "lapack${_lapack_index_suffix}")
set(LAPACKELIB "lapacke${_lapack_index_suffix}")
set(TMGLIB "tmglib${_lapack_index_suffix}")
unset(_lapack_index_suffix)

if(BUILD_INDEX64)
  # Preprocessor switches for the C sources, plus the flag that makes
  # CheckLAPACKCompilerFlags request 64-bit default integers from Fortran.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWeirdNEC -DLAPACK_ILP64 -DHAVE_LAPACK_CONFIG_H")
  set(FORTRAN_ILP TRUE)
endif()
include(GNUInstallDirs) | |||
# Updated OSX RPATH settings | |||
@@ -73,10 +91,10 @@ include(PreventInBuildInstalls) | |||
if(UNIX) | |||
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel) | |||
list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict") | |||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict") | |||
endif() | |||
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL) | |||
list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none") | |||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none") | |||
endif() | |||
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler. | |||
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin | |||
@@ -112,7 +130,7 @@ endif() | |||
# -------------------------------------------------- | |||
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets) | |||
set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKLIB}-targets) | |||
macro(lapack_install_library lib) | |||
install(TARGETS ${lib} | |||
@@ -220,7 +238,7 @@ endif() | |||
if(NOT BLAS_FOUND) | |||
message(STATUS "Using supplied NETLIB BLAS implementation") | |||
add_subdirectory(BLAS) | |||
set(BLAS_LIBRARIES blas) | |||
set(BLAS_LIBRARIES ${BLASLIB}) | |||
else() | |||
set(CMAKE_EXE_LINKER_FLAGS | |||
"${CMAKE_EXE_LINKER_FLAGS} ${BLAS_LINKER_FLAGS}" | |||
@@ -279,7 +297,7 @@ endif() | |||
# Neither user specified or optimized LAPACK libraries can be used | |||
if(NOT LATESTLAPACK_FOUND) | |||
message(STATUS "Using supplied NETLIB LAPACK implementation") | |||
set(LAPACK_LIBRARIES lapack) | |||
set(LAPACK_LIBRARIES ${LAPACKLIB}) | |||
add_subdirectory(SRC) | |||
else() | |||
set(CMAKE_EXE_LINKER_FLAGS | |||
@@ -371,23 +389,23 @@ include(CPack) | |||
# -------------------------------------------------- | |||
if(NOT BLAS_FOUND) | |||
set(ALL_TARGETS ${ALL_TARGETS} blas) | |||
set(ALL_TARGETS ${ALL_TARGETS} ${BLASLIB}) | |||
endif() | |||
if(NOT LATESTLAPACK_FOUND) | |||
set(ALL_TARGETS ${ALL_TARGETS} lapack) | |||
set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKLIB}) | |||
endif() | |||
if(BUILD_TESTING OR LAPACKE_WITH_TMG) | |||
set(ALL_TARGETS ${ALL_TARGETS} tmglib) | |||
set(ALL_TARGETS ${ALL_TARGETS} ${TMGLIB}) | |||
endif() | |||
# Export lapack targets, not including lapacke, from the | |||
# install tree, if any. | |||
set(_lapack_config_install_guard_target "") | |||
if(ALL_TARGETS) | |||
install(EXPORT lapack-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
install(EXPORT ${LAPACKLIB}-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
@@ -398,18 +416,18 @@ endif() | |||
# Include cblas in targets exported from the build tree. | |||
if(CBLAS) | |||
set(ALL_TARGETS ${ALL_TARGETS} cblas) | |||
set(ALL_TARGETS ${ALL_TARGETS} ${CBLASLIB}) | |||
endif() | |||
# Include lapacke in targets exported from the build tree. | |||
if(LAPACKE) | |||
set(ALL_TARGETS ${ALL_TARGETS} lapacke) | |||
set(ALL_TARGETS ${ALL_TARGETS} ${LAPACKELIB}) | |||
endif() | |||
# Export lapack and lapacke targets from the build tree, if any. | |||
set(_lapack_config_build_guard_target "") | |||
if(ALL_TARGETS) | |||
export(TARGETS ${ALL_TARGETS} FILE lapack-targets.cmake) | |||
export(TARGETS ${ALL_TARGETS} FILE ${LAPACKLIB}-targets.cmake) | |||
# Choose one of the lapack or lapacke targets to use as a guard | |||
# for lapack-config.cmake to load targets from the build tree. | |||
@@ -417,30 +435,30 @@ if(ALL_TARGETS) | |||
endif() | |||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-build.cmake.in | |||
${LAPACK_BINARY_DIR}/lapack-config.cmake @ONLY) | |||
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config.cmake @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapack.pc @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc @ONLY) | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc | |||
${CMAKE_CURRENT_BINARY_DIR}/${LAPACKLIB}.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
COMPONENT Development | |||
) | |||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in | |||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake @ONLY) | |||
${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake @ONLY) | |||
include(CMakePackageConfigHelpers) | |||
write_basic_package_version_file( | |||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake | |||
VERSION ${LAPACK_VERSION} | |||
COMPATIBILITY SameMajorVersion | |||
) | |||
install(FILES | |||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake | |||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION} | |||
${LAPACK_BINARY_DIR}/CMakeFiles/${LAPACKLIB}-config.cmake | |||
${LAPACK_BINARY_DIR}/${LAPACKLIB}-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKLIB}-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
@@ -4,6 +4,7 @@ include $(TOPSRCDIR)/make.inc | |||
.PHONY: all testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||
all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion | |||
ifneq ($(C_LAPACK), 1) | |||
testlsame: lsame.o lsametst.o | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
@@ -27,6 +28,31 @@ testieee: tstiee.o | |||
testversion: ilaver.o LAPACK_version.o | |||
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^ | |||
else | |||
# C_LAPACK build: the installation test programs are linked with the C
# compiler. Every rule uses $(CFLAGS) and $(LDFLAGS) so that user-supplied
# compiler and linker options apply uniformly (testlsame previously
# hard-coded -O2 and ignored both).
testlsame: lsame.o lsametst.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

testslamch: slamch.o lsame.o slamchtst.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

testdlamch: dlamch.o lsame.o dlamchtst.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

# The timer implementation is chosen by $(TIMER) from make.inc.
testsecond: second_$(TIMER).o secondtst.o
	@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

testdsecnd: dsecnd_$(TIMER).o dsecndtst.o
	@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

testieee: tstiee.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

testversion: ilaver.o LAPACK_version.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
endif | |||
.PHONY: run | |||
run: all | |||
./testlsame | |||
@@ -46,5 +72,10 @@ cleanexe: | |||
cleantest: | |||
rm -f core | |||
ifneq ($(C_LAPACK), 1) | |||
slamch.o: slamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
dlamch.o: dlamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< | |||
else | |||
slamch.o: slamch.c ; $(CC) $(CFLAGS) -c -o $@ $< | |||
dlamch.o: dlamch.c ; $(CC) $(CFLAGS) -c -o $@ $< | |||
endif |
@@ -0,0 +1,445 @@ | |||
/* f2c.h -- Standard Fortran to C header file */ | |||
/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." | |||
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ | |||
#ifndef F2C_INCLUDE | |||
#define F2C_INCLUDE | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
typedef int integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
typedef int logical; | |||
typedef short int shortlogical; | |||
typedef char logical1; | |||
typedef char integer1; | |||
#define TRUE_ (1) | |||
#define FALSE_ (0) | |||
/* Extern is for use with -E */ | |||
#ifndef Extern | |||
#define Extern extern | |||
#endif | |||
/* I/O stuff */ | |||
typedef int flag; | |||
typedef int ftnlen; | |||
typedef int ftnint; | |||
/*external read, write*/ | |||
typedef struct | |||
{ flag cierr; | |||
ftnint ciunit; | |||
flag ciend; | |||
char *cifmt; | |||
ftnint cirec; | |||
} cilist; | |||
/*internal read, write*/ | |||
typedef struct | |||
{ flag icierr; | |||
char *iciunit; | |||
flag iciend; | |||
char *icifmt; | |||
ftnint icirlen; | |||
ftnint icirnum; | |||
} icilist; | |||
/*open*/ | |||
typedef struct | |||
{ flag oerr; | |||
ftnint ounit; | |||
char *ofnm; | |||
ftnlen ofnmlen; | |||
char *osta; | |||
char *oacc; | |||
char *ofm; | |||
ftnint orl; | |||
char *oblnk; | |||
} olist; | |||
/*close*/ | |||
typedef struct | |||
{ flag cerr; | |||
ftnint cunit; | |||
char *csta; | |||
} cllist; | |||
/*rewind, backspace, endfile*/ | |||
typedef struct | |||
{ flag aerr; | |||
ftnint aunit; | |||
} alist; | |||
/* inquire */ | |||
typedef struct | |||
{ flag inerr; | |||
ftnint inunit; | |||
char *infile; | |||
ftnlen infilen; | |||
ftnint *inex; /*parameters in standard's order*/ | |||
ftnint *inopen; | |||
ftnint *innum; | |||
ftnint *innamed; | |||
char *inname; | |||
ftnlen innamlen; | |||
char *inacc; | |||
ftnlen inacclen; | |||
char *inseq; | |||
ftnlen inseqlen; | |||
char *indir; | |||
ftnlen indirlen; | |||
char *infmt; | |||
ftnlen infmtlen; | |||
char *inform; | |||
ftnint informlen; | |||
char *inunf; | |||
ftnlen inunflen; | |||
ftnint *inrecl; | |||
ftnint *innrec; | |||
char *inblank; | |||
ftnlen inblanklen; | |||
} inlist; | |||
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
/* Generic arithmetic helpers. All of them are function-like macros, so an
   argument that appears more than once in the expansion is evaluated more
   than once. */
/* Absolute value for any arithmetic type. It is defined after <stdlib.h> has
   been included, so subsequent uses of abs(...) expand to this macro. */
#define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
/* Absolute value through fabs(). */
#define dabs(x) (fabs(x)) | |||
/* Minimum / maximum of two values; when both compare equal the first
   argument is returned. */
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
/* Aliases of f2cmin / f2cmax. */
#define dmin(a,b) (f2cmin(a,b)) | |||
#define dmax(a,b) (f2cmax(a,b)) | |||
/* Bit b of a, as 0 or 1 (the shift binds tighter than the & 1). */
#define bit_test(a,b) ((a) >> (b) & 1) | |||
/* a with bit b cleared / set; the mask is built in type uinteger. */
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
/* Fortran ABORT: forwards a fixed message and the value 1 to sig_die. */
#define abort_() { sig_die("Fortran abort routine called", 1); } | |||
#define c_abs(z) (cabsf(Cf(z))) | |||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} | |||
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} | |||
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimag(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} | |||
/* s_cat: concatenate *np source strings rpp[i], of lengths rnp[i], into the
   destination lpp of capacity llp. Copying stops once the capacity is used up
   and any remaining destination characters are filled with blanks. */
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
/* s_cmp: strncmp over the first min(c,d) characters only; characters beyond
   the shorter length are not examined. */
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
/* s_copy: copy at most min(C,D) characters from B to A, stopping early at a
   NUL in B. The rest of A is left untouched (no blank fill, no terminator). */
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
/* sig_die: both arguments are ignored; the process exits with status 1. */
#define sig_die(s, kill) { exit(1); } | |||
/* s_stop: both arguments are ignored; the process exits with status 0. */
#define s_stop(s, n) {exit(0);} | |||
/* Version identification string; not referenced by the code visible here. */
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
#define z_abs(z) (cabs(Cd(z))) | |||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
#define myexit_() break; | |||
#define mycycle_() continue; | |||
#define myceiling_(w) ceil(w) | |||
#define myhuge_(w) HUGE_VAL | |||
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} | |||
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) | |||
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/*
 * Raise a single-precision value to an integer power by binary
 * exponentiation (square-and-multiply).
 *
 *   x  base
 *   n  exponent; a negative n computes (1/x)^|n|, n == 0 yields 1
 *
 * Returns x**n.
 */
static float spow_ui(float x, integer n) {
    float result = 1.0;
    unsigned long int u;

    if (n == 0)
        return result;

    if (n < 0) {
        /* Take |n| in unsigned arithmetic: negating the most negative
           integer as a signed value would overflow. */
        u = 0UL - (unsigned long int)n;
        x = 1/x;
    } else {
        u = (unsigned long int)n;
    }

    for (;;) {
        if (u & 01) result *= x;   /* current bit set: fold this power in */
        if (u >>= 1) x *= x;       /* more bits left: square the base */
        else break;
    }
    return result;
}
/*
 * Raise a double-precision value to an integer power by binary
 * exponentiation (square-and-multiply).
 *
 *   x  base
 *   n  exponent; a negative n computes (1/x)^|n|, n == 0 yields 1
 *
 * Returns x**n.
 */
static double dpow_ui(double x, integer n) {
    double result = 1.0;
    unsigned long int u;

    if (n == 0)
        return result;

    if (n < 0) {
        /* Take |n| in unsigned arithmetic: negating the most negative
           integer as a signed value would overflow. */
        u = 0UL - (unsigned long int)n;
        x = 1/x;
    } else {
        u = (unsigned long int)n;
    }

    for (;;) {
        if (u & 01) result *= x;   /* current bit set: fold this power in */
        if (u >>= 1) x *= x;       /* more bits left: square the base */
        else break;
    }
    return result;
}
/*
 * Raise a single-precision complex value to an integer power by binary
 * exponentiation (square-and-multiply).
 *
 *   x  base
 *   n  exponent; a negative n computes (1/x)^|n|, n == 0 yields 1
 *
 * Returns x**n.
 */
static _Complex float cpow_ui(_Complex float x, integer n) {
    _Complex float result = 1.0;
    unsigned long int u;

    if (n == 0)
        return result;

    if (n < 0) {
        /* Take |n| in unsigned arithmetic: negating the most negative
           integer as a signed value would overflow. */
        u = 0UL - (unsigned long int)n;
        x = 1/x;
    } else {
        u = (unsigned long int)n;
    }

    for (;;) {
        if (u & 01) result *= x;   /* current bit set: fold this power in */
        if (u >>= 1) x *= x;       /* more bits left: square the base */
        else break;
    }
    return result;
}
/*
 * Raise a double-precision complex value to an integer power by binary
 * exponentiation (square-and-multiply).
 *
 *   x  base
 *   n  exponent; a negative n computes (1/x)^|n|, n == 0 yields 1
 *
 * Returns x**n.
 */
static _Complex double zpow_ui(_Complex double x, integer n) {
    _Complex double result = 1.0;
    unsigned long int u;

    if (n == 0)
        return result;

    if (n < 0) {
        /* Take |n| in unsigned arithmetic: negating the most negative
           integer as a signed value would overflow. */
        u = 0UL - (unsigned long int)n;
        x = 1/x;
    } else {
        u = (unsigned long int)n;
    }

    for (;;) {
        if (u & 01) result *= x;   /* current bit set: fold this power in */
        if (u >>= 1) x *= x;       /* more bits left: square the base */
        else break;
    }
    return result;
}
/*
 * Integer power x**n with integer (truncating) semantics.
 *
 *   n == 0 or x == 1    -> 1
 *   n <  0, x == -1     -> +1 or -1 according to the parity of n
 *   n <  0, x == 0      -> evaluates 1/x, i.e. the same integer division
 *                          by zero the original expression performed
 *   n <  0, otherwise   -> 0 (the reciprocal truncates to zero)
 *   n >  0              -> binary exponentiation
 */
static integer pow_ii(integer x, integer n) {
    integer result = 1;
    unsigned long int u;

    if (n <= 0) {
        if (n == 0 || x == 1)
            return 1;
        if (x != -1)
            return x == 0 ? 1/x : 0;
        /* x == -1 with a negative exponent: take |n| in unsigned
           arithmetic, because negating the most negative integer as a
           signed value would overflow. */
        u = 0UL - (unsigned long int)n;
    } else {
        u = (unsigned long int)n;
    }

    for (;;) {
        if (u & 01) result *= x;   /* current bit set: fold this power in */
        if (u >>= 1) x *= x;       /* more bits left: square the base */
        else break;
    }
    return result;
}
/*
 * Position of the largest element of w(s..e), using 1-based indices into w.
 * The result is relative to s (1 means the element at index s); ties keep
 * the earliest position because only a strictly larger value replaces the
 * current maximum. The parameter n is not used.
 */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
    double largest = w[s-1];
    integer best = s;
    integer k;

    for (k = s + 1; k <= e; k++) {
        if (w[k-1] > largest) {
            best = k;
            largest = w[k-1];
        }
    }
    return best - s + 1;
}
/*
 * Position of the largest element of w(s..e), using 1-based indices into w.
 * The result is relative to s (1 means the element at index s); ties keep
 * the earliest position because only a strictly larger value replaces the
 * current maximum. The parameter n is not used.
 */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
    float largest = w[s-1];
    integer best = s;
    integer k;

    for (k = s + 1; k <= e; k++) {
        if (w[k-1] > largest) {
            best = k;
            largest = w[k-1];
        }
    }
    return best - s + 1;
}
/*
 * Conjugated dot product of two single-precision complex vectors:
 *     *z = sum over i of conjg(x(i)) * y(i)
 *
 *   z      receives the result
 *   n_     pointer to the number of elements; nothing is read when <= 0
 *   x, y   input vectors
 *   incx_  pointer to the stride of x
 *   incy_  pointer to the stride of y
 *
 * A negative stride walks the vector backwards starting from element
 * (1-n)*inc, as the reference BLAS does, so no element before the start of
 * the array is accessed.
 */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
    _Complex float acc = 0.0;

    if (incx == 1 && incy == 1) {
        /* contiguous fast path */
        for (i = 0; i < n; i++) {
            acc += conjf(Cf(&x[i])) * Cf(&y[i]);
        }
    } else {
        integer ix = incx < 0 ? (1 - n) * incx : 0;
        integer iy = incy < 0 ? (1 - n) * incy : 0;
        for (i = 0; i < n; i++, ix += incx, iy += incy) {
            acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
        }
    }
    pCf(z) = acc;
}
/*
 * Conjugated dot product of two double-precision complex vectors:
 *     *z = sum over i of conjg(x(i)) * y(i)
 *
 *   z      receives the result
 *   n_     pointer to the number of elements; nothing is read when <= 0
 *   x, y   input vectors
 *   incx_  pointer to the stride of x
 *   incy_  pointer to the stride of y
 *
 * A negative stride walks the vector backwards starting from element
 * (1-n)*inc, as the reference BLAS does, so no element before the start of
 * the array is accessed.
 */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
    _Complex double acc = 0.0;

    if (incx == 1 && incy == 1) {
        /* contiguous fast path */
        for (i = 0; i < n; i++) {
            acc += conj(Cd(&x[i])) * Cd(&y[i]);
        }
    } else {
        integer ix = incx < 0 ? (1 - n) * incx : 0;
        integer iy = incy < 0 ? (1 - n) * incy : 0;
        for (i = 0; i < n; i++, ix += incx, iy += incy) {
            acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
        }
    }
    pCd(z) = acc;
}
/*
 * Unconjugated dot product of two single-precision complex vectors:
 *     *z = sum over i of x(i) * y(i)
 *
 *   z      receives the result
 *   n_     pointer to the number of elements; nothing is read when <= 0
 *   x, y   input vectors
 *   incx_  pointer to the stride of x
 *   incy_  pointer to the stride of y
 *
 * A negative stride walks the vector backwards starting from element
 * (1-n)*inc, as the reference BLAS does, so no element before the start of
 * the array is accessed.
 */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
    _Complex float acc = 0.0;

    if (incx == 1 && incy == 1) {
        /* contiguous fast path */
        for (i = 0; i < n; i++) {
            acc += Cf(&x[i]) * Cf(&y[i]);
        }
    } else {
        integer ix = incx < 0 ? (1 - n) * incx : 0;
        integer iy = incy < 0 ? (1 - n) * incy : 0;
        for (i = 0; i < n; i++, ix += incx, iy += incy) {
            acc += Cf(&x[ix]) * Cf(&y[iy]);
        }
    }
    pCf(z) = acc;
}
/*
 * Unconjugated dot product of two double-precision complex vectors:
 *     *z = sum over i of x(i) * y(i)
 *
 *   z      receives the result
 *   n_     pointer to the number of elements; nothing is read when <= 0
 *   x, y   input vectors
 *   incx_  pointer to the stride of x
 *   incy_  pointer to the stride of y
 *
 * A negative stride walks the vector backwards starting from element
 * (1-n)*inc, as the reference BLAS does, so no element before the start of
 * the array is accessed.
 */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
    _Complex double acc = 0.0;

    if (incx == 1 && incy == 1) {
        /* contiguous fast path */
        for (i = 0; i < n; i++) {
            acc += Cd(&x[i]) * Cd(&y[i]);
        }
    } else {
        integer ix = incx < 0 ? (1 - n) * incx : 0;
        integer iy = incy < 0 ? (1 - n) * incy : 0;
        for (i = 0; i < n; i++, ix += incx, iy += incy) {
            acc += Cd(&x[ix]) * Cd(&y[iy]);
        }
    }
    pCd(z) = acc;
}
#endif | |||
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* > \brief \b DSECND returns nothing */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* DOUBLE PRECISION FUNCTION DSECND( ) */ | |||
/* > \par Purpose: */ | |||
/* ============= */ | |||
/* > */ | |||
/* > \verbatim */ | |||
/* > */ | |||
/* > DSECND returns nothing instead of returning the user time for a process in seconds. */ | |||
/* > If you are using that routine, it means that neither EXTERNAL ETIME, */ | |||
/* > EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on */ | |||
/* > your machine. */ | |||
/* > \endverbatim */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date December 2016 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== */ | |||
/* -- LAPACK auxiliary routine (version 3.7.0) -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/* December 2016 */
/* ===================================================================== */
doublereal dsecnd_(void)
{
    /* Placeholder timer: no time source is consulted, so the reported
       user time is always zero. */
    return 0.;
} /* dsecnd_ */
@@ -0,0 +1,569 @@ | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
#if defined(_WIN64) | |||
typedef long long BLASLONG; | |||
typedef unsigned long long BLASULONG; | |||
#else | |||
typedef long BLASLONG; | |||
typedef unsigned long BLASULONG; | |||
#endif | |||
#ifdef LAPACK_ILP64 | |||
typedef BLASLONG blasint; | |||
#if defined(_WIN64) | |||
#define blasabs(x) llabs(x) | |||
#else | |||
#define blasabs(x) labs(x) | |||
#endif | |||
#else | |||
typedef int blasint; | |||
#define blasabs(x) abs(x) | |||
#endif | |||
typedef blasint integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
#ifdef _MSC_VER | |||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
#else | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#endif | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
/* Fortran LOGICAL / small-integer storage types and the two truth values. */
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)

/* Extern is for use with -E */
#if !defined(Extern)
#define Extern extern
#endif
/* I/O stuff: parameter records for the f2c run-time I/O statements. */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;

/* external read, write */
typedef struct {
    flag   cierr;
    ftnint ciunit;
    flag   ciend;
    char  *cifmt;
    ftnint cirec;
} cilist;

/* internal read, write */
typedef struct {
    flag   icierr;
    char  *iciunit;
    flag   iciend;
    char  *icifmt;
    ftnint icirlen;
    ftnint icirnum;
} icilist;

/* open */
typedef struct {
    flag   oerr;
    ftnint ounit;
    char  *ofnm;
    ftnlen ofnmlen;
    char  *osta;
    char  *oacc;
    char  *ofm;
    ftnint orl;
    char  *oblnk;
} olist;

/* close */
typedef struct {
    flag   cerr;
    ftnint cunit;
    char  *csta;
} cllist;

/* rewind, backspace, endfile */
typedef struct {
    flag   aerr;
    ftnint aunit;
} alist;

/* inquire -- from inex onwards the members follow the standard's order */
typedef struct {
    flag    inerr;
    ftnint  inunit;
    char   *infile;
    ftnlen  infilen;
    ftnint *inex;
    ftnint *inopen;
    ftnint *innum;
    ftnint *innamed;
    char   *inname;
    ftnlen  innamlen;
    char   *inacc;
    ftnlen  inacclen;
    char   *inseq;
    ftnlen  inseqlen;
    char   *indir;
    ftnlen  indirlen;
    char   *infmt;
    ftnlen  infmtlen;
    char   *inform;
    ftnint  informlen;
    char   *inunf;
    ftnlen  inunflen;
    ftnint *inrecl;
    ftnint *innrec;
    char   *inblank;
    ftnlen  inblanklen;
} inlist;
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
/* Type-generic helpers.  These are macros, so arguments may be evaluated
   more than once -- do not pass expressions with side effects. */
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))

/* Two-argument minimum / maximum; dmin/dmax are aliases. */
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))

/* Test, clear or set bit number b of a. */
#define bit_test(a,b) (((a) >> (b)) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))

/* Fortran ABORT: delegates to sig_die. */
#define abort_() { sig_die("Fortran abort routine called", 1); }
/* Single-precision complex modulus of *z. */
#define c_abs(z) (cabsf(Cf(z)))
/* Single-precision complex cosine: *R = cos(*Z).  Uses ccosf so the argument
   type matches the value produced by Cf(), like the other c_* macros
   (cexpf, clogf, csinf, csqrtf). */
#define c_cos(R,Z) { pCf(R)=ccosf(Cf(Z)); }
/*
 * Complex division: *c = *a / *b  (c_div single precision, z_div double).
 * All three arguments are pointers to the f2c (r, i) structs.
 */
#ifdef _MSC_VER
/* MSVC's _Fcomplex/_Dcomplex have no '/' operator, so the quotient
   (a * conj(b)) / |b|^2 is formed by hand on the struct members.  The
   operands are copied first so that c may alias a or b.  This is the
   textbook formula; it is not scaled against overflow in |b|^2. */
#define c_div(c, a, b) { \
    real __ar = (a)->r, __ai = (a)->i, __br = (b)->r, __bi = (b)->i; \
    real __den = __br*__br + __bi*__bi; \
    (c)->r = (__ar*__br + __ai*__bi) / __den; \
    (c)->i = (__ai*__br - __ar*__bi) / __den; }
#define z_div(c, a, b) { \
    doublereal __ar = (a)->r, __ai = (a)->i, __br = (b)->r, __bi = (b)->i; \
    doublereal __den = __br*__br + __bi*__bi; \
    (c)->r = (__ar*__br + __ai*__bi) / __den; \
    (c)->i = (__ai*__br - __ar*__bi) / __den; }
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimagf(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
/* Complex ** complex: *R = (*A) raised to (*B).  All three are doublecomplex
   pointers, so the exponent goes through Cd() like the base; cpow() cannot
   take the raw (r, i) struct. */
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
/* Concatenate *np strings rpp[i] (lengths rnp[i]) into lpp, which holds llp
   characters: excess input is dropped and unused space is blank-filled. */
#define s_cat(lpp, rpp, rnp, np, llp) { \
    ftnlen i, nc, ll; char *f__rp, *lp; \
    ll = (llp); lp = (lpp); \
    for(i=0; i < (int)*(np); ++i) { \
        nc = ll; \
        if((rnp)[i] < nc) nc = (rnp)[i]; \
        ll -= nc; \
        f__rp = (rpp)[i]; \
        while(--nc >= 0) *lp++ = *(f__rp)++; \
    } \
    while(--ll >= 0) *lp++ = ' '; }

/* Compare a and b over the shorter of the two lengths c and d. */
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))

/* Copy from B to A at most min(C, D) characters, stopping early at a NUL in B. */
#define s_copy(A,B,C,D) { \
    int __i,__m; \
    for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) \
        (A)[__i] = (B)[__i]; }

/* Fatal error and Fortran STOP: both arguments are ignored. */
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}

static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";

/* Double-precision complex intrinsics. */
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

/* Loop-control statements (Fortran EXIT / CYCLE). */
#define myexit_() break;
#define mycycle() continue;

/* Brace-wrapped expressions: as written they are only valid as initializers. */
#define myceiling(w) {ceil(w)}
#define myhuge(w) {HUGE_VAL}
#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/* Raise the float x to the integer power n by repeated squaring.
   A negative n inverts x first; n == 0 yields 1. */
static float spow_ui(float x, integer n) {
	float result = 1.0;
	unsigned long int bits;

	if (n == 0)
		return result;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	bits = n;
	for (;;) {
		/* Fold in the current power of x when the low exponent bit is set. */
		if (bits & 01)
			result *= x;
		bits >>= 1;
		if (bits == 0)
			break;
		x *= x;
	}
	return result;
}
/* Raise the double x to the integer power n by repeated squaring.
   A negative n inverts x first; n == 0 yields 1. */
static double dpow_ui(double x, integer n) {
	double result = 1.0;
	unsigned long int bits;

	if (n == 0)
		return result;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	bits = n;
	for (;;) {
		/* Fold in the current power of x when the low exponent bit is set. */
		if (bits & 01)
			result *= x;
		bits >>= 1;
		if (bits == 0)
			break;
		x *= x;
	}
	return result;
}
/* Raise the single-precision complex x to the integer power n by repeated
   squaring.  A negative n inverts x first; n == 0 yields 1 + 0i. */
#ifdef _MSC_VER
/* MSVC has no complex operators, so the arithmetic is spelled out on the
   real and imaginary parts:
     1/x   = conj(x) / |x|^2
     a * b = (ar*br - ai*bi) + i*(ar*bi + ai*br)                        */
static _Fcomplex cpow_ui(complex x, integer n) {
	float pr = 1.0f, pi = 0.0f;       /* running product        */
	float xr = x.r, xi = x.i, t;      /* current power of x     */
	unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			float den = xr*xr + xi*xi;
			n = -n;
			xr = xr/den;
			xi = -xi/den;
		}
		for(u = n; ; ) {
			if(u & 01) {
				t = pr*xr - pi*xi;
				pi = pr*xi + pi*xr;
				pr = t;
			}
			if(u >>= 1) {
				t = xr*xr - xi*xi;
				xi = 2*xr*xi;
				xr = t;
			}
			else break;
		}
	}
	_Fcomplex p={pr, pi};
	return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
	_Complex float pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) n = -n, x = 1/x;
		for(u = n; ; ) {
			if(u & 01) pow *= x;
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
#endif
/* Raise the double-precision complex x to the integer power n by repeated
   squaring.  A negative n inverts x first; n == 0 yields 1 + 0i. */
#ifdef _MSC_VER
/* MSVC has no complex operators, so the arithmetic is spelled out on the
   real and imaginary parts:
     1/x   = conj(x) / |x|^2
     a * b = (ar*br - ai*bi) + i*(ar*bi + ai*br)                        */
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
	double pr = 1.0, pi = 0.0;                    /* running product    */
	double xr = x._Val[0], xi = x._Val[1], t;     /* current power of x */
	unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			double den = xr*xr + xi*xi;
			n = -n;
			xr = xr/den;
			xi = -xi/den;
		}
		for(u = n; ; ) {
			if(u & 01) {
				t = pr*xr - pi*xi;
				pi = pr*xi + pi*xr;
				pr = t;
			}
			if(u >>= 1) {
				t = xr*xr - xi*xi;
				xi = 2*xr*xi;
				xr = t;
			}
			else break;
		}
	}
	_Dcomplex p = {pr, pi};
	return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
	_Complex double pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) n = -n, x = 1/x;
		for(u = n; ; ) {
			if(u & 01) pow *= x;
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
#endif
/* Integer power x**n with integer result.
   For n <= 0: the result is 1 when n == 0 or x == 1; for x == -1 the sign
   is worked out from |n|; for any other x it is 0, except that x == 0
   deliberately evaluates 1/x (an integer division by zero). */
static integer pow_ii(integer x, integer n) {
	integer result;
	unsigned long int bits;

	if (n <= 0) {
		if (n == 0 || x == 1)
			return 1;
		if (x != -1)
			return x == 0 ? 1/x : 0;
		/* x == -1 with a negative exponent: same as (-1)**|n| */
		n = -n;
	}

	/* Repeated squaring over the bits of the (now positive) exponent. */
	bits = n;
	result = 1;
	for (;;) {
		if (bits & 01)
			result *= x;
		bits >>= 1;
		if (bits == 0)
			break;
		x *= x;
	}
	return result;
}
/* Position (1-based, counted from s) of the first largest element among
   w[s-1] .. w[e-1].  The argument n is not used. */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
	integer best = s;
	integer k;
	double top = w[s-1];

	for (k = s + 1; k <= e; k++) {
		if (w[k-1] > top) {
			best = k;
			top = w[k-1];
		}
	}
	return best - s + 1;
}
/* Position (1-based, counted from s) of the first largest element among
   w[s-1] .. w[e-1].  The argument n is not used. */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
	integer best = s;
	integer k;
	float top = w[s-1];

	for (k = s + 1; k <= e; k++) {
		if (w[k-1] > top) {
			best = k;
			top = w[k-1];
		}
	}
	return best - s + 1;
}
/*
 * Conjugated single-precision complex dot product:
 *     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Fcomplex has no '*' operator: accumulate the product by components. */
	float sum_r = 0.0f, sum_i = 0.0f;
	for (i=0;i<n;i++) {
		const complex *xp = &x[i*incx];
		const complex *yp = &y[i*incy];
		/* conj(x)*y = (xr*yr + xi*yi) + i*(xr*yi - xi*yr) */
		sum_r += xp->r * yp->r + xp->i * yp->i;
		sum_i += xp->r * yp->i - xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex float zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
		zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
	}
	pCf(z) = zdotc;
#endif
}
/*
 * Conjugated double-precision complex dot product:
 *     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Dcomplex has no '*' operator: accumulate the product by components. */
	double sum_r = 0.0, sum_i = 0.0;
	for (i=0;i<n;i++) {
		const doublecomplex *xp = &x[i*incx];
		const doublecomplex *yp = &y[i*incy];
		/* conj(x)*y = (xr*yr + xi*yi) + i*(xr*yi - xi*yr) */
		sum_r += xp->r * yp->r + xp->i * yp->i;
		sum_i += xp->r * yp->i - xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex double zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
		zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
	}
	pCd(z) = zdotc;
#endif
}
/*
 * Unconjugated single-precision complex dot product:
 *     *z = sum over i in [0, n) of x[i*incx] * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Fcomplex has no '*' operator: accumulate the product by components. */
	float sum_r = 0.0f, sum_i = 0.0f;
	for (i=0;i<n;i++) {
		const complex *xp = &x[i*incx];
		const complex *yp = &y[i*incy];
		/* x*y = (xr*yr - xi*yi) + i*(xr*yi + xi*yr) */
		sum_r += xp->r * yp->r - xp->i * yp->i;
		sum_i += xp->r * yp->i + xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex float zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
		zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
	}
	pCf(z) = zdotc;
#endif
}
/*
 * Unconjugated double-precision complex dot product:
 *     *z = sum over i in [0, n) of x[i*incx] * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Dcomplex has no '*' operator: accumulate the product by components. */
	double sum_r = 0.0, sum_i = 0.0;
	for (i=0;i<n;i++) {
		const doublecomplex *xp = &x[i*incx];
		const doublecomplex *yp = &y[i*incy];
		/* x*y = (xr*yr - xi*yi) + i*(xr*yi + xi*yr) */
		sum_r += xp->r * yp->r - xp->i * yp->i;
		sum_i += xp->r * yp->i + xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex double zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
		zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
	}
	pCd(z) = zdotc;
#endif
}
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* > \brief \b DSECND returns nothing */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* DOUBLE PRECISION FUNCTION DSECND( ) */ | |||
/* > \par Purpose: */ | |||
/* ============= */ | |||
/* > */ | |||
/* > \verbatim */ | |||
/* > */ | |||
/* > DSECND returns nothing instead of returning the user time for a process in seconds. */ | |||
/* > If you are using that routine, it means that neither EXTERNAL ETIME, */ | |||
/* > EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on */ | |||
/* > your machine. */ | |||
/* > \endverbatim */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date December 2016 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== */ | |||
doublereal dsecnd_(void) | |||
{ | |||
/* System generated locals */ | |||
doublereal ret_val; | |||
/* -- LAPACK auxiliary routine (version 3.7.0) -- */ | |||
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ | |||
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ | |||
/* December 2016 */ | |||
/* ===================================================================== */ | |||
ret_val = 0.; | |||
return ret_val; | |||
/* End of DSECND */ | |||
} /* dsecnd_ */ | |||
@@ -0,0 +1,582 @@ | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
/* BLASLONG / BLASULONG: long long on _WIN64, plain long everywhere else. */
#ifdef _WIN64
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif

/* blasint is an int unless LAPACK_ILP64 is defined, in which case it is a
   BLASLONG.  blasabs() is the absolute-value routine matching that width. */
#ifndef LAPACK_ILP64
typedef int blasint;
#define blasabs(x) abs(x)
#else
typedef BLASLONG blasint;
#ifdef _WIN64
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#endif

/* Scalar types used by the f2c-translated code. */
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;

/* Complex numbers are stored as (real, imaginary) pairs. */
typedef struct {
    real r, i;
} complex;
typedef struct {
    doublereal r, i;
} doublecomplex;
#ifdef _MSC_VER | |||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
#else | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#endif | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
/* Fortran LOGICAL / small-integer storage types and the two truth values. */
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)

/* Extern is for use with -E */
#if !defined(Extern)
#define Extern extern
#endif
/* I/O stuff: parameter records for the f2c run-time I/O statements. */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;

/* external read, write */
typedef struct {
    flag   cierr;
    ftnint ciunit;
    flag   ciend;
    char  *cifmt;
    ftnint cirec;
} cilist;

/* internal read, write */
typedef struct {
    flag   icierr;
    char  *iciunit;
    flag   iciend;
    char  *icifmt;
    ftnint icirlen;
    ftnint icirnum;
} icilist;

/* open */
typedef struct {
    flag   oerr;
    ftnint ounit;
    char  *ofnm;
    ftnlen ofnmlen;
    char  *osta;
    char  *oacc;
    char  *ofm;
    ftnint orl;
    char  *oblnk;
} olist;

/* close */
typedef struct {
    flag   cerr;
    ftnint cunit;
    char  *csta;
} cllist;

/* rewind, backspace, endfile */
typedef struct {
    flag   aerr;
    ftnint aunit;
} alist;

/* inquire -- from inex onwards the members follow the standard's order */
typedef struct {
    flag    inerr;
    ftnint  inunit;
    char   *infile;
    ftnlen  infilen;
    ftnint *inex;
    ftnint *inopen;
    ftnint *innum;
    ftnint *innamed;
    char   *inname;
    ftnlen  innamlen;
    char   *inacc;
    ftnlen  inacclen;
    char   *inseq;
    ftnlen  inseqlen;
    char   *indir;
    ftnlen  indirlen;
    char   *infmt;
    ftnlen  infmtlen;
    char   *inform;
    ftnint  informlen;
    char   *inunf;
    ftnlen  inunflen;
    ftnint *inrecl;
    ftnint *innrec;
    char   *inblank;
    ftnlen  inblanklen;
} inlist;
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
/* Type-generic helpers.  These are macros, so arguments may be evaluated
   more than once -- do not pass expressions with side effects. */
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))

/* Two-argument minimum / maximum; dmin/dmax are aliases. */
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))

/* Test, clear or set bit number b of a. */
#define bit_test(a,b) (((a) >> (b)) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))

/* Fortran ABORT: delegates to sig_die. */
#define abort_() { sig_die("Fortran abort routine called", 1); }
/* Single-precision complex modulus of *z. */
#define c_abs(z) (cabsf(Cf(z)))
/* Single-precision complex cosine: *R = cos(*Z).  Uses ccosf so the argument
   type matches the value produced by Cf(), like the other c_* macros
   (cexpf, clogf, csinf, csqrtf). */
#define c_cos(R,Z) { pCf(R)=ccosf(Cf(Z)); }
/*
 * Complex division: *c = *a / *b  (c_div single precision, z_div double).
 * All three arguments are pointers to the f2c (r, i) structs.
 */
#ifdef _MSC_VER
/* MSVC's _Fcomplex/_Dcomplex have no '/' operator, so the quotient
   (a * conj(b)) / |b|^2 is formed by hand on the struct members.  The
   operands are copied first so that c may alias a or b.  This is the
   textbook formula; it is not scaled against overflow in |b|^2. */
#define c_div(c, a, b) { \
    real __ar = (a)->r, __ai = (a)->i, __br = (b)->r, __bi = (b)->i; \
    real __den = __br*__br + __bi*__bi; \
    (c)->r = (__ar*__br + __ai*__bi) / __den; \
    (c)->i = (__ai*__br - __ar*__bi) / __den; }
#define z_div(c, a, b) { \
    doublereal __ar = (a)->r, __ai = (a)->i, __br = (b)->r, __bi = (b)->i; \
    doublereal __den = __br*__br + __bi*__bi; \
    (c)->r = (__ar*__br + __ai*__bi) / __den; \
    (c)->i = (__ai*__br - __ar*__bi) / __den; }
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimagf(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
/* Complex ** complex: *R = (*A) raised to (*B).  All three are doublecomplex
   pointers, so the exponent goes through Cd() like the base; cpow() cannot
   take the raw (r, i) struct. */
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
/* Concatenate *np strings rpp[i] (lengths rnp[i]) into lpp, which holds llp
   characters: excess input is dropped and unused space is blank-filled. */
#define s_cat(lpp, rpp, rnp, np, llp) { \
    ftnlen i, nc, ll; char *f__rp, *lp; \
    ll = (llp); lp = (lpp); \
    for(i=0; i < (int)*(np); ++i) { \
        nc = ll; \
        if((rnp)[i] < nc) nc = (rnp)[i]; \
        ll -= nc; \
        f__rp = (rpp)[i]; \
        while(--nc >= 0) *lp++ = *(f__rp)++; \
    } \
    while(--ll >= 0) *lp++ = ' '; }

/* Compare a and b over the shorter of the two lengths c and d. */
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))

/* Copy from B to A at most min(C, D) characters, stopping early at a NUL in B. */
#define s_copy(A,B,C,D) { \
    int __i,__m; \
    for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) \
        (A)[__i] = (B)[__i]; }

/* Fatal error and Fortran STOP: both arguments are ignored. */
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}

static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";

/* Double-precision complex intrinsics. */
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}

/* Loop-control statements (Fortran EXIT / CYCLE). */
#define myexit_() break;
#define mycycle() continue;

/* Brace-wrapped expressions: as written they are only valid as initializers. */
#define myceiling(w) {ceil(w)}
#define myhuge(w) {HUGE_VAL}
#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/* Raise the float x to the integer power n by repeated squaring.
   A negative n inverts x first; n == 0 yields 1. */
static float spow_ui(float x, integer n) {
	float result = 1.0;
	unsigned long int bits;

	if (n == 0)
		return result;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	bits = n;
	for (;;) {
		/* Fold in the current power of x when the low exponent bit is set. */
		if (bits & 01)
			result *= x;
		bits >>= 1;
		if (bits == 0)
			break;
		x *= x;
	}
	return result;
}
/* Raise the double x to the integer power n by repeated squaring.
   A negative n inverts x first; n == 0 yields 1. */
static double dpow_ui(double x, integer n) {
	double result = 1.0;
	unsigned long int bits;

	if (n == 0)
		return result;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	bits = n;
	for (;;) {
		/* Fold in the current power of x when the low exponent bit is set. */
		if (bits & 01)
			result *= x;
		bits >>= 1;
		if (bits == 0)
			break;
		x *= x;
	}
	return result;
}
/* Raise the single-precision complex x to the integer power n by repeated
   squaring.  A negative n inverts x first; n == 0 yields 1 + 0i. */
#ifdef _MSC_VER
/* MSVC has no complex operators, so the arithmetic is spelled out on the
   real and imaginary parts:
     1/x   = conj(x) / |x|^2
     a * b = (ar*br - ai*bi) + i*(ar*bi + ai*br)                        */
static _Fcomplex cpow_ui(complex x, integer n) {
	float pr = 1.0f, pi = 0.0f;       /* running product        */
	float xr = x.r, xi = x.i, t;      /* current power of x     */
	unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			float den = xr*xr + xi*xi;
			n = -n;
			xr = xr/den;
			xi = -xi/den;
		}
		for(u = n; ; ) {
			if(u & 01) {
				t = pr*xr - pi*xi;
				pi = pr*xi + pi*xr;
				pr = t;
			}
			if(u >>= 1) {
				t = xr*xr - xi*xi;
				xi = 2*xr*xi;
				xr = t;
			}
			else break;
		}
	}
	_Fcomplex p={pr, pi};
	return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
	_Complex float pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) n = -n, x = 1/x;
		for(u = n; ; ) {
			if(u & 01) pow *= x;
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
#endif
/* Raise the double-precision complex x to the integer power n by repeated
   squaring.  A negative n inverts x first; n == 0 yields 1 + 0i. */
#ifdef _MSC_VER
/* MSVC has no complex operators, so the arithmetic is spelled out on the
   real and imaginary parts:
     1/x   = conj(x) / |x|^2
     a * b = (ar*br - ai*bi) + i*(ar*bi + ai*br)                        */
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
	double pr = 1.0, pi = 0.0;                    /* running product    */
	double xr = x._Val[0], xi = x._Val[1], t;     /* current power of x */
	unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			double den = xr*xr + xi*xi;
			n = -n;
			xr = xr/den;
			xi = -xi/den;
		}
		for(u = n; ; ) {
			if(u & 01) {
				t = pr*xr - pi*xi;
				pi = pr*xi + pi*xr;
				pr = t;
			}
			if(u >>= 1) {
				t = xr*xr - xi*xi;
				xi = 2*xr*xi;
				xr = t;
			}
			else break;
		}
	}
	_Dcomplex p = {pr, pi};
	return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
	_Complex double pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) n = -n, x = 1/x;
		for(u = n; ; ) {
			if(u & 01) pow *= x;
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
#endif
/* Integer power x**n with integer result.
   For n <= 0: the result is 1 when n == 0 or x == 1; for x == -1 the sign
   is worked out from |n|; for any other x it is 0, except that x == 0
   deliberately evaluates 1/x (an integer division by zero). */
static integer pow_ii(integer x, integer n) {
	integer result;
	unsigned long int bits;

	if (n <= 0) {
		if (n == 0 || x == 1)
			return 1;
		if (x != -1)
			return x == 0 ? 1/x : 0;
		/* x == -1 with a negative exponent: same as (-1)**|n| */
		n = -n;
	}

	/* Repeated squaring over the bits of the (now positive) exponent. */
	bits = n;
	result = 1;
	for (;;) {
		if (bits & 01)
			result *= x;
		bits >>= 1;
		if (bits == 0)
			break;
		x *= x;
	}
	return result;
}
/* Position (1-based, counted from s) of the first largest element among
   w[s-1] .. w[e-1].  The argument n is not used. */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
	integer best = s;
	integer k;
	double top = w[s-1];

	for (k = s + 1; k <= e; k++) {
		if (w[k-1] > top) {
			best = k;
			top = w[k-1];
		}
	}
	return best - s + 1;
}
/* Position (1-based, counted from s) of the first largest element among
   w[s-1] .. w[e-1].  The argument n is not used. */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
	integer best = s;
	integer k;
	float top = w[s-1];

	for (k = s + 1; k <= e; k++) {
		if (w[k-1] > top) {
			best = k;
			top = w[k-1];
		}
	}
	return best - s + 1;
}
/*
 * Conjugated single-precision complex dot product:
 *     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Fcomplex has no '*' operator: accumulate the product by components. */
	float sum_r = 0.0f, sum_i = 0.0f;
	for (i=0;i<n;i++) {
		const complex *xp = &x[i*incx];
		const complex *yp = &y[i*incy];
		/* conj(x)*y = (xr*yr + xi*yi) + i*(xr*yi - xi*yr) */
		sum_r += xp->r * yp->r + xp->i * yp->i;
		sum_i += xp->r * yp->i - xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex float zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
		zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
	}
	pCf(z) = zdotc;
#endif
}
/*
 * Conjugated double-precision complex dot product:
 *     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Dcomplex has no '*' operator: accumulate the product by components. */
	double sum_r = 0.0, sum_i = 0.0;
	for (i=0;i<n;i++) {
		const doublecomplex *xp = &x[i*incx];
		const doublecomplex *yp = &y[i*incy];
		/* conj(x)*y = (xr*yr + xi*yi) + i*(xr*yi - xi*yr) */
		sum_r += xp->r * yp->r + xp->i * yp->i;
		sum_i += xp->r * yp->i - xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex double zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
		zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
	}
	pCd(z) = zdotc;
#endif
}
/*
 * Unconjugated single-precision complex dot product:
 *     *z = sum over i in [0, n) of x[i*incx] * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Fcomplex has no '*' operator: accumulate the product by components. */
	float sum_r = 0.0f, sum_i = 0.0f;
	for (i=0;i<n;i++) {
		const complex *xp = &x[i*incx];
		const complex *yp = &y[i*incy];
		/* x*y = (xr*yr - xi*yi) + i*(xr*yi + xi*yr) */
		sum_r += xp->r * yp->r - xp->i * yp->i;
		sum_i += xp->r * yp->i + xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex float zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
		zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
	}
	pCf(z) = zdotc;
#endif
}
/*
 * Unconjugated double-precision complex dot product:
 *     *z = sum over i in [0, n) of x[i*incx] * y[i*incy]
 * n_, incx_ and incy_ are passed by reference; *z is overwritten (0 when n <= 0).
 * Elements are addressed as x[i*incx] / y[i*incy], so the increments are
 * expected to be positive.
 */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* _Dcomplex has no '*' operator: accumulate the product by components. */
	double sum_r = 0.0, sum_i = 0.0;
	for (i=0;i<n;i++) {
		const doublecomplex *xp = &x[i*incx];
		const doublecomplex *yp = &y[i*incy];
		/* x*y = (xr*yr - xi*yi) + i*(xr*yi + xi*yr) */
		sum_r += xp->r * yp->r - xp->i * yp->i;
		sum_i += xp->r * yp->i + xp->i * yp->r;
	}
	z->r = sum_r;
	z->i = sum_i;
#else
	_Complex double zdotc = 0.0;
	for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
		zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
	}
	pCd(z) = zdotc;
#endif
}
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* > \brief \b ILAVER returns the LAPACK version. */ | |||
/* * */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) */ | |||
/* INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH */ | |||
/* > \par Purpose: */ | |||
/* ============= */ | |||
/* > */ | |||
/* > \verbatim */ | |||
/* > */ | |||
/* > This subroutine returns the LAPACK version. */ | |||
/* > \endverbatim */ | |||
/* Arguments: */ | |||
/* ========== */ | |||
/* > \param[out] VERS_MAJOR */ | |||
/* > VERS_MAJOR is INTEGER */ | |||
/* > return the lapack major version */ | |||
/* > */ | |||
/* > \param[out] VERS_MINOR */ | |||
/* > VERS_MINOR is INTEGER */ | |||
/* > return the lapack minor version from the major version */ | |||
/* > */ | |||
/* > \param[out] VERS_PATCH */ | |||
/* > VERS_PATCH is INTEGER */ | |||
/* > return the lapack patch version from the minor version */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date November 2019 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== */ | |||
/* Subroutine */ int ilaver_(integer *vers_major__, integer *vers_minor__,
        integer *vers_patch__)
{
    /* Version triple reported by this routine: major, minor, patch. */
    const integer version[3] = { 3, 9, 0 };

    /* Stores are issued in major, minor, patch order. */
    *vers_major__ = version[0];
    *vers_minor__ = version[1];
    *vers_patch__ = version[2];

    /* Always reports success. */
    return 0;
} /* ilaver_ */
@@ -0,0 +1,645 @@ | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
#if defined(_WIN64) | |||
typedef long long BLASLONG; | |||
typedef unsigned long long BLASULONG; | |||
#else | |||
typedef long BLASLONG; | |||
typedef unsigned long BLASULONG; | |||
#endif | |||
#ifdef LAPACK_ILP64 | |||
typedef BLASLONG blasint; | |||
#if defined(_WIN64) | |||
#define blasabs(x) llabs(x) | |||
#else | |||
#define blasabs(x) labs(x) | |||
#endif | |||
#else | |||
typedef int blasint; | |||
#define blasabs(x) abs(x) | |||
#endif | |||
typedef blasint integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
#ifdef _MSC_VER | |||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
#else | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#endif | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
typedef int logical; | |||
typedef short int shortlogical; | |||
typedef char logical1; | |||
typedef char integer1; | |||
#define TRUE_ (1) | |||
#define FALSE_ (0) | |||
/* Extern is for use with -E */ | |||
#ifndef Extern | |||
#define Extern extern | |||
#endif | |||
/* I/O stuff */ | |||
typedef int flag; | |||
typedef int ftnlen; | |||
typedef int ftnint; | |||
/*external read, write*/ | |||
typedef struct | |||
{ flag cierr; | |||
ftnint ciunit; | |||
flag ciend; | |||
char *cifmt; | |||
ftnint cirec; | |||
} cilist; | |||
/*internal read, write*/ | |||
typedef struct | |||
{ flag icierr; | |||
char *iciunit; | |||
flag iciend; | |||
char *icifmt; | |||
ftnint icirlen; | |||
ftnint icirnum; | |||
} icilist; | |||
/*open*/ | |||
typedef struct | |||
{ flag oerr; | |||
ftnint ounit; | |||
char *ofnm; | |||
ftnlen ofnmlen; | |||
char *osta; | |||
char *oacc; | |||
char *ofm; | |||
ftnint orl; | |||
char *oblnk; | |||
} olist; | |||
/*close*/ | |||
typedef struct | |||
{ flag cerr; | |||
ftnint cunit; | |||
char *csta; | |||
} cllist; | |||
/*rewind, backspace, endfile*/ | |||
typedef struct | |||
{ flag aerr; | |||
ftnint aunit; | |||
} alist; | |||
/* inquire */ | |||
typedef struct | |||
{ flag inerr; | |||
ftnint inunit; | |||
char *infile; | |||
ftnlen infilen; | |||
ftnint *inex; /*parameters in standard's order*/ | |||
ftnint *inopen; | |||
ftnint *innum; | |||
ftnint *innamed; | |||
char *inname; | |||
ftnlen innamlen; | |||
char *inacc; | |||
ftnlen inacclen; | |||
char *inseq; | |||
ftnlen inseqlen; | |||
char *indir; | |||
ftnlen indirlen; | |||
char *infmt; | |||
ftnlen infmtlen; | |||
char *inform; | |||
ftnint informlen; | |||
char *inunf; | |||
ftnlen inunflen; | |||
ftnint *inrecl; | |||
ftnint *innrec; | |||
char *inblank; | |||
ftnlen inblanklen; | |||
} inlist; | |||
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
#define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
#define dabs(x) (fabs(x)) | |||
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
#define dmin(a,b) (f2cmin(a,b)) | |||
#define dmax(a,b) (f2cmax(a,b)) | |||
#define bit_test(a,b) ((a) >> (b) & 1) | |||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
#define abort_() { sig_die("Fortran abort routine called", 1); } | |||
#define c_abs(z) (cabsf(Cf(z))) | |||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
/*
 * c_div / z_div: complex quotient, *c = *a / *b.
 * a, b and c are pointers to the f2c complex / doublecomplex structs.
 */
#ifdef _MSC_VER
/* The MSVC complex types are structs with no '/' operator, so the quotient
   is computed on the {r, i} fields with the scaled (Smith) formulation.
   All operands are read before *c is written, so c may alias a or b. */
#define c_div(c, a, b) { \
    const float cdiv_ar_ = (a)->r, cdiv_ai_ = (a)->i; \
    const float cdiv_br_ = (b)->r, cdiv_bi_ = (b)->i; \
    float cdiv_q_, cdiv_d_; \
    if (fabsf(cdiv_bi_) <= fabsf(cdiv_br_)) { \
        cdiv_q_ = cdiv_bi_ / cdiv_br_; \
        cdiv_d_ = cdiv_br_ + cdiv_bi_ * cdiv_q_; \
        (c)->r = (cdiv_ar_ + cdiv_ai_ * cdiv_q_) / cdiv_d_; \
        (c)->i = (cdiv_ai_ - cdiv_ar_ * cdiv_q_) / cdiv_d_; \
    } else { \
        cdiv_q_ = cdiv_br_ / cdiv_bi_; \
        cdiv_d_ = cdiv_bi_ + cdiv_br_ * cdiv_q_; \
        (c)->r = (cdiv_ar_ * cdiv_q_ + cdiv_ai_) / cdiv_d_; \
        (c)->i = (cdiv_ai_ * cdiv_q_ - cdiv_ar_) / cdiv_d_; \
    } }
#define z_div(c, a, b) { \
    const double zdiv_ar_ = (a)->r, zdiv_ai_ = (a)->i; \
    const double zdiv_br_ = (b)->r, zdiv_bi_ = (b)->i; \
    double zdiv_q_, zdiv_d_; \
    if (fabs(zdiv_bi_) <= fabs(zdiv_br_)) { \
        zdiv_q_ = zdiv_bi_ / zdiv_br_; \
        zdiv_d_ = zdiv_br_ + zdiv_bi_ * zdiv_q_; \
        (c)->r = (zdiv_ar_ + zdiv_ai_ * zdiv_q_) / zdiv_d_; \
        (c)->i = (zdiv_ai_ - zdiv_ar_ * zdiv_q_) / zdiv_d_; \
    } else { \
        zdiv_q_ = zdiv_br_ / zdiv_bi_; \
        zdiv_d_ = zdiv_bi_ + zdiv_br_ * zdiv_q_; \
        (c)->r = (zdiv_ar_ * zdiv_q_ + zdiv_ai_) / zdiv_d_; \
        (c)->i = (zdiv_ai_ * zdiv_q_ - zdiv_ar_) / zdiv_d_; \
    } }
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimagf(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} | |||
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
#define sig_die(s, kill) { exit(1); } | |||
#define s_stop(s, n) {exit(0);} | |||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
#define z_abs(z) (cabs(Cd(z))) | |||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
/* myexit_ and mycycle expand to complete break / continue statements. */
#define myexit_() break;
#define mycycle() continue;
/* The value-producing helpers expand to parenthesised expressions (not
   brace blocks) so they can appear as operands or on the right-hand side
   of an assignment. */
#define myceiling(w) (ceil(w))
#define myhuge(w) (HUGE_VAL)
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
/* s and e are pointers and are dereferenced here; n is forwarded as-is. */
#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/*
 * Raise x to the integer power n by binary exponentiation.
 * A negative n is handled by inverting x and negating n; n == 0 yields 1.
 */
static float spow_ui(float x, integer n) {
    float result = 1.0;
    unsigned long int bits;

    if (n == 0)
        return result;

    if (n < 0) {
        n = -n;
        x = 1/x;
    }

    bits = n;
    for (;;) {
        /* Multiply in the current square when the low exponent bit is set. */
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Raise x to the integer power n by binary exponentiation.
 * A negative n is handled by inverting x and negating n; n == 0 yields 1.
 */
static double dpow_ui(double x, integer n) {
    double result = 1.0;
    unsigned long int bits;

    if (n == 0)
        return result;

    if (n < 0) {
        n = -n;
        x = 1/x;
    }

    bits = n;
    for (;;) {
        /* Multiply in the current square when the low exponent bit is set. */
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Raise the single-precision complex x to the integer power n by binary
 * exponentiation.  A negative n is handled by replacing x with 1/x and
 * negating n; n == 0 yields 1 + 0i.
 */
#ifdef _MSC_VER
/* The parameter is _Fcomplex because the pow_ci macro calls this function
   with Cf(a), which returns _Fcomplex.  _Fcomplex has no arithmetic
   operators, so products and the reciprocal are written out on the
   real/imaginary parts. */
static _Fcomplex cpow_ui(_Fcomplex x, integer n) {
    float xr = x._Val[0], xi = x._Val[1];
    float acc_r = 1.0f, acc_i = 0.0f, tmp;
    unsigned long int u;
    if (n != 0) {
        if (n < 0) {
            /* 1/x, scaled by the larger component to limit overflow. */
            float ratio, den;
            n = -n;
            if (fabsf(xi) <= fabsf(xr)) {
                ratio = xi / xr;
                den = xr + xi * ratio;
                xr = 1.0f / den;
                xi = -ratio / den;
            } else {
                ratio = xr / xi;
                den = xi + xr * ratio;
                xr = ratio / den;
                xi = -1.0f / den;
            }
        }
        for (u = n; ; ) {
            if (u & 01) {
                /* acc *= x */
                tmp = acc_r * xr - acc_i * xi;
                acc_i = acc_r * xi + acc_i * xr;
                acc_r = tmp;
            }
            if (u >>= 1) {
                /* x *= x; the imaginary part must use the old real part. */
                tmp = xr * xr - xi * xi;
                xi = 2.0f * xr * xi;
                xr = tmp;
            }
            else break;
        }
    }
    _Fcomplex p = {acc_r, acc_i};
    return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
    _Complex float pow=1.0; unsigned long int u;
    if (n != 0) {
        if (n < 0) n = -n, x = 1/x;
        for (u = n; ; ) {
            if (u & 01) pow *= x;
            if (u >>= 1) x *= x;
            else break;
        }
    }
    return pow;
}
#endif
/*
 * Raise the double-precision complex x to the integer power n by binary
 * exponentiation.  A negative n is handled by replacing x with 1/x and
 * negating n; n == 0 yields 1 + 0i.
 */
#ifdef _MSC_VER
/* _Dcomplex has no arithmetic operators, so products and the reciprocal
   are written out on the real/imaginary parts. */
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
    double xr = x._Val[0], xi = x._Val[1];
    double acc_r = 1.0, acc_i = 0.0, tmp;
    unsigned long int u;
    if (n != 0) {
        if (n < 0) {
            /* 1/x, scaled by the larger component to limit overflow. */
            double ratio, den;
            n = -n;
            if (fabs(xi) <= fabs(xr)) {
                ratio = xi / xr;
                den = xr + xi * ratio;
                xr = 1.0 / den;
                xi = -ratio / den;
            } else {
                ratio = xr / xi;
                den = xi + xr * ratio;
                xr = ratio / den;
                xi = -1.0 / den;
            }
        }
        for (u = n; ; ) {
            if (u & 01) {
                /* acc *= x */
                tmp = acc_r * xr - acc_i * xi;
                acc_i = acc_r * xi + acc_i * xr;
                acc_r = tmp;
            }
            if (u >>= 1) {
                /* x *= x; the imaginary part must use the old real part. */
                tmp = xr * xr - xi * xi;
                xi = 2.0 * xr * xi;
                xr = tmp;
            }
            else break;
        }
    }
    _Dcomplex p = {acc_r, acc_i};
    return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
    _Complex double pow=1.0; unsigned long int u;
    if (n != 0) {
        if (n < 0) n = -n, x = 1/x;
        for (u = n; ; ) {
            if (u & 01) pow *= x;
            if (u >>= 1) x *= x;
            else break;
        }
    }
    return pow;
}
#endif
/*
 * Integer power x**n with integer (truncating) results.
 *   n == 0 or x == 1        -> 1
 *   n <  0 and x == -1      -> computed as (-1)**(-n)
 *   n <  0 and x == 0       -> evaluates 1/x (integer division by zero)
 *   n <  0, any other x     -> 0
 *   n >  0                  -> binary exponentiation
 */
static integer pow_ii(integer x, integer n) {
    integer result;
    unsigned long int bits;

    if (n <= 0) {
        if (n == 0 || x == 1)
            return 1;
        if (x != -1)
            /* NOTE(review): the 1/x with x == 0 looks deliberate (forces a
               divide fault for 0 to a negative power) - confirm. */
            return x == 0 ? 1/x : 0;
        n = -n;
    }

    bits = n;
    result = 1;
    for (;;) {
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Position of the first maximum among w[s-1] .. w[e-1] (s and e are
 * 1-based, inclusive), returned 1-based relative to s.  Only strictly
 * greater values replace the running maximum.  The n argument is not used.
 */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
    double peak = w[s-1];
    integer best = s;
    integer k;

    for (k = s + 1; k <= e; k++) {
        if (w[k-1] > peak) {
            best = k;
            peak = w[k-1];
        }
    }
    return best - s + 1;
}
/*
 * Position of the first maximum among w[s-1] .. w[e-1] (s and e are
 * 1-based, inclusive), returned 1-based relative to s.  Only strictly
 * greater values replace the running maximum.  The n argument is not used.
 */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
    float peak = w[s-1];
    integer best = s;
    integer k;

    for (k = s + 1; k <= e; k++) {
        if (w[k-1] > peak) {
            best = k;
            peak = w[k-1];
        }
    }
    return best - s + 1;
}
/*
 * Conjugated single-precision complex dot product:
 *     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy]
 * n_, incx_ and incy_ are read through their pointers and the result is
 * stored through z.  When n <= 0 the loop does not run and *z is zero.
 * Elements are addressed as i*incx / i*incy from the given pointers; no
 * re-basing is done for negative increments.
 */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
    /* _Fcomplex has no arithmetic operators, so the product is spelled
       out: conj(a+ib)(c+id) = (ac + bd) + i(ad - bc). */
    _Fcomplex acc = {0.0f, 0.0f};
    for (i = 0; i < n; i++) {
        const complex *xe = &x[i*incx];
        const complex *ye = &y[i*incy];
        acc._Val[0] += xe->r * ye->r + xe->i * ye->i;
        acc._Val[1] += xe->r * ye->i - xe->i * ye->r;
    }
#else
    _Complex float acc = 0.0;
    for (i = 0; i < n; i++) { /* acc = acc + conjg(x(i)) * y(i) */
        acc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
    }
#endif
    pCf(z) = acc;
}
/*
 * Conjugated double-precision complex dot product:
 *     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy]
 * n_, incx_ and incy_ are read through their pointers and the result is
 * stored through z.  When n <= 0 the loop does not run and *z is zero.
 * Elements are addressed as i*incx / i*incy from the given pointers; no
 * re-basing is done for negative increments.
 */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
    /* _Dcomplex has no arithmetic operators, so the product is spelled
       out: conj(a+ib)(c+id) = (ac + bd) + i(ad - bc). */
    _Dcomplex acc = {0.0, 0.0};
    for (i = 0; i < n; i++) {
        const doublecomplex *xe = &x[i*incx];
        const doublecomplex *ye = &y[i*incy];
        acc._Val[0] += xe->r * ye->r + xe->i * ye->i;
        acc._Val[1] += xe->r * ye->i - xe->i * ye->r;
    }
#else
    _Complex double acc = 0.0;
    for (i = 0; i < n; i++) { /* acc = acc + dconjg(x(i)) * y(i) */
        acc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
    }
#endif
    pCd(z) = acc;
}
/*
 * Unconjugated single-precision complex dot product:
 *     *z = sum over i in [0, n) of x[i*incx] * y[i*incy]
 * n_, incx_ and incy_ are read through their pointers and the result is
 * stored through z.  When n <= 0 the loop does not run and *z is zero.
 * Elements are addressed as i*incx / i*incy from the given pointers; no
 * re-basing is done for negative increments.
 */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
    /* _Fcomplex has no arithmetic operators, so the product is spelled
       out: (a+ib)(c+id) = (ac - bd) + i(ad + bc). */
    _Fcomplex acc = {0.0f, 0.0f};
    for (i = 0; i < n; i++) {
        const complex *xe = &x[i*incx];
        const complex *ye = &y[i*incy];
        acc._Val[0] += xe->r * ye->r - xe->i * ye->i;
        acc._Val[1] += xe->r * ye->i + xe->i * ye->r;
    }
#else
    _Complex float acc = 0.0;
    for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i) */
        acc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
    }
#endif
    pCf(z) = acc;
}
/*
 * Unconjugated double-precision complex dot product:
 *     *z = sum over i in [0, n) of x[i*incx] * y[i*incy]
 * n_, incx_ and incy_ are read through their pointers and the result is
 * stored through z.  When n <= 0 the loop does not run and *z is zero.
 * Elements are addressed as i*incx / i*incy from the given pointers; no
 * re-basing is done for negative increments.
 */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
    integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
    /* _Dcomplex has no arithmetic operators, so the product is spelled
       out: (a+ib)(c+id) = (ac - bd) + i(ad + bc). */
    _Dcomplex acc = {0.0, 0.0};
    for (i = 0; i < n; i++) {
        const doublecomplex *xe = &x[i*incx];
        const doublecomplex *ye = &y[i*incy];
        acc._Val[0] += xe->r * ye->r - xe->i * ye->i;
        acc._Val[1] += xe->r * ye->i + xe->i * ye->r;
    }
#else
    _Complex double acc = 0.0;
    for (i = 0; i < n; i++) { /* acc = acc + x(i) * y(i) */
        acc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
    }
#endif
    pCd(z) = acc;
}
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* > \brief \b LSAME */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* LOGICAL FUNCTION LSAME( CA, CB ) */ | |||
/* CHARACTER CA, CB */ | |||
/* > \par Purpose: */ | |||
/* ============= */ | |||
/* > */ | |||
/* > \verbatim */ | |||
/* > */ | |||
/* > LSAME returns .TRUE. if CA is the same letter as CB regardless of */ | |||
/* > case. */ | |||
/* > \endverbatim */ | |||
/* Arguments: */ | |||
/* ========== */ | |||
/* > \param[in] CA */ | |||
/* > \verbatim */ | |||
/* > \endverbatim */ | |||
/* > */ | |||
/* > \param[in] CB */ | |||
/* > \verbatim */ | |||
/* > CA and CB specify the single characters to be compared. */ | |||
/* > \endverbatim */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date December 2016 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== */ | |||
logical lsame_(char *ca, char *cb) | |||
{ | |||
/* System generated locals */ | |||
logical ret_val; | |||
/* Local variables */ | |||
integer inta, intb, zcode; | |||
/* -- LAPACK auxiliary routine (version 3.7.0) -- */ | |||
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ | |||
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ | |||
/* December 2016 */ | |||
/* ===================================================================== */ | |||
/* Test if the characters are equal */ | |||
ret_val = *(unsigned char *)ca == *(unsigned char *)cb; | |||
if (ret_val) { | |||
return ret_val; | |||
} | |||
/* Now test for equivalence if both characters are alphabetic. */ | |||
zcode = 'Z'; | |||
/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ | |||
/* machines, on which ICHAR returns a value with bit 8 set. */ | |||
/* ICHAR('A') on Prime machines returns 193 which is the same as */ | |||
/* ICHAR('A') on an EBCDIC machine. */ | |||
inta = *(unsigned char *)ca; | |||
intb = *(unsigned char *)cb; | |||
if (zcode == 90 || zcode == 122) { | |||
/* ASCII is assumed - ZCODE is the ASCII code of either lower or */ | |||
/* upper case 'Z'. */ | |||
if (inta >= 97 && inta <= 122) { | |||
inta += -32; | |||
} | |||
if (intb >= 97 && intb <= 122) { | |||
intb += -32; | |||
} | |||
} else if (zcode == 233 || zcode == 169) { | |||
/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ | |||
/* upper case 'Z'. */ | |||
if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta | |||
>= 162 && inta <= 169) { | |||
inta += 64; | |||
} | |||
if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb | |||
>= 162 && intb <= 169) { | |||
intb += 64; | |||
} | |||
} else if (zcode == 218 || zcode == 250) { | |||
/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ | |||
/* plus 128 of either lower or upper case 'Z'. */ | |||
if (inta >= 225 && inta <= 250) { | |||
inta += -32; | |||
} | |||
if (intb >= 225 && intb <= 250) { | |||
intb += -32; | |||
} | |||
} | |||
ret_val = inta == intb; | |||
/* RETURN */ | |||
/* End of LSAME */ | |||
return ret_val; | |||
} /* lsame_ */ | |||
@@ -0,0 +1,595 @@ | |||
/* f2c.h -- Standard Fortran to C header file */ | |||
/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." | |||
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ | |||
#ifndef F2C_INCLUDE | |||
#define F2C_INCLUDE | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
typedef int integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
typedef int logical; | |||
typedef short int shortlogical; | |||
typedef char logical1; | |||
typedef char integer1; | |||
#define TRUE_ (1) | |||
#define FALSE_ (0) | |||
/* Extern is for use with -E */ | |||
#ifndef Extern | |||
#define Extern extern | |||
#endif | |||
/* I/O stuff */ | |||
typedef int flag; | |||
typedef int ftnlen; | |||
typedef int ftnint; | |||
/*external read, write*/ | |||
typedef struct | |||
{ flag cierr; | |||
ftnint ciunit; | |||
flag ciend; | |||
char *cifmt; | |||
ftnint cirec; | |||
} cilist; | |||
/*internal read, write*/ | |||
typedef struct | |||
{ flag icierr; | |||
char *iciunit; | |||
flag iciend; | |||
char *icifmt; | |||
ftnint icirlen; | |||
ftnint icirnum; | |||
} icilist; | |||
/*open*/ | |||
typedef struct | |||
{ flag oerr; | |||
ftnint ounit; | |||
char *ofnm; | |||
ftnlen ofnmlen; | |||
char *osta; | |||
char *oacc; | |||
char *ofm; | |||
ftnint orl; | |||
char *oblnk; | |||
} olist; | |||
/*close*/ | |||
typedef struct | |||
{ flag cerr; | |||
ftnint cunit; | |||
char *csta; | |||
} cllist; | |||
/*rewind, backspace, endfile*/ | |||
typedef struct | |||
{ flag aerr; | |||
ftnint aunit; | |||
} alist; | |||
/* inquire */ | |||
typedef struct | |||
{ flag inerr; | |||
ftnint inunit; | |||
char *infile; | |||
ftnlen infilen; | |||
ftnint *inex; /*parameters in standard's order*/ | |||
ftnint *inopen; | |||
ftnint *innum; | |||
ftnint *innamed; | |||
char *inname; | |||
ftnlen innamlen; | |||
char *inacc; | |||
ftnlen inacclen; | |||
char *inseq; | |||
ftnlen inseqlen; | |||
char *indir; | |||
ftnlen indirlen; | |||
char *infmt; | |||
ftnlen infmtlen; | |||
char *inform; | |||
ftnint informlen; | |||
char *inunf; | |||
ftnlen inunflen; | |||
ftnint *inrecl; | |||
ftnint *innrec; | |||
char *inblank; | |||
ftnlen inblanklen; | |||
} inlist; | |||
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
#define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
#define dabs(x) (fabs(x)) | |||
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
#define dmin(a,b) (f2cmin(a,b)) | |||
#define dmax(a,b) (f2cmax(a,b)) | |||
#define bit_test(a,b) ((a) >> (b) & 1) | |||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
#define abort_() { sig_die("Fortran abort routine called", 1); } | |||
#define c_abs(z) (cabsf(Cf(z))) | |||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} | |||
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} | |||
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimag(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} | |||
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
#define sig_die(s, kill) { exit(1); } | |||
#define s_stop(s, n) {exit(0);} | |||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
#define z_abs(z) (cabs(Cd(z))) | |||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
#define myexit_() break; | |||
#define mycycle_() continue; | |||
#define myceiling_(w) ceil(w) | |||
#define myhuge_(w) HUGE_VAL | |||
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} | |||
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) | |||
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/*
 * Raise x to the integer power n by binary exponentiation.
 * A negative n is handled by inverting x and negating n; n == 0 yields 1.
 */
static float spow_ui(float x, integer n) {
    float result = 1.0;
    unsigned long int bits;

    if (n == 0)
        return result;

    if (n < 0) {
        n = -n;
        x = 1/x;
    }

    bits = n;
    for (;;) {
        /* Multiply in the current square when the low exponent bit is set. */
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Raise x to the integer power n by binary exponentiation.
 * A negative n is handled by inverting x and negating n; n == 0 yields 1.
 */
static double dpow_ui(double x, integer n) {
    double result = 1.0;
    unsigned long int bits;

    if (n == 0)
        return result;

    if (n < 0) {
        n = -n;
        x = 1/x;
    }

    bits = n;
    for (;;) {
        /* Multiply in the current square when the low exponent bit is set. */
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Raise the single-precision complex x to the integer power n by binary
 * exponentiation.  A negative n is handled by replacing x with 1/x and
 * negating n; n == 0 yields 1.
 */
static _Complex float cpow_ui(_Complex float x, integer n) {
    _Complex float result = 1.0;
    unsigned long int bits;

    if (n == 0)
        return result;

    if (n < 0) {
        n = -n;
        x = 1/x;
    }

    bits = n;
    for (;;) {
        /* Multiply in the current square when the low exponent bit is set. */
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Raise the double-precision complex x to the integer power n by binary
 * exponentiation.  A negative n is handled by replacing x with 1/x and
 * negating n; n == 0 yields 1.
 */
static _Complex double zpow_ui(_Complex double x, integer n) {
    _Complex double result = 1.0;
    unsigned long int bits;

    if (n == 0)
        return result;

    if (n < 0) {
        n = -n;
        x = 1/x;
    }

    bits = n;
    for (;;) {
        /* Multiply in the current square when the low exponent bit is set. */
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/*
 * Integer power x**n with integer (truncating) results.
 *   n == 0 or x == 1        -> 1
 *   n <  0 and x == -1      -> computed as (-1)**(-n)
 *   n <  0 and x == 0       -> evaluates 1/x (integer division by zero)
 *   n <  0, any other x     -> 0
 *   n >  0                  -> binary exponentiation
 */
static integer pow_ii(integer x, integer n) {
    integer result;
    unsigned long int bits;

    if (n <= 0) {
        if (n == 0 || x == 1)
            return 1;
        if (x != -1)
            /* NOTE(review): the 1/x with x == 0 looks deliberate (forces a
               divide fault for 0 to a negative power) - confirm. */
            return x == 0 ? 1/x : 0;
        n = -n;
    }

    bits = n;
    result = 1;
    for (;;) {
        if (bits & 01)
            result *= x;
        bits >>= 1;
        if (bits == 0)
            break;
        x *= x;
    }
    return result;
}
/* Scan the 1-based range w(s..e) and return the position, counted from 1 at
   index s, of the first strictly largest entry. The argument n is not used. */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
	integer best = s, k;
	double top = w[s-1];
	for (k = s + 1; k <= e; ++k) {
		if (w[k-1] > top) {
			top = w[k-1];
			best = k;
		}
	}
	return best - s + 1;
}
/* Scan the 1-based range w(s..e) and return the position, counted from 1 at
   index s, of the first strictly largest entry. The argument n is not used. */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
	integer best = s, k;
	float top = w[s-1];
	for (k = s + 1; k <= e; ++k) {
		if (w[k-1] > top) {
			top = w[k-1];
			best = k;
		}
	}
	return best - s + 1;
}
/* Conjugated dot product: stores sum(conj(x_k) * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex float acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += conjf(Cf(&x[i])) * Cf(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
	}
	pCf(z) = acc;
}
/* Conjugated dot product: stores sum(conj(x_k) * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex double acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += conj(Cd(&x[i])) * Cd(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
	}
	pCd(z) = acc;
}
/* Unconjugated dot product: stores sum(x_k * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex float acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += Cf(&x[i]) * Cf(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += Cf(&x[ix]) * Cf(&y[iy]);
	}
	pCf(z) = acc;
}
/* Unconjugated dot product: stores sum(x_k * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex double acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += Cd(&x[i]) * Cd(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += Cd(&x[ix]) * Cd(&y[iy]);
	}
	pCd(z) = acc;
}
#endif | |||
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* Table of constant values */ | |||
static integer c__9 = 9; | |||
static integer c__1 = 1; | |||
static integer c__3 = 3; | |||
/* > \brief \b LSAMETST */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* PROGRAM LSAMETST */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date December 2016 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== PROGRAM LSAMETST */ | |||
/* -- LAPACK test routine (version 3.7.0) -- */ | |||
/* -- LAPACK computational routine (version 3.7.0) -- */ | |||
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ | |||
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ | |||
/* December 2016 */ | |||
/* ===================================================================== */ | |||
/* Main program */ main(void) | |||
{ | |||
/* Format strings */ | |||
static char fmt_9999[] = "(\002 *** Error: LSAME( \002,a1,\002, \002," | |||
"a1,\002) is .FALSE.\002)"; | |||
static char fmt_9998[] = "(\002 *** Error: LSAME( \002,a1,\002, \002," | |||
"a1,\002) is .TRUE.\002)"; | |||
/* System generated locals */ | |||
integer i__1; | |||
/* Local variables */ | |||
extern logical lsame_(char *, char *); | |||
integer i1, i2; | |||
/* Fortran I/O blocks */ | |||
static cilist io___3 = { 0, 6, 0, 0, 0 }; | |||
static cilist io___4 = { 0, 6, 0, 0, 0 }; | |||
static cilist io___5 = { 0, 6, 0, fmt_9999, 0 }; | |||
static cilist io___6 = { 0, 6, 0, fmt_9999, 0 }; | |||
static cilist io___7 = { 0, 6, 0, fmt_9999, 0 }; | |||
static cilist io___8 = { 0, 6, 0, fmt_9999, 0 }; | |||
static cilist io___9 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___10 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___11 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___12 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___13 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___14 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___15 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___16 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___17 = { 0, 6, 0, 0, 0 }; | |||
/* Determine the character set. */ | |||
i1 = 'A'; | |||
i2 = 'a'; | |||
if (i2 - i1 == 32) { | |||
/* | |||
s_wsle(&io___3); | |||
do_lio(&c__9, &c__1, " ASCII character set", (ftnlen)20); | |||
e_wsle(); | |||
*/ | |||
printf(" ASCII character set"); | |||
} else { | |||
printf(" Non-ASCII character set, IOFF should be %d",i2-i1); | |||
/* | |||
s_wsle(&io___4); | |||
do_lio(&c__9, &c__1, " Non-ASCII character set, IOFF should be ", ( | |||
ftnlen)41); | |||
i__1 = i2 - i1; | |||
do_lio(&c__3, &c__1, (char *)&i__1, (ftnlen)sizeof(integer)); | |||
e_wsle(); | |||
*/ | |||
} | |||
/* Test LSAME. */ | |||
if (! lsame_("A", "A")) { | |||
printf(" *** Error: LSAME(A,A) is .FALSE.\n"); | |||
/* s_wsfe(&io___5); | |||
do_fio(&c__1, "A", (ftnlen)1); | |||
do_fio(&c__1, "A", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (! lsame_("A", "a")) { | |||
printf(" *** Error: LSAME(A,a) is .FALSE.\n"); | |||
/* | |||
s_wsfe(&io___6); | |||
do_fio(&c__1, "A", (ftnlen)1); | |||
do_fio(&c__1, "a", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (! lsame_("a", "A")) { | |||
printf(" *** Error: LSAME(a,A) is .FALSE.\n"); | |||
/* s_wsfe(&io___7); | |||
do_fio(&c__1, "a", (ftnlen)1); | |||
do_fio(&c__1, "A", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (! lsame_("a", "a")) { | |||
printf(" *** Error: LSAME(a,a) is .FALSE.\n"); | |||
/* s_wsfe(&io___8); | |||
do_fio(&c__1, "a", (ftnlen)1); | |||
do_fio(&c__1, "a", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("A", "B")) { | |||
printf(" *** Error: LSAME(A,B) is .TRUE.\n"); | |||
/* s_wsfe(&io___9); | |||
do_fio(&c__1, "A", (ftnlen)1); | |||
do_fio(&c__1, "B", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("A", "b")) { | |||
printf(" *** Error: LSAME(A,b) is .TRUE.\n"); | |||
/* s_wsfe(&io___10); | |||
do_fio(&c__1, "A", (ftnlen)1); | |||
do_fio(&c__1, "b", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("a", "B")) { | |||
printf(" *** Error: LSAME(a,B) is .TRUE.\n"); | |||
/* s_wsfe(&io___11); | |||
do_fio(&c__1, "a", (ftnlen)1); | |||
do_fio(&c__1, "B", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("a", "b")) { | |||
printf(" *** Error: LSAME(a,b) is .TRUE.\n"); | |||
/* s_wsfe(&io___12); | |||
do_fio(&c__1, "a", (ftnlen)1); | |||
do_fio(&c__1, "b", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("O", "/")) { | |||
printf(" *** Error: LSAME(O,/) is .TRUE.\n"); | |||
/* s_wsfe(&io___13); | |||
do_fio(&c__1, "O", (ftnlen)1); | |||
do_fio(&c__1, "/", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("/", "O")) { | |||
printf(" *** Error: LSAME(/,O) is .TRUE.\n"); | |||
/* s_wsfe(&io___14); | |||
do_fio(&c__1, "/", (ftnlen)1); | |||
do_fio(&c__1, "O", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("o", "/")) { | |||
printf(" *** Error: LSAME(o,/) is .TRUE.\n"); | |||
/* s_wsfe(&io___15); | |||
do_fio(&c__1, "o", (ftnlen)1); | |||
do_fio(&c__1, "/", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
if (lsame_("/", "o")) { | |||
printf(" *** Error: LSAME(/,o) is .TRUE.\n"); | |||
/* s_wsfe(&io___16); | |||
do_fio(&c__1, "/", (ftnlen)1); | |||
do_fio(&c__1, "o", (ftnlen)1); | |||
e_wsfe(); | |||
*/ | |||
} | |||
printf(" Tests completed"); | |||
/* s_wsle(&io___17); | |||
do_lio(&c__9, &c__1, " Tests completed", (ftnlen)16); | |||
e_wsle(); | |||
*/ | |||
return 0; | |||
} /* MAIN__ */ | |||
@@ -0,0 +1,445 @@ | |||
/* f2c.h -- Standard Fortran to C header file */ | |||
/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." | |||
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ | |||
#ifndef F2C_INCLUDE | |||
#define F2C_INCLUDE | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
typedef int integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
typedef int logical; | |||
typedef short int shortlogical; | |||
typedef char logical1; | |||
typedef char integer1; | |||
#define TRUE_ (1) | |||
#define FALSE_ (0) | |||
/* Extern is for use with -E */ | |||
#ifndef Extern | |||
#define Extern extern | |||
#endif | |||
/* I/O stuff */ | |||
typedef int flag; | |||
typedef int ftnlen; | |||
typedef int ftnint; | |||
/*external read, write*/ | |||
typedef struct | |||
{ flag cierr; | |||
ftnint ciunit; | |||
flag ciend; | |||
char *cifmt; | |||
ftnint cirec; | |||
} cilist; | |||
/*internal read, write*/ | |||
typedef struct | |||
{ flag icierr; | |||
char *iciunit; | |||
flag iciend; | |||
char *icifmt; | |||
ftnint icirlen; | |||
ftnint icirnum; | |||
} icilist; | |||
/*open*/ | |||
typedef struct | |||
{ flag oerr; | |||
ftnint ounit; | |||
char *ofnm; | |||
ftnlen ofnmlen; | |||
char *osta; | |||
char *oacc; | |||
char *ofm; | |||
ftnint orl; | |||
char *oblnk; | |||
} olist; | |||
/*close*/ | |||
typedef struct | |||
{ flag cerr; | |||
ftnint cunit; | |||
char *csta; | |||
} cllist; | |||
/*rewind, backspace, endfile*/ | |||
typedef struct | |||
{ flag aerr; | |||
ftnint aunit; | |||
} alist; | |||
/* inquire */ | |||
typedef struct | |||
{ flag inerr; | |||
ftnint inunit; | |||
char *infile; | |||
ftnlen infilen; | |||
ftnint *inex; /*parameters in standard's order*/ | |||
ftnint *inopen; | |||
ftnint *innum; | |||
ftnint *innamed; | |||
char *inname; | |||
ftnlen innamlen; | |||
char *inacc; | |||
ftnlen inacclen; | |||
char *inseq; | |||
ftnlen inseqlen; | |||
char *indir; | |||
ftnlen indirlen; | |||
char *infmt; | |||
ftnlen infmtlen; | |||
char *inform; | |||
ftnint informlen; | |||
char *inunf; | |||
ftnlen inunflen; | |||
ftnint *inrecl; | |||
ftnint *innrec; | |||
char *inblank; | |||
ftnlen inblanklen; | |||
} inlist; | |||
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
#define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
#define dabs(x) (fabs(x)) | |||
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
#define dmin(a,b) (f2cmin(a,b)) | |||
#define dmax(a,b) (f2cmax(a,b)) | |||
#define bit_test(a,b) ((a) >> (b) & 1) | |||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
#define abort_() { sig_die("Fortran abort routine called", 1); } | |||
#define c_abs(z) (cabsf(Cf(z))) | |||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} | |||
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} | |||
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
/* Conjugate / cosine / exp / imaginary-part helpers. The r_ (single precision)
   forms use the float variants conjf/cimagf so no round trip through double occurs. */
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
/* Complex power R = A**B; every argument is a doublecomplex pointer, so B is
   converted with Cd() like A (a bare *(B) is a struct and cannot be passed to cpow). */
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
#define sig_die(s, kill) { exit(1); } | |||
#define s_stop(s, n) {exit(0);} | |||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
#define z_abs(z) (cabs(Cd(z))) | |||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
#define myexit_() break; | |||
#define mycycle_() continue; | |||
#define myceiling_(w) ceil(w) | |||
#define myhuge_(w) HUGE_VAL | |||
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} | |||
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) | |||
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/* Integer power by repeated squaring: returns x raised to n.
   A negative n is evaluated as (1/x) raised to |n|. */
static float spow_ui(float x, integer n) {
	float pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;	/* bit set: fold the current square into the result */
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
/* Integer power by repeated squaring: returns x raised to n.
   A negative n is evaluated as (1/x) raised to |n|. */
static double dpow_ui(double x, integer n) {
	double pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;	/* bit set: fold the current square into the result */
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
/* Integer power of a single-precision complex value by repeated squaring.
   A negative n is evaluated as (1/x) raised to |n|. */
static _Complex float cpow_ui(_Complex float x, integer n) {
	_Complex float pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;	/* bit set: fold the current square into the result */
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
/* Integer power of a double-precision complex value by repeated squaring.
   A negative n is evaluated as (1/x) raised to |n|. */
static _Complex double zpow_ui(_Complex double x, integer n) {
	_Complex double pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;	/* bit set: fold the current square into the result */
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
/* Integer power x**n with integer (truncating) semantics.
   n >= 0 : repeated squaring.
   n <  0 : 1 for x == 1, +/-1 for x == -1 (by parity of n), 0 for |x| > 1. */
static integer pow_ii(integer x, integer n) {
	integer pow = 1; unsigned long int u;
	if (n < 0) {
		if (x == 1) return 1;
		/* Decide by parity instead of negating n, which overflows for the most negative n. */
		if (x == -1) return (n % 2 != 0) ? -1 : 1;
		/* x == 0 keeps the original 1/x (an integer division by zero);
		   NOTE(review): presumably a deliberate trap as in libf2c -- confirm. */
		return x == 0 ? 1/x : 0;
	}
	for(u = n; ; ) {
		if(u & 01) pow *= x;
		if(u >>= 1) x *= x;
		else break;
	}
	return pow;
}
/* Scan the 1-based range w(s..e) and return the position, counted from 1 at
   index s, of the first strictly largest entry. The argument n is not used. */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
	integer best = s, k;
	double top = w[s-1];
	for (k = s + 1; k <= e; ++k) {
		if (w[k-1] > top) {
			top = w[k-1];
			best = k;
		}
	}
	return best - s + 1;
}
/* Scan the 1-based range w(s..e) and return the position, counted from 1 at
   index s, of the first strictly largest entry. The argument n is not used. */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
	integer best = s, k;
	float top = w[s-1];
	for (k = s + 1; k <= e; ++k) {
		if (w[k-1] > top) {
			top = w[k-1];
			best = k;
		}
	}
	return best - s + 1;
}
/* Conjugated dot product: stores sum(conj(x_k) * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex float acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += conjf(Cf(&x[i])) * Cf(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
	}
	pCf(z) = acc;
}
/* Conjugated dot product: stores sum(conj(x_k) * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex double acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += conj(Cd(&x[i])) * Cd(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
	}
	pCd(z) = acc;
}
/* Unconjugated dot product: stores sum(x_k * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex float acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += Cf(&x[i]) * Cf(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += Cf(&x[ix]) * Cf(&y[iy]);
	}
	pCf(z) = acc;
}
/* Unconjugated dot product: stores sum(x_k * y_k) over *n_ elements in *z,
   walking x and y with strides *incx_ and *incy_. */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i, ix, iy;
	_Complex double acc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) acc += Cd(&x[i]) * Cd(&y[i]);
	} else {
		/* A negative stride starts at the far end (reference BLAS convention),
		   so no index falls before the start of the array. */
		ix = incx < 0 ? (1-n)*incx : 0;
		iy = incy < 0 ? (1-n)*incy : 0;
		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += Cd(&x[ix]) * Cd(&y[iy]);
	}
	pCd(z) = acc;
}
#endif | |||
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* > \brief \b SECOND returns nothing */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* REAL FUNCTION SECOND( ) */ | |||
/* > \par Purpose: */ | |||
/* ============= */ | |||
/* > */ | |||
/* > \verbatim */ | |||
/* > */ | |||
/* > SECOND returns nothing instead of returning the user time for a process in seconds. */ | |||
/* > If you are using that routine, it means that neither EXTERNAL ETIME, */ | |||
/* > EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on */ | |||
/* > your machine. */ | |||
/* > \endverbatim */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date December 2016 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== */ | |||
/* Placeholder timer: no clock is consulted, the reported time is always zero. */
real second_(void)
{
	return 0.f;
} /* second_ */
@@ -0,0 +1,569 @@ | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
#if defined(_WIN64) | |||
typedef long long BLASLONG; | |||
typedef unsigned long long BLASULONG; | |||
#else | |||
typedef long BLASLONG; | |||
typedef unsigned long BLASULONG; | |||
#endif | |||
#ifdef LAPACK_ILP64 | |||
typedef BLASLONG blasint; | |||
#if defined(_WIN64) | |||
#define blasabs(x) llabs(x) | |||
#else | |||
#define blasabs(x) labs(x) | |||
#endif | |||
#else | |||
typedef int blasint; | |||
#define blasabs(x) abs(x) | |||
#endif | |||
typedef blasint integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
#ifdef _MSC_VER | |||
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} | |||
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} | |||
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} | |||
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} | |||
#else | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#endif | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
typedef int logical; | |||
typedef short int shortlogical; | |||
typedef char logical1; | |||
typedef char integer1; | |||
#define TRUE_ (1) | |||
#define FALSE_ (0) | |||
/* Extern is for use with -E */ | |||
#ifndef Extern | |||
#define Extern extern | |||
#endif | |||
/* I/O stuff */ | |||
typedef int flag; | |||
typedef int ftnlen; | |||
typedef int ftnint; | |||
/*external read, write*/ | |||
typedef struct | |||
{ flag cierr; | |||
ftnint ciunit; | |||
flag ciend; | |||
char *cifmt; | |||
ftnint cirec; | |||
} cilist; | |||
/*internal read, write*/ | |||
typedef struct | |||
{ flag icierr; | |||
char *iciunit; | |||
flag iciend; | |||
char *icifmt; | |||
ftnint icirlen; | |||
ftnint icirnum; | |||
} icilist; | |||
/*open*/ | |||
typedef struct | |||
{ flag oerr; | |||
ftnint ounit; | |||
char *ofnm; | |||
ftnlen ofnmlen; | |||
char *osta; | |||
char *oacc; | |||
char *ofm; | |||
ftnint orl; | |||
char *oblnk; | |||
} olist; | |||
/*close*/ | |||
typedef struct | |||
{ flag cerr; | |||
ftnint cunit; | |||
char *csta; | |||
} cllist; | |||
/*rewind, backspace, endfile*/ | |||
typedef struct | |||
{ flag aerr; | |||
ftnint aunit; | |||
} alist; | |||
/* inquire */ | |||
typedef struct | |||
{ flag inerr; | |||
ftnint inunit; | |||
char *infile; | |||
ftnlen infilen; | |||
ftnint *inex; /*parameters in standard's order*/ | |||
ftnint *inopen; | |||
ftnint *innum; | |||
ftnint *innamed; | |||
char *inname; | |||
ftnlen innamlen; | |||
char *inacc; | |||
ftnlen inacclen; | |||
char *inseq; | |||
ftnlen inseqlen; | |||
char *indir; | |||
ftnlen indirlen; | |||
char *infmt; | |||
ftnlen infmtlen; | |||
char *inform; | |||
ftnint informlen; | |||
char *inunf; | |||
ftnlen inunflen; | |||
ftnint *inrecl; | |||
ftnint *innrec; | |||
char *inblank; | |||
ftnlen inblanklen; | |||
} inlist; | |||
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
#define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
#define dabs(x) (fabs(x)) | |||
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
#define dmin(a,b) (f2cmin(a,b)) | |||
#define dmax(a,b) (f2cmax(a,b)) | |||
#define bit_test(a,b) ((a) >> (b) & 1) | |||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
#define abort_() { sig_die("Fortran abort routine called", 1); } | |||
#define c_abs(z) (cabsf(Cf(z))) | |||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
/* Complex division c = a / b on pointers to the f2c complex structs.
   The MSVC branch has no complex operators to rely on, so it works on the
   struct fields with Smith's method (scale by the larger component of b).
   Inputs are copied first, so c may alias a or b. */
#ifdef _MSC_VER
#define c_div(c, a, b) { \
	real cdiv_ar_ = (a)->r, cdiv_ai_ = (a)->i, cdiv_br_ = (b)->r, cdiv_bi_ = (b)->i, cdiv_q_, cdiv_d_; \
	if (fabsf(cdiv_br_) >= fabsf(cdiv_bi_)) { \
		cdiv_q_ = cdiv_bi_/cdiv_br_; cdiv_d_ = cdiv_br_ + cdiv_bi_*cdiv_q_; \
		(c)->r = (cdiv_ar_ + cdiv_ai_*cdiv_q_)/cdiv_d_; (c)->i = (cdiv_ai_ - cdiv_ar_*cdiv_q_)/cdiv_d_; \
	} else { \
		cdiv_q_ = cdiv_br_/cdiv_bi_; cdiv_d_ = cdiv_bi_ + cdiv_br_*cdiv_q_; \
		(c)->r = (cdiv_ar_*cdiv_q_ + cdiv_ai_)/cdiv_d_; (c)->i = (cdiv_ai_*cdiv_q_ - cdiv_ar_)/cdiv_d_; \
	} }
#define z_div(c, a, b) { \
	doublereal zdiv_ar_ = (a)->r, zdiv_ai_ = (a)->i, zdiv_br_ = (b)->r, zdiv_bi_ = (b)->i, zdiv_q_, zdiv_d_; \
	if (fabs(zdiv_br_) >= fabs(zdiv_bi_)) { \
		zdiv_q_ = zdiv_bi_/zdiv_br_; zdiv_d_ = zdiv_br_ + zdiv_bi_*zdiv_q_; \
		(c)->r = (zdiv_ar_ + zdiv_ai_*zdiv_q_)/zdiv_d_; (c)->i = (zdiv_ai_ - zdiv_ar_*zdiv_q_)/zdiv_d_; \
	} else { \
		zdiv_q_ = zdiv_br_/zdiv_bi_; zdiv_d_ = zdiv_bi_ + zdiv_br_*zdiv_q_; \
		(c)->r = (zdiv_ar_*zdiv_q_ + zdiv_ai_)/zdiv_d_; (c)->i = (zdiv_ai_*zdiv_q_ - zdiv_ar_)/zdiv_d_; \
	} }
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimagf(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
/* Complex power R = A**B; every argument is a doublecomplex pointer, so B is
   converted with Cd() like A (a bare *(B) is a struct and cannot be passed to cpow). */
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
#define sig_die(s, kill) { exit(1); } | |||
#define s_stop(s, n) {exit(0);} | |||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
#define z_abs(z) (cabs(Cd(z))) | |||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
#define myexit_() break; | |||
/* Helpers for Fortran intrinsics. The value-producing ones expand to plain
   expressions (no surrounding braces) so they can be used in assignments and
   arithmetic as well as in initializers. */
#define mycycle() continue;
#define myceiling(w) ceil(w)
#define myhuge(w) HUGE_VAL
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
#define mymaxloc(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/* Integer power by repeated squaring: returns x raised to n.
   A negative n is evaluated as (1/x) raised to |n|. */
static float spow_ui(float x, integer n) {
	float pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;	/* bit set: fold the current square into the result */
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
/* Integer power by repeated squaring: returns x raised to n.
   A negative n is evaluated as (1/x) raised to |n|. */
static double dpow_ui(double x, integer n) {
	double pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;	/* bit set: fold the current square into the result */
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
/* Integer power of a single-precision complex value by repeated squaring.
   A negative n is evaluated as (1/x) raised to |n|. */
#ifdef _MSC_VER
/* MSVC variant: complex multiply and reciprocal are written out on the real and
   imaginary parts.
   NOTE(review): this takes the f2c `complex` struct while pow_ci passes Cf(a)
   (an _Fcomplex) -- confirm which type callers are meant to supply. */
static _Fcomplex cpow_ui(complex x, integer n) {
	real pr = 1.0f, pi = 0.0f, t; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			/* 1/x = conj(x) / |x|^2 */
			real d = x.r*x.r + x.i*x.i;
			x.r = x.r/d; x.i = -x.i/d;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) { t = pr*x.r - pi*x.i; pi = pr*x.i + pi*x.r; pr = t; }	/* pow *= x */
			if(u >>= 1) { t = x.r*x.r - x.i*x.i; x.i = 2*x.r*x.i; x.r = t; }	/* x *= x */
			else break;
		}
	}
	_Fcomplex p={pr, pi};
	return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
	_Complex float pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) {
			x = 1/x;
			/* Take |n| in unsigned arithmetic; "-n" overflows for the most negative n. */
			u = 0UL - (unsigned long int)n;
		}
		else u = n;
		for(;;) {
			if(u & 01) pow *= x;
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
#endif
/* zpow_ui: raise a double-precision complex number to an integer power by
   binary exponentiation. A negative exponent inverts the base first.
   The MSVC variant works on the _Val[0] (real) / _Val[1] (imaginary)
   components explicitly:
     1/(a+bi)       = (a - bi) / (a*a + b*b)
     (a+bi)*(c+di)  = (a*c - b*d) + (a*d + b*c)i */
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
	_Dcomplex pow={1.0,0.0}; unsigned long int u;
	double t;
	if(n != 0) {
		if(n < 0) {
			double d = x._Val[0]*x._Val[0] + x._Val[1]*x._Val[1];
			n = -n;
			x._Val[0] = x._Val[0]/d;
			x._Val[1] = -x._Val[1]/d;
		}
		for(u = n; ; ) {
			if(u & 01) {
				/* pow *= x */
				t = pow._Val[0]*x._Val[0] - pow._Val[1]*x._Val[1];
				pow._Val[1] = pow._Val[0]*x._Val[1] + pow._Val[1]*x._Val[0];
				pow._Val[0] = t;
			}
			if(u >>= 1) {
				/* x *= x */
				t = x._Val[0]*x._Val[0] - x._Val[1]*x._Val[1];
				x._Val[1] = 2*x._Val[0]*x._Val[1];
				x._Val[0] = t;
			}
			else break;
		}
	}
	_Dcomplex p = {pow._Val[0], pow._Val[1]};
	return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
	_Complex double pow=1.0; unsigned long int u;
	if(n != 0) {
		if(n < 0) n = -n, x = 1/x;
		for(u = n; ; ) {
			if(u & 01) pow *= x;
			if(u >>= 1) x *= x;
			else break;
		}
	}
	return pow;
}
#endif
/* Integer power x**n.
   n == 0 gives 1. For n < 0 the result is 1 when x == 1, alternates in sign
   for x == -1 (computed with |n|), is 0 for any other non-zero x, and
   evaluates 1/x for x == 0. Positive n uses square-and-multiply. */
static integer pow_ii(integer x, integer n) {
	integer result = 1;
	unsigned long int e;
	if (n == 0)
		return 1;
	if (n < 0) {
		if (x == 1)
			return 1;
		if (x != -1)
			return x == 0 ? 1/x : 0;
		n = -n;
	}
	e = n;
	for (;;) {
		if (e & 01)
			result *= x;
		e >>= 1;
		if (!e)
			break;
		x *= x;
	}
	return result;
}
/* Scan w over the 1-based inclusive range s..e and return the position of
   the first maximum, counted from 1 relative to s. The argument n is not used. */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
	integer best = s, k;
	double top = w[s-1];
	for (k = s + 1; k <= e; ++k) {
		if (w[k-1] > top) {
			best = k;
			top = w[k-1];
		}
	}
	return best - s + 1;
}
/* Scan w over the 1-based inclusive range s..e and return the position of
   the first maximum, counted from 1 relative to s. The argument n is not used. */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
	integer best = s, k;
	float top = w[s-1];
	for (k = s + 1; k <= e; ++k) {
		if (w[k-1] > top) {
			best = k;
			top = w[k-1];
		}
	}
	return best - s + 1;
}
/* cdotc_: conjugated single-precision complex dot product,
     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy].
   NOTE(review): elements are addressed as x[i*incx] / y[i*incy] directly, so
   negative increments do not get the start-offset adjustment used by
   reference BLAS - confirm callers only pass positive strides. */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* No complex operators here: accumulate both components of conj(x)*y,
	   (xr - i*xi)*(yr + i*yi) = (xr*yr + xi*yi) + i*(xr*yi - xi*yr). */
	float re = 0.0f, im = 0.0f;
	for (i=0;i<n;i++) {
		const complex *xp = &x[i*incx];
		const complex *yp = &y[i*incy];
		re += xp->r * yp->r + xp->i * yp->i;
		im += xp->r * yp->i - xp->i * yp->r;
	}
	z->r = re;
	z->i = im;
#else
	_Complex float zdotc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
		}
	} else {
		for (i=0;i<n;i++) { /* same sum over strided elements */
			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
		}
	}
	pCf(z) = zdotc;
#endif
}
/* zdotc_: conjugated double-precision complex dot product,
     *z = sum over i in [0, n) of conj(x[i*incx]) * y[i*incy].
   NOTE(review): elements are addressed as x[i*incx] / y[i*incy] directly, so
   negative increments do not get the start-offset adjustment used by
   reference BLAS - confirm callers only pass positive strides. */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* No complex operators here: accumulate both components of conj(x)*y,
	   (xr - i*xi)*(yr + i*yi) = (xr*yr + xi*yi) + i*(xr*yi - xi*yr). */
	double re = 0.0, im = 0.0;
	for (i=0;i<n;i++) {
		const doublecomplex *xp = &x[i*incx];
		const doublecomplex *yp = &y[i*incy];
		re += xp->r * yp->r + xp->i * yp->i;
		im += xp->r * yp->i - xp->i * yp->r;
	}
	z->r = re;
	z->i = im;
#else
	_Complex double zdotc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
		}
	} else {
		for (i=0;i<n;i++) { /* same sum over strided elements */
			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
		}
	}
	pCd(z) = zdotc;
#endif
}
/* cdotu_: unconjugated single-precision complex dot product,
     *z = sum over i in [0, n) of x[i*incx] * y[i*incy].
   NOTE(review): elements are addressed as x[i*incx] / y[i*incy] directly, so
   negative increments do not get the start-offset adjustment used by
   reference BLAS - confirm callers only pass positive strides. */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* No complex operators here: accumulate both components of x*y,
	   (xr + i*xi)*(yr + i*yi) = (xr*yr - xi*yi) + i*(xr*yi + xi*yr). */
	float re = 0.0f, im = 0.0f;
	for (i=0;i<n;i++) {
		const complex *xp = &x[i*incx];
		const complex *yp = &y[i*incy];
		re += xp->r * yp->r - xp->i * yp->i;
		im += xp->r * yp->i + xp->i * yp->r;
	}
	z->r = re;
	z->i = im;
#else
	_Complex float zdotc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
			zdotc += Cf(&x[i]) * Cf(&y[i]);
		}
	} else {
		for (i=0;i<n;i++) { /* same sum over strided elements */
			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
		}
	}
	pCf(z) = zdotc;
#endif
}
/* zdotu_: unconjugated double-precision complex dot product,
     *z = sum over i in [0, n) of x[i*incx] * y[i*incy].
   NOTE(review): elements are addressed as x[i*incx] / y[i*incy] directly, so
   negative increments do not get the start-offset adjustment used by
   reference BLAS - confirm callers only pass positive strides. */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
	/* No complex operators here: accumulate both components of x*y,
	   (xr + i*xi)*(yr + i*yi) = (xr*yr - xi*yi) + i*(xr*yi + xi*yr). */
	double re = 0.0, im = 0.0;
	for (i=0;i<n;i++) {
		const doublecomplex *xp = &x[i*incx];
		const doublecomplex *yp = &y[i*incy];
		re += xp->r * yp->r - xp->i * yp->i;
		im += xp->r * yp->i + xp->i * yp->r;
	}
	z->r = re;
	z->i = im;
#else
	_Complex double zdotc = 0.0;
	if (incx == 1 && incy == 1) {
		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
			zdotc += Cd(&x[i]) * Cd(&y[i]);
		}
	} else {
		for (i=0;i<n;i++) { /* same sum over strided elements */
			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
		}
	}
	pCd(z) = zdotc;
#endif
}
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* > \brief \b SECOND returns nothing */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Definition: */ | |||
/* =========== */ | |||
/* REAL FUNCTION SECOND( ) */ | |||
/* > \par Purpose: */ | |||
/* ============= */ | |||
/* > */ | |||
/* > \verbatim */ | |||
/* > */ | |||
/* > SECOND returns nothing instead of returning the user time for a process in seconds. */ | |||
/* > If you are using that routine, it means that neither EXTERNAL ETIME, */ | |||
/* > EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on */ | |||
/* > your machine. */ | |||
/* > \endverbatim */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date December 2016 */ | |||
/* > \ingroup auxOTHERauxiliary */ | |||
/* ===================================================================== */ | |||
real second_(void)
{
/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/*     December 2016 */
/*  ===================================================================== */
    /* Fallback variant: no timer is consulted, the reported time is always zero. */
    return 0.f;
/*     End of SECOND */
} /* second_ */
@@ -0,0 +1,566 @@ | |||
/* f2c.h -- Standard Fortran to C header file */ | |||
/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." | |||
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ | |||
#ifndef F2C_INCLUDE | |||
#define F2C_INCLUDE | |||
#include <math.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <stdio.h> | |||
#include <complex.h> | |||
#ifdef complex | |||
#undef complex | |||
#endif | |||
#ifdef I | |||
#undef I | |||
#endif | |||
typedef int integer; | |||
typedef unsigned int uinteger; | |||
typedef char *address; | |||
typedef short int shortint; | |||
typedef float real; | |||
typedef double doublereal; | |||
typedef struct { real r, i; } complex; | |||
typedef struct { doublereal r, i; } doublecomplex; | |||
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} | |||
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} | |||
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} | |||
#define pCf(z) (*_pCf(z)) | |||
#define pCd(z) (*_pCd(z)) | |||
typedef int logical; | |||
typedef short int shortlogical; | |||
typedef char logical1; | |||
typedef char integer1; | |||
#define TRUE_ (1) | |||
#define FALSE_ (0) | |||
/* Extern is for use with -E */ | |||
#ifndef Extern | |||
#define Extern extern | |||
#endif | |||
/* I/O stuff */ | |||
typedef int flag; | |||
typedef int ftnlen; | |||
typedef int ftnint; | |||
/*external read, write*/ | |||
typedef struct | |||
{ flag cierr; | |||
ftnint ciunit; | |||
flag ciend; | |||
char *cifmt; | |||
ftnint cirec; | |||
} cilist; | |||
/*internal read, write*/ | |||
typedef struct | |||
{ flag icierr; | |||
char *iciunit; | |||
flag iciend; | |||
char *icifmt; | |||
ftnint icirlen; | |||
ftnint icirnum; | |||
} icilist; | |||
/*open*/ | |||
typedef struct | |||
{ flag oerr; | |||
ftnint ounit; | |||
char *ofnm; | |||
ftnlen ofnmlen; | |||
char *osta; | |||
char *oacc; | |||
char *ofm; | |||
ftnint orl; | |||
char *oblnk; | |||
} olist; | |||
/*close*/ | |||
typedef struct | |||
{ flag cerr; | |||
ftnint cunit; | |||
char *csta; | |||
} cllist; | |||
/*rewind, backspace, endfile*/ | |||
typedef struct | |||
{ flag aerr; | |||
ftnint aunit; | |||
} alist; | |||
/* inquire */ | |||
typedef struct | |||
{ flag inerr; | |||
ftnint inunit; | |||
char *infile; | |||
ftnlen infilen; | |||
ftnint *inex; /*parameters in standard's order*/ | |||
ftnint *inopen; | |||
ftnint *innum; | |||
ftnint *innamed; | |||
char *inname; | |||
ftnlen innamlen; | |||
char *inacc; | |||
ftnlen inacclen; | |||
char *inseq; | |||
ftnlen inseqlen; | |||
char *indir; | |||
ftnlen indirlen; | |||
char *infmt; | |||
ftnlen infmtlen; | |||
char *inform; | |||
ftnint informlen; | |||
char *inunf; | |||
ftnlen inunflen; | |||
ftnint *inrecl; | |||
ftnint *innrec; | |||
char *inblank; | |||
ftnlen inblanklen; | |||
} inlist; | |||
#define VOID void | |||
union Multitype { /* for multiple entry points */ | |||
integer1 g; | |||
shortint h; | |||
integer i; | |||
/* longint j; */ | |||
real r; | |||
doublereal d; | |||
complex c; | |||
doublecomplex z; | |||
}; | |||
typedef union Multitype Multitype; | |||
struct Vardesc { /* for Namelist */ | |||
char *name; | |||
char *addr; | |||
ftnlen *dims; | |||
int type; | |||
}; | |||
typedef struct Vardesc Vardesc; | |||
struct Namelist { | |||
char *name; | |||
Vardesc **vars; | |||
int nvars; | |||
}; | |||
typedef struct Namelist Namelist; | |||
#define abs(x) ((x) >= 0 ? (x) : -(x)) | |||
#define dabs(x) (fabs(x)) | |||
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) | |||
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) | |||
#define dmin(a,b) (f2cmin(a,b)) | |||
#define dmax(a,b) (f2cmax(a,b)) | |||
#define bit_test(a,b) ((a) >> (b) & 1) | |||
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) | |||
#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) | |||
#define abort_() { sig_die("Fortran abort routine called", 1); } | |||
#define c_abs(z) (cabsf(Cf(z))) | |||
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } | |||
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} | |||
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} | |||
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} | |||
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} | |||
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} | |||
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} | |||
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} | |||
#define d_abs(x) (fabs(*(x))) | |||
#define d_acos(x) (acos(*(x))) | |||
#define d_asin(x) (asin(*(x))) | |||
#define d_atan(x) (atan(*(x))) | |||
#define d_atn2(x, y) (atan2(*(x),*(y))) | |||
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } | |||
#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } | |||
#define d_cos(x) (cos(*(x))) | |||
#define d_cosh(x) (cosh(*(x))) | |||
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) | |||
#define d_exp(x) (exp(*(x))) | |||
#define d_imag(z) (cimag(Cd(z))) | |||
#define r_imag(z) (cimag(Cf(z))) | |||
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) | |||
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) | |||
#define d_log(x) (log(*(x))) | |||
#define d_mod(x, y) (fmod(*(x), *(y))) | |||
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) | |||
#define d_nint(x) u_nint(*(x)) | |||
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) | |||
#define d_sign(a,b) u_sign(*(a),*(b)) | |||
#define r_sign(a,b) u_sign(*(a),*(b)) | |||
#define d_sin(x) (sin(*(x))) | |||
#define d_sinh(x) (sinh(*(x))) | |||
#define d_sqrt(x) (sqrt(*(x))) | |||
#define d_tan(x) (tan(*(x))) | |||
#define d_tanh(x) (tanh(*(x))) | |||
#define i_abs(x) abs(*(x)) | |||
#define i_dnnt(x) ((integer)u_nint(*(x))) | |||
#define i_len(s, n) (n) | |||
#define i_nint(x) ((integer)u_nint(*(x))) | |||
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) | |||
#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) | |||
#define pow_si(B,E) spow_ui(*(B),*(E)) | |||
#define pow_ri(B,E) spow_ui(*(B),*(E)) | |||
#define pow_di(B,E) dpow_ui(*(B),*(E)) | |||
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} | |||
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} | |||
/* pow_zz: R = A ** B with all three arguments given as doublecomplex pointers.
   B has to be converted with Cd() exactly like A; passing *(B) would hand the
   doublecomplex struct itself to cpow(). */
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } | |||
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) | |||
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } | |||
#define sig_die(s, kill) { exit(1); } | |||
#define s_stop(s, n) {exit(0);} | |||
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; | |||
#define z_abs(z) (cabs(Cd(z))) | |||
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} | |||
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} | |||
#define myexit_() break; | |||
#define mycycle_() continue; | |||
#define myceiling_(w) ceil(w) | |||
#define myhuge_(w) HUGE_VAL | |||
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} | |||
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) | |||
/* procedure parameter types for -A and -C++ */ | |||
#define F2C_proc_par_types 1 | |||
#ifdef __cplusplus | |||
typedef logical (*L_fp)(...); | |||
#else | |||
typedef logical (*L_fp)(); | |||
#endif | |||
/* x**n for float x and integer n, computed by repeated squaring.
   For n < 0 the base is replaced by its reciprocal and |n| is used. */
static float spow_ui(float x, integer n) {
	float acc = 1.0;
	unsigned long int e;
	if (n == 0)
		return acc;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	/* x is squared only while exponent bits remain */
	for (e = n; ; x *= x) {
		if (e & 01)
			acc *= x;
		e >>= 1;
		if (e == 0)
			break;
	}
	return acc;
}
/* x**n for double x and integer n, computed by repeated squaring.
   For n < 0 the base is replaced by its reciprocal and |n| is used. */
static double dpow_ui(double x, integer n) {
	double acc = 1.0;
	unsigned long int e;
	if (n == 0)
		return acc;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	/* x is squared only while exponent bits remain */
	for (e = n; ; x *= x) {
		if (e & 01)
			acc *= x;
		e >>= 1;
		if (e == 0)
			break;
	}
	return acc;
}
/* x**n for single-precision complex x and integer n, computed by repeated
   squaring. For n < 0 the base is replaced by its reciprocal and |n| is used. */
static _Complex float cpow_ui(_Complex float x, integer n) {
	_Complex float acc = 1.0;
	unsigned long int e;
	if (n == 0)
		return acc;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	/* x is squared only while exponent bits remain */
	for (e = n; ; x *= x) {
		if (e & 01)
			acc *= x;
		e >>= 1;
		if (e == 0)
			break;
	}
	return acc;
}
/* x**n for double-precision complex x and integer n, computed by repeated
   squaring. For n < 0 the base is replaced by its reciprocal and |n| is used. */
static _Complex double zpow_ui(_Complex double x, integer n) {
	_Complex double acc = 1.0;
	unsigned long int e;
	if (n == 0)
		return acc;
	if (n < 0) {
		n = -n;
		x = 1/x;
	}
	/* x is squared only while exponent bits remain */
	for (e = n; ; x *= x) {
		if (e & 01)
			acc *= x;
		e >>= 1;
		if (e == 0)
			break;
	}
	return acc;
}
/* Integer power x**n.
   n == 0 gives 1. For n < 0 the result is 1 when x == 1, alternates in sign
   for x == -1 (computed with |n|), is 0 for any other non-zero x, and
   evaluates 1/x for x == 0. Positive n uses repeated squaring. */
static integer pow_ii(integer x, integer n) {
	integer acc = 1;
	unsigned long int e;
	if (n == 0)
		return 1;
	if (n < 0) {
		if (x == 1)
			return 1;
		if (x != -1)
			return x == 0 ? 1/x : 0;
		n = -n;
	}
	/* x is squared only while exponent bits remain */
	for (e = n; ; x *= x) {
		if (e & 01)
			acc *= x;
		e >>= 1;
		if (e == 0)
			break;
	}
	return acc;
}
/* Return the 1-based position, relative to s, of the first largest entry of
   w within the 1-based inclusive range s..e. The argument n is not used. */
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
	integer pos = s;
	integer idx = s + 1;
	double largest = w[s-1];
	while (idx <= e) {
		if (w[idx-1] > largest) {
			pos = idx;
			largest = w[idx-1];
		}
		idx++;
	}
	return pos - s + 1;
}
/* Return the 1-based position, relative to s, of the first largest entry of
   w within the 1-based inclusive range s..e. The argument n is not used. */
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
	integer pos = s;
	integer idx = s + 1;
	float largest = w[s-1];
	while (idx <= e) {
		if (w[idx-1] > largest) {
			pos = idx;
			largest = w[idx-1];
		}
		idx++;
	}
	return pos - s + 1;
}
/* Conjugated single-precision complex dot product:
   *z = sum over k in [0, n) of conj(x[k*incx]) * y[k*incy]. */
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	const integer count = *n_, sx = *incx_, sy = *incy_;
	_Complex float acc = 0.0;
	integer k;
	if (sx != 1 || sy != 1) {
		/* general strides */
		for (k = 0; k < count; ++k)
			acc += conjf(Cf(&x[k*sx])) * Cf(&y[k*sy]);
	} else {
		/* both vectors contiguous */
		for (k = 0; k < count; ++k)
			acc += conjf(Cf(&x[k])) * Cf(&y[k]);
	}
	pCf(z) = acc;
}
/* Conjugated double-precision complex dot product:
   *z = sum over k in [0, n) of conj(x[k*incx]) * y[k*incy]. */
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	const integer count = *n_, sx = *incx_, sy = *incy_;
	_Complex double acc = 0.0;
	integer k;
	if (sx != 1 || sy != 1) {
		/* general strides */
		for (k = 0; k < count; ++k)
			acc += conj(Cd(&x[k*sx])) * Cd(&y[k*sy]);
	} else {
		/* both vectors contiguous */
		for (k = 0; k < count; ++k)
			acc += conj(Cd(&x[k])) * Cd(&y[k]);
	}
	pCd(z) = acc;
}
/* Unconjugated single-precision complex dot product:
   *z = sum over k in [0, n) of x[k*incx] * y[k*incy]. */
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
	const integer count = *n_, sx = *incx_, sy = *incy_;
	_Complex float acc = 0.0;
	integer k;
	if (sx != 1 || sy != 1) {
		/* general strides */
		for (k = 0; k < count; ++k)
			acc += Cf(&x[k*sx]) * Cf(&y[k*sy]);
	} else {
		/* both vectors contiguous */
		for (k = 0; k < count; ++k)
			acc += Cf(&x[k]) * Cf(&y[k]);
	}
	pCf(z) = acc;
}
/* Unconjugated double-precision complex dot product:
   *z = sum over k in [0, n) of x[k*incx] * y[k*incy]. */
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
	const integer count = *n_, sx = *incx_, sy = *incy_;
	_Complex double acc = 0.0;
	integer k;
	if (sx != 1 || sy != 1) {
		/* general strides */
		for (k = 0; k < count; ++k)
			acc += Cd(&x[k*sx]) * Cd(&y[k*sy]);
	} else {
		/* both vectors contiguous */
		for (k = 0; k < count; ++k)
			acc += Cd(&x[k]) * Cd(&y[k]);
	}
	pCd(z) = acc;
}
#endif | |||
/* -- translated by f2c (version 20000121). | |||
You must link the resulting object file with the libraries: | |||
-lf2c -lm (in that order) | |||
*/ | |||
/* Table of constant values */ | |||
static integer c__1 = 1; | |||
static integer c__1000 = 1000; | |||
/* > \brief \b SECONDTST */ | |||
/* =========== DOCUMENTATION =========== */ | |||
/* Online html documentation available at */ | |||
/* http://www.netlib.org/lapack/explore-html/ */ | |||
/* Authors: */ | |||
/* ======== */ | |||
/* > \author Univ. of Tennessee */ | |||
/* > \author Univ. of California Berkeley */ | |||
/* > \author Univ. of Colorado Denver */ | |||
/* > \author NAG Ltd. */ | |||
/* > \date November 2017 */ | |||
/* > \ingroup auxOTHERcomputational */ | |||
/* ===================================================================== PROGRAM SECONDTST */ | |||
/* -- LAPACK test routine (version 3.8.0) -- */ | |||
/* -- LAPACK computational routine (version 3.8.0) -- */ | |||
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ | |||
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ | |||
/* November 2017 */ | |||
/* ===================================================================== */ | |||
/* Main program */ main(void) | |||
{ | |||
/* Format strings */ | |||
static char fmt_9999[] = "(\002 Time for \002,g10.3,\002 SAXPY ops = " | |||
"\002,g10.3,\002 seconds\002)"; | |||
static char fmt_9998[] = "(\002 SAXPY performance rate = \002,g10" | |||
".3,\002 mflops \002)"; | |||
static char fmt_9994[] = "(\002 *** Warning: Time for operations was le" | |||
"ss or equal\002,\002 than zero => timing in TESTING might be dub" | |||
"ious\002)"; | |||
static char fmt_9997[] = "(\002 Including SECOND, time = \002,g10" | |||
".3,\002 seconds\002)"; | |||
static char fmt_9996[] = "(\002 Average time for SECOND = \002,g10" | |||
".3,\002 milliseconds\002)"; | |||
static char fmt_9995[] = "(\002 Equivalent floating point ops = \002,g10" | |||
".3,\002 ops\002)"; | |||
/* System generated locals */ | |||
real r__1; | |||
/* Local variables */ | |||
integer i__, j; | |||
real alpha, x[1000], y[1000], total; | |||
extern /* Subroutine */ int mysub_(integer *, real *, real *); | |||
real t1, t2; | |||
extern real second_(void); | |||
real tnosec, avg; | |||
/* Fortran I/O blocks */ | |||
static cilist io___10 = { 0, 6, 0, fmt_9999, 0 }; | |||
static cilist io___11 = { 0, 6, 0, fmt_9998, 0 }; | |||
static cilist io___12 = { 0, 6, 0, fmt_9994, 0 }; | |||
static cilist io___13 = { 0, 6, 0, fmt_9997, 0 }; | |||
static cilist io___15 = { 0, 6, 0, fmt_9996, 0 }; | |||
static cilist io___16 = { 0, 6, 0, fmt_9995, 0 }; | |||
total = 1e8f; | |||
/* Initialize X and Y */ | |||
for (i__ = 1; i__ <= 1000; ++i__) { | |||
x[i__ - 1] = 1.f / (real) i__; | |||
y[i__ - 1] = (real) (1000 - i__) / 1e3f; | |||
/* L10: */ | |||
} | |||
alpha = .315f; | |||
/* Time TOTAL SAXPY operations */ | |||
t1 = second_(); | |||
for (j = 1; j <= 50000; ++j) { | |||
for (i__ = 1; i__ <= 1000; ++i__) { | |||
y[i__ - 1] += alpha * x[i__ - 1]; | |||
/* L20: */ | |||
} | |||
alpha = -alpha; | |||
/* L30: */ | |||
} | |||
t2 = second_(); | |||
tnosec = t2 - t1; | |||
/* | |||
s_wsfe(&io___10); | |||
do_fio(&c__1, (char *)&total, (ftnlen)sizeof(real)); | |||
do_fio(&c__1, (char *)&tnosec, (ftnlen)sizeof(real)); | |||
e_wsfe(); | |||
if (tnosec > 0.f) { | |||
s_wsfe(&io___11); | |||
r__1 = total / 1e6f / tnosec; | |||
do_fio(&c__1, (char *)&r__1, (ftnlen)sizeof(real)); | |||
e_wsfe(); | |||
} else { | |||
s_wsfe(&io___12); | |||
e_wsfe(); | |||
} | |||
*/ | |||
printf("Time for %f10.3 SAXPY ops = %f10.3 seconds\n",total,tnosec); | |||
if (tnosec > 0.f) { | |||
printf("SAXPY performance rate = %f10.3 mflops\n",total/1.e6/tnosec ); | |||
} else { | |||
printf("*** Warning: Time for operations was less or equal than zero => timing in TESTING might be dubious\n" ); | |||
} | |||
/* Time TOTAL SAXPY operations with SECOND in the outer loop */ | |||
t1 = second_(); | |||
for (j = 1; j <= 50000; ++j) { | |||
for (i__ = 1; i__ <= 1000; ++i__) { | |||
y[i__ - 1] += alpha * x[i__ - 1]; | |||
/* L40: */ | |||
} | |||
alpha = -alpha; | |||
t2 = second_(); | |||
/* L50: */ | |||
} | |||
/* Compute the time used in milliseconds used by an average call */ | |||
/* to SECOND. */ | |||
/* | |||
s_wsfe(&io___13); | |||
r__1 = t2 - t1; | |||
do_fio(&c__1, (char *)&r__1, (ftnlen)sizeof(real)); | |||
e_wsfe(); | |||
*/ | |||
printf("Including SECOND, time = %f10.3 seconds\n",t2-t1); | |||
avg = (t2 - t1 - tnosec) * 1e3f / 5e4f; | |||
if (avg > 0.f) { | |||
printf("Average time for SECOND = %f10.3 milliseconds\n",avg ); | |||
/* | |||
s_wsfe(&io___15); | |||
do_fio(&c__1, (char *)&avg, (ftnlen)sizeof(real)); | |||
e_wsfe(); | |||
*/ | |||
} | |||
/* Compute the equivalent number of floating point operations used */ | |||
/* by an average call to SECOND. */ | |||
if (avg > 0.f && tnosec > 0.f) { | |||
printf("Equivalent floating point ops = %f10.3 ops\n", avg/1000*total/tnosec); | |||
/* s_wsfe(&io___16); | |||
r__1 = avg / 1000 * total / tnosec; | |||
do_fio(&c__1, (char *)&r__1, (ftnlen)sizeof(real)); | |||
e_wsfe(); | |||
*/ | |||
} | |||
mysub_(&c__1000, x, y); | |||
return 0; | |||
} /* MAIN__ */ | |||
/* Subroutine */ int mysub_(integer *n, real *x, real *y)
{
    /* Empty routine: it accepts the vector length and both arrays and always
       returns 0. The arguments are deliberately left untouched; the former
       1-based "parameter adjustments" (--x, --y) were never used and formed
       pointers in front of the caller's arrays. */
    (void) n;
    (void) x;
    (void) y;
    return 0;
} /* mysub_ */
@@ -1,7 +1,7 @@ | |||
message(STATUS "LAPACKE enable") | |||
enable_language(C) | |||
set(LAPACK_INSTALL_EXPORT_NAME lapacke-targets) | |||
set(LAPACK_INSTALL_EXPORT_NAME ${LAPACKELIB}-targets) | |||
# Create a header file lapacke_mangling.h for the routines called in my C programs | |||
include(FortranCInterface) | |||
@@ -72,28 +72,28 @@ if(LAPACKE_WITH_TMG) | |||
endif() | |||
list(APPEND SOURCES ${UTILS}) | |||
add_library(lapacke ${SOURCES}) | |||
add_library(${LAPACKELIB} ${SOURCES}) | |||
set_target_properties( | |||
lapacke PROPERTIES | |||
${LAPACKELIB} PROPERTIES | |||
LINKER_LANGUAGE C | |||
VERSION ${LAPACK_VERSION} | |||
SOVERSION ${LAPACK_MAJOR_VERSION} | |||
) | |||
target_include_directories(lapacke PUBLIC | |||
target_include_directories(${LAPACKELIB} PUBLIC | |||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> | |||
$<INSTALL_INTERFACE:include> | |||
) | |||
if(WIN32 AND NOT UNIX) | |||
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||
target_compile_definitions(${LAPACKELIB} PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE) | |||
message(STATUS "Windows BUILD") | |||
endif() | |||
if(LAPACKE_WITH_TMG) | |||
target_link_libraries(lapacke PRIVATE tmglib) | |||
target_link_libraries(${LAPACKELIB} PRIVATE ${TMGLIB}) | |||
endif() | |||
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES}) | |||
target_link_libraries(${LAPACKELIB} PRIVATE ${LAPACK_LIBRARIES}) | |||
lapack_install_library(lapacke) | |||
lapack_install_library(${LAPACKELIB}) | |||
install( | |||
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h | |||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
@@ -105,28 +105,28 @@ if(BUILD_TESTING) | |||
endif() | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc @ONLY) | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc | |||
${CMAKE_CURRENT_BINARY_DIR}/${LAPACKELIB}.pc | |||
DESTINATION ${PKG_CONFIG_DIR} | |||
COMPONENT Development | |||
) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in | |||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake @ONLY) | |||
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-build.cmake.in | |||
${LAPACK_BINARY_DIR}/lapacke-config.cmake @ONLY) | |||
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config.cmake @ONLY) | |||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-install.cmake.in | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake @ONLY) | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake @ONLY) | |||
install(FILES | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake | |||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${LAPACKELIB}-config.cmake | |||
${LAPACK_BINARY_DIR}/${LAPACKELIB}-config-version.cmake | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) | |||
install(EXPORT lapacke-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION} | |||
install(EXPORT ${LAPACKELIB}-targets | |||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${LAPACKELIB}-${LAPACK_VERSION} | |||
COMPONENT Development | |||
) |
@@ -3,8 +3,8 @@ set(LAPACK_DIR "@LAPACK_BINARY_DIR@") | |||
find_package(LAPACK NO_MODULE) | |||
# Load lapack targets from the build tree, including lapacke targets. | |||
if(NOT TARGET lapacke) | |||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake") | |||
if(NOT TARGET @LAPACKELIB@) | |||
include("@LAPACK_BINARY_DIR@/@LAPACKLIB@-targets.cmake") | |||
endif() | |||
# Hint for project building against lapack | |||
@@ -14,4 +14,4 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include") | |||
# Report lapacke libraries. | |||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES}) |
@@ -5,12 +5,12 @@ get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | |||
get_filename_component(_LAPACKE_PREFIX "${_LAPACKE_PREFIX}" PATH) | |||
# Load the LAPACK package with which we were built. | |||
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/lapack-@LAPACK_VERSION@") | |||
# The directory name uses @LAPACKLIB@, the same placeholder the other templates
# use for the LAPACK library name; NOTE(review): confirm this matches the
# directory the LAPACK package config is actually installed into.
set(LAPACK_DIR "${_LAPACKE_PREFIX}/@CMAKE_INSTALL_LIBDIR@/cmake/@LAPACKLIB@-@LAPACK_VERSION@")
find_package(LAPACK NO_MODULE) | |||
# Load lapacke targets from the install tree. | |||
if(NOT TARGET lapacke) | |||
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake) | |||
if(NOT TARGET @LAPACKELIB@) | |||
include(${_LAPACKE_SELF_DIR}/@LAPACKELIB@-targets.cmake) | |||
endif() | |||
# Hint for project building against lapack | |||
@@ -20,7 +20,7 @@ set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID}) | |||
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include) | |||
# Report lapacke libraries. | |||
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES}) | |||
set(LAPACKE_LIBRARIES @LAPACKELIB@ ${LAPACK_LIBRARIES}) | |||
unset(_LAPACKE_PREFIX) | |||
unset(_LAPACKE_SELF_DIR) |
@@ -3,10 +3,10 @@ add_executable(xexample_DGESV_colmajor example_DGESV_colmajor.c lapacke_example_ | |||
add_executable(xexample_DGELS_rowmajor example_DGELS_rowmajor.c lapacke_example_aux.c lapacke_example_aux.h) | |||
add_executable(xexample_DGELS_colmajor example_DGELS_colmajor.c lapacke_example_aux.c lapacke_example_aux.h) | |||
target_link_libraries(xexample_DGESV_rowmajor lapacke) | |||
target_link_libraries(xexample_DGESV_colmajor lapacke) | |||
target_link_libraries(xexample_DGELS_rowmajor lapacke) | |||
target_link_libraries(xexample_DGELS_colmajor lapacke) | |||
# Link every example against the LAPACKE library target. The examples are
# executables, so the dependency is PRIVATE (nothing consumes them).
target_link_libraries(xexample_DGESV_rowmajor PRIVATE ${LAPACKELIB})
target_link_libraries(xexample_DGESV_colmajor PRIVATE ${LAPACKELIB})
target_link_libraries(xexample_DGELS_rowmajor PRIVATE ${LAPACKELIB})
target_link_libraries(xexample_DGELS_colmajor PRIVATE ${LAPACKELIB})
add_test(example_DGESV_rowmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_rowmajor) | |||
add_test(example_DGESV_colmajor ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample_DGESV_colmajor) | |||
@@ -49,12 +49,13 @@ extern "C" { | |||
#endif /* __cplusplus */ | |||
#include <stdlib.h> | |||
#include <stdint.h> | |||
#ifndef lapack_int | |||
#if defined(LAPACK_ILP64) | |||
#define lapack_int long | |||
#define lapack_int int64_t | |||
#else | |||
#define lapack_int int | |||
#define lapack_int int32_t | |||
#endif | |||
#endif | |||
@@ -5,6 +5,6 @@ Name: LAPACKE | |||
Description: C Standard Interface to LAPACK Linear Algebra PACKage | |||
Version: @LAPACK_VERSION@ | |||
URL: http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack | |||
Libs: -L${libdir} -llapacke | |||
Libs: -L${libdir} -l@LAPACKELIB@ | |||
Cflags: -I${includedir} | |||
Requires.private: lapack | |||
Requires.private: @LAPACKLIB@ |